101: Linear Regression and export to ONNX¶
This example trains a linear regression with scikit-learn and torch, then exports the torch model to ONNX.
Data¶
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import torch
from onnxruntime import InferenceSession
from experimental_experiment.helpers import pretty_onnx
from onnx_array_api.plotting.graphviz_helper import plot_dot
from experimental_experiment.torch_interpreter import to_onnx
X, y = make_regression(1000, n_features=5, noise=10.0, n_informative=2)
print(X.shape, y.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y)
(1000, 5) (1000,)
scikit-learn: the simple regression¶
clr = LinearRegression()
clr.fit(X_train, y_train)
print(f"coefficients: {clr.coef_}, {clr.intercept_}")
coefficients: [89.83725137 -0.09857063 -0.30928284 53.93268712 -0.24229392], 0.3815247294485997
Evaluation¶
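The fit is evaluated with mean_squared_error and r2_score on the test set; a minimal sketch producing the output below, assuming the variable names l2 and r2:

y_pred = clr.predict(X_test)
l2 = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"LinearRegression: l2={l2}, r2={r2}")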
LinearRegression: l2=100.7128387164218, r2=0.9908387720555316
scikit-learn: SGD algorithm¶
SGD = Stochastic Gradient Descent
clr = SGDRegressor(max_iter=5, verbose=1)
clr.fit(X_train, y_train)
print(f"coefficients: {clr.coef_}, {clr.intercept_}")
-- Epoch 1
Norm: 87.81, NNZs: 5, Bias: -0.170868, T: 750, Avg. loss: 1166.994152
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 99.74, NNZs: 5, Bias: -0.105077, T: 1500, Avg. loss: 95.897394
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 103.05, NNZs: 5, Bias: 0.139772, T: 2250, Avg. loss: 49.810695
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 103.99, NNZs: 5, Bias: 0.218756, T: 3000, Avg. loss: 45.172140
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 104.53, NNZs: 5, Bias: 0.261402, T: 3750, Avg. loss: 44.562470
Total training time: 0.00 seconds.
~/vv/this312/lib/python3.12/site-packages/sklearn/linear_model/_stochastic_gradient.py:1579: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.
warnings.warn(
coefficients: [ 8.96312266e+01 6.05541848e-02 -3.42289160e-01 5.37878356e+01
-2.23317286e-01], [0.26140222]
Evaluation
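Same metrics for the SGD model; a minimal sketch, assuming the names sl2 and sr2:

y_pred = clr.predict(X_test)
sl2 = mean_squared_error(y_test, y_pred)
sr2 = r2_score(y_test, y_pred)
print(f"SGDRegressor: sl2={sl2}, sr2={sr2}")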
SGDRegressor: sl2=101.38892888321155, sr2=0.9907772722883926
Linear Regression with pytorch¶
class TorchLinearRegression(torch.nn.Module):
    def __init__(self, n_dims: int, n_targets: int):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        return self.linear(x)


def train_loop(dataloader, model, loss_fn, optimizer):
    total_loss = 0.0

    # Set the model to training mode - important for batch normalization
    # and dropout layers. Unnecessary in this situation but added
    # for best practices.
    model.train()
    for X, y in dataloader:
        # Compute prediction and loss.
        pred = model(X)
        loss = loss_fn(pred.ravel(), y)

        # Backpropagation.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Accumulate the training loss; detach so the autograd graph
        # is not kept alive across batches.
        total_loss += loss.detach()

    return total_loss
model = TorchLinearRegression(X_train.shape[1], 1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()
device = "cpu"
model = model.to(device)
dataset = torch.utils.data.TensorDataset(
    torch.Tensor(X_train).to(device), torch.Tensor(y_train).to(device)
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1)

for i in range(5):
    loss = train_loop(dataloader, model, loss_fn, optimizer)
    print(f"iteration {i}, loss={loss}")
iteration 0, loss=2677243.25
iteration 1, loss=216912.296875
iteration 2, loss=76338.71875
iteration 3, loss=67512.40625
iteration 4, loss=66896.0390625
Let’s check the error.
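torch.Tensor casts the float64 test set to float32 before inference; a minimal sketch of the evaluation, assuming the names tl2 and tr2:

with torch.no_grad():
    y_pred = model(torch.Tensor(X_test)).ravel().numpy()
tl2 = mean_squared_error(y_test, y_pred)
tr2 = r2_score(y_test, y_pred)
print(f"TorchLinearRegression: tl2={tl2}, tr2={tr2}")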
TorchLinearRegression: tl2=101.23697047513438, tr2=0.9907910950108202
And the coefficients.
print("coefficients:")
for p in model.parameters():
print(p)
coefficients:
Parameter containing:
tensor([[ 8.9716e+01, -4.1962e-02, -2.5049e-01, 5.3846e+01, -8.3700e-02]],
requires_grad=True)
Parameter containing:
tensor([0.1939], requires_grad=True)
Conversion to ONNX¶
Let’s convert it to ONNX.
onx = to_onnx(model, (torch.Tensor(X_test[:2]),), input_names=["x"])
Let’s check it works.
sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
res = sess.run(None, {"x": X_test.astype(np.float32)[:2]})
print(res)
[array([[ 77.554375],
[192.41856 ]], dtype=float32)]
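The ONNX output should match the torch model on the same rows; a minimal sketch to verify it:

expected = model(torch.Tensor(X_test[:2])).detach().numpy()
print(np.abs(expected - res[0]).max())  # expected to be close to zero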
And the model.
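The graph can be rendered with plot_dot, imported above, which relies on graphviz; a minimal sketch:

plot_dot(onx)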

With dynamic shapes¶
Dynamic shapes are used by torch.export.export() and must follow
the convention described in its documentation. A dynamic dimension
accepts any value, so the exported model remains valid for many
different input shapes. That’s usually what users need.
onx = to_onnx(
    model,
    (torch.Tensor(X_test[:2]),),
    input_names=["x"],
    dynamic_shapes={"x": {0: torch.export.Dim("batch")}},
)
print(pretty_onnx(onx))
opset: domain='' version=18
input: name='x' type=dtype('float32') shape=['batch', 5]
init: name='GemmTransposePattern--p_linear_weight::T10' type=float32 shape=(1, 5)-- GraphBuilder.constant_folding.from/fold(init7_s2_1_-1,p_linear_weight::T10)##p_linear_weight::T10/GraphBuilder.constant_folding.from/fold(p_linear_weight)##p_linear_weight/DynamoInterpret.placeholder.1/P(linear.weight)##init7_s2_1_-1/TransposeEqualReshapePattern.apply.new_shape
init: name='linear.bias' type=float32 shape=(1,) -- array([0.19394061], dtype=float32)-- DynamoInterpret.placeholder.1/P(linear.bias)
Gemm(x, GemmTransposePattern--p_linear_weight::T10, linear.bias, transB=1) -> output_0
output: name='output_0' type=dtype('float32') shape=['batch', 1]
For simplicity, it is possible to use torch.export.Dim.DYNAMIC
or torch.export.Dim.AUTO.
onx = to_onnx(
    model,
    (torch.Tensor(X_test[:2]),),
    input_names=["x"],
    dynamic_shapes={"x": {0: torch.export.Dim.DYNAMIC}},
)
print(pretty_onnx(onx))
opset: domain='' version=18
input: name='x' type=dtype('float32') shape=['batch', 5]
init: name='GemmTransposePattern--p_linear_weight::T10' type=float32 shape=(1, 5)-- GraphBuilder.constant_folding.from/fold(init7_s2_1_-1,p_linear_weight::T10)##p_linear_weight::T10/GraphBuilder.constant_folding.from/fold(p_linear_weight)##p_linear_weight/DynamoInterpret.placeholder.1/P(linear.weight)##init7_s2_1_-1/TransposeEqualReshapePattern.apply.new_shape
init: name='linear.bias' type=float32 shape=(1,) -- array([0.19394061], dtype=float32)-- DynamoInterpret.placeholder.1/P(linear.bias)
Gemm(x, GemmTransposePattern--p_linear_weight::T10, linear.bias, transB=1) -> output_0
output: name='output_0' type=dtype('float32') shape=['batch', 1]
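Since the first dimension is dynamic, the same exported model runs with any batch size; a quick check with onnxruntime (the batch sizes below are arbitrary):

sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
for n in (1, 8, 100):
    got = sess.run(None, {"x": X_test[:n].astype(np.float32)})[0]
    print(n, got.shape)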
Total running time of the script: (0 minutes 2.018 seconds)
Related examples
201: Use torch to export a scikit-learn model into ONNX
201: Evaluate different ways to export a torch model to ONNX
101: Onnx Model Optimization based on Pattern Rewriting