101: Linear Regression and export to ONNX¶
This example uses scikit-learn and torch to train a linear regression model and exports it to ONNX.
data¶
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import torch
from onnxruntime import InferenceSession
from experimental_experiment.helpers import pretty_onnx
from onnx_array_api.plotting.graphviz_helper import plot_dot
from experimental_experiment.torch_interpreter import to_onnx
X, y = make_regression(1000, n_features=5, noise=10.0, n_informative=2)
print(X.shape, y.shape)
X_train, X_test, y_train, y_test = train_test_split(X, y)
(1000, 5) (1000,)
scikit-learn: the simple regression¶
clr = LinearRegression()
clr.fit(X_train, y_train)
print(f"coefficients: {clr.coef_}, {clr.intercept_}")
coefficients: [-0.4156729 68.16768474 -0.14817797 -0.13074446 79.68740891], -0.26352200022452266
Evaluation¶
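A minimal sketch of the evaluation, assuming mean_squared_error and r2_score (imported at the top) are computed on the test split:

y_pred = clr.predict(X_test)
l2 = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"LinearRegression: l2={l2}, r2={r2}")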
LinearRegression: l2=94.01302744583698, r2=0.9917297757770004
scikit-learn: SGD algorithm¶
SGD stands for Stochastic Gradient Descent: the coefficients are updated iteratively, one sample at a time, rather than solved for in closed form.
clr = SGDRegressor(max_iter=5, verbose=1)
clr.fit(X_train, y_train)
print(f"coefficients: {clr.coef_}, {clr.intercept_}")
-- Epoch 1
Norm: 88.27, NNZs: 5, Bias: 0.844179, T: 750, Avg. loss: 1130.952914
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 100.02, NNZs: 5, Bias: 0.239097, T: 1500, Avg. loss: 104.278971
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 103.16, NNZs: 5, Bias: -0.042557, T: 2250, Avg. loss: 59.880112
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 104.22, NNZs: 5, Bias: -0.102363, T: 3000, Avg. loss: 55.392890
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 104.58, NNZs: 5, Bias: -0.137567, T: 3750, Avg. loss: 54.760405
Total training time: 0.00 seconds.
/home/xadupre/vv/this/lib/python3.10/site-packages/sklearn/linear_model/_stochastic_gradient.py:1616: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.
warnings.warn(
coefficients: [-6.13996456e-01 6.78142089e+01 -7.22902356e-02 -1.09481207e-01
7.96098393e+01], [-0.13756652]
Evaluation¶
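The same evaluation for SGDRegressor, again a sketch assuming the metrics are computed on the test split:

y_pred = clr.predict(X_test)
sl2 = mean_squared_error(y_test, y_pred)
sr2 = r2_score(y_test, y_pred)
print(f"SGDRegressor: sl2={sl2}, sr2={sr2}")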
SGDRegressor: sl2=96.61863163474834, sr2=0.9915005635979639
torch¶
class TorchLinearRegression(torch.nn.Module):
    def __init__(self, n_dims: int, n_targets: int):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        return self.linear(x)
def train_loop(dataloader, model, loss_fn, optimizer):
    total_loss = 0.0

    # Set the model to training mode - important for batch normalization
    # and dropout layers. Unnecessary in this situation but added for
    # best practices.
    model.train()
    for X, y in dataloader:
        # Compute prediction and loss.
        pred = model(X)
        loss = loss_fn(pred.ravel(), y)

        # Backpropagation.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Accumulate the training loss as a plain float so the computation
        # graph is not kept alive across iterations.
        total_loss += float(loss)

    return total_loss
model = TorchLinearRegression(X_train.shape[1], 1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()
device = "cpu"
model = model.to(device)
dataset = torch.utils.data.TensorDataset(
    torch.Tensor(X_train).to(device), torch.Tensor(y_train).to(device)
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1)
for i in range(5):
    loss = train_loop(dataloader, model, loss_fn, optimizer)
    print(f"iteration {i}, loss={loss}")
iteration 0, loss=2694195.5
iteration 1, loss=228070.96875
iteration 2, loss=90632.5
iteration 3, loss=82579.359375
iteration 4, loss=82213.4140625
Let’s check the error on the test set.
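A sketch of the same metrics for the torch model, assuming predictions are computed on the test split with gradients disabled:

with torch.no_grad():
    y_pred = model(torch.Tensor(X_test)).ravel().numpy()
tl2 = mean_squared_error(y_test, y_pred)
tr2 = r2_score(y_test, y_pred)
print(f"TorchLinearRegression: tl2={tl2}, tr2={tr2}")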
TorchLinearRegression: tl2=94.02586960430797, tr2=0.9917286460662258
And the coefficients.
print("coefficients:")
for p in model.parameters():
    print(p)
coefficients:
Parameter containing:
tensor([[-0.5584, 68.3682, -0.1259, 0.1630, 79.7769]], requires_grad=True)
Parameter containing:
tensor([-0.2713], requires_grad=True)
Conversion to ONNX¶
Let’s convert it to ONNX.
onx = to_onnx(model, (torch.Tensor(X_test[:2]),), input_names=["x"])
Let’s check it works.
sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
res = sess.run(None, {"x": X_test.astype(np.float32)[:2]})
print(res)
[array([[ 99.81213 ],
       [-20.478058]], dtype=float32)]
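As an extra check, the ONNX predictions can be compared with the torch module itself; the maximum difference should be close to float32 precision (a sketch, not part of the original script):

expected = model(torch.Tensor(X_test[:2])).detach().numpy()
print(np.abs(expected - res[0]).max())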
And the model.
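plot_dot is imported at the top of the example; the <Axes: > output below presumably comes from a call such as:

plot_dot(onx)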
<Axes: >
With dynamic shapes¶
onx = to_onnx(
    model,
    (torch.Tensor(X_test[:2]),),
    input_names=["x"],
    dynamic_shapes={"x": {0: torch.export.Dim("batch")}},
)
print(pretty_onnx(onx))
opset: domain='' version=18
doc_string: large_model=False, inline=False, external_threshold=102...
input: name='x' type=dtype('float32') shape=['batch', 5]
init: name='p_linear_weight' type=dtype('float32') shape=(1, 5)
init: name='p_linear_bias' type=dtype('float32') shape=(1,) -- array([-0.27127013], dtype=float32)
Transpose(p_linear_weight, perm=[1,0]) -> _onx_transpose0
Transpose(_onx_transpose0, perm=[1,0]) -> GemmTransposePattern--_onx_transpose0
Gemm(x, GemmTransposePattern--_onx_transpose0, p_linear_bias, transB=1) -> output_0
output: name='output_0' type=dtype('float32') shape=['batch', 1]
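To verify that the batch dimension is really dynamic, the same session can be fed batches of different sizes (a small sketch; any batch size should be accepted):

sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
for batch_size in (1, 8):
    res = sess.run(None, {"x": X_test.astype(np.float32)[:batch_size]})
    print(batch_size, res[0].shape)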
Total running time of the script: (0 minutes 2.789 seconds)