101: Linear Regression and export to ONNX

This example trains a linear regression with scikit-learn and torch, then exports the torch model to ONNX.

Data

The dataset is synthetic: 1,000 samples with 5 features, only 2 of which are informative.

import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression, SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import torch
from onnxruntime import InferenceSession
from experimental_experiment.helpers import pretty_onnx
from onnx_array_api.plotting.graphviz_helper import plot_dot
from experimental_experiment.torch_interpreter import to_onnx


X, y = make_regression(1000, n_features=5, noise=10.0, n_informative=2)
print(X.shape, y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y)
(1000, 5) (1000,)

scikit-learn: the simple regression

The coefficients of the least squares solution are given in closed form by:

A^* = (X'X)^{-1} X'Y

clr = LinearRegression()
clr.fit(X_train, y_train)

print(f"coefficients: {clr.coef_}, {clr.intercept_}")
coefficients: [31.55793987 -0.15404633 -0.29814966 -0.12105706  4.35872861], 0.10233234583956419
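
As a sanity check, the closed form above can be evaluated directly with NumPy. This is a minimal sketch (a column of ones is appended so the intercept is estimated jointly); the result should be close to clr.coef_ and clr.intercept_:

X1 = np.hstack([X_train, np.ones((X_train.shape[0], 1))])
# np.linalg.lstsq solves the same least squares problem but is more
# robust than forming the inverse of X'X explicitly.
coef, *_ = np.linalg.lstsq(X1, y_train, rcond=None)
print(coef)  # first 5 entries ~ clr.coef_, last entry ~ clr.intercept_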

Evaluation

y_pred = clr.predict(X_test)
l2 = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"LinearRegression: l2={l2}, r2={r2}")
LinearRegression: l2=98.79912059521222, r2=0.9060205536990547

scikit-learn: SGD algorithm

SGD stands for Stochastic Gradient Descent: the model is updated after each training sample, using the gradient of the loss on that sample alone.
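
A minimal sketch of one such pass over the data for the squared loss, assuming a fixed learning rate lr (SGDRegressor's actual learning-rate schedule and regularization differ):

lr = 1e-3
w = np.zeros(X_train.shape[1])
b = 0.0
for xi, yi in zip(X_train, y_train):
    err = xi @ w + b - yi  # prediction error on a single sample
    w -= lr * err * xi     # gradient of 0.5 * err**2 with respect to w
    b -= lr * err          # gradient with respect to the intercept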

clr = SGDRegressor(max_iter=5, verbose=1)
clr.fit(X_train, y_train)

print(f"coefficients: {clr.coef_}, {clr.intercept_}")
-- Epoch 1
Norm: 27.04, NNZs: 5, Bias: 0.677888, T: 750, Avg. loss: 143.210602
Total training time: 0.00 seconds.
-- Epoch 2
Norm: 30.49, NNZs: 5, Bias: 0.405055, T: 1500, Avg. loss: 55.468876
Total training time: 0.00 seconds.
-- Epoch 3
Norm: 31.30, NNZs: 5, Bias: 0.126806, T: 2250, Avg. loss: 51.837841
Total training time: 0.00 seconds.
-- Epoch 4
Norm: 31.69, NNZs: 5, Bias: 0.142602, T: 3000, Avg. loss: 51.466929
Total training time: 0.00 seconds.
-- Epoch 5
Norm: 31.72, NNZs: 5, Bias: 0.088899, T: 3750, Avg. loss: 51.384296
Total training time: 0.00 seconds.
/home/xadupre/vv/this312/lib/python3.12/site-packages/sklearn/linear_model/_stochastic_gradient.py:1603: ConvergenceWarning: Maximum number of iteration reached before convergence. Consider increasing max_iter to improve the fit.
  warnings.warn(
coefficients: [31.41452968 -0.20326068 -0.24422897 -0.18402664  4.35053917], [0.08889909]
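
In this log, Norm is the L2 norm of the coefficient vector, NNZs the number of non-zero coefficients, T the number of samples processed so far, and the ConvergenceWarning only says that 5 epochs were not enough to reach the stopping criterion.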

Evaluation

y_pred = clr.predict(X_test)
sl2 = mean_squared_error(y_test, y_pred)
sr2 = r2_score(y_test, y_pred)
print(f"SGDRegressor: sl2={sl2}, sr2={sr2}")
SGDRegressor: sl2=98.39413498803201, sr2=0.9064057830704636

Linear Regression with PyTorch

class TorchLinearRegression(torch.nn.Module):
    def __init__(self, n_dims: int, n_targets: int):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        return self.linear(x)
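

Note that torch.nn.Linear stores its weight as an (n_targets, n_dims) matrix and computes x @ weight.T + bias; this is why the ONNX graph exported below ends with a Gemm node using transB=1.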


def train_loop(dataloader, model, loss_fn, optimizer):
    total_loss = 0.0

    # Set the model to training mode - important for batch normalization
    # and dropout layers. Unnecessary in this situation but added for
    # best practices.
    model.train()
    for X, y in dataloader:
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred.ravel(), y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Accumulate the training loss; .item() detaches the value so the
        # autograd graph of each batch is not kept alive.
        total_loss += loss.item()

    return total_loss


model = TorchLinearRegression(X_train.shape[1], 1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
loss_fn = torch.nn.MSELoss()

device = "cpu"
model = model.to(device)
dataset = torch.utils.data.TensorDataset(
    torch.Tensor(X_train).to(device), torch.Tensor(y_train).to(device)
)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=1)


for i in range(5):
    loss = train_loop(dataloader, model, loss_fn, optimizer)
    print(f"iteration {i}, loss={loss}")
iteration 0, loss=312444.125
iteration 1, loss=91631.875
iteration 2, loss=78284.1171875
iteration 3, loss=77413.71875
iteration 4, loss=77361.1484375
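
Since the DataLoader uses batch_size=1, each reported value sums 750 per-sample losses; the last epoch therefore corresponds to an average squared error of about 77361 / 750 ≈ 103, in line with the test MSE computed below.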

Let’s check the error.

y_pred = model(torch.Tensor(X_test)).detach().numpy()
tl2 = mean_squared_error(y_test, y_pred)
tr2 = r2_score(y_test, y_pred)
print(f"TorchLinearRegression: tl2={tl2}, tr2={tr2}")
TorchLinearRegression: tl2=99.30217465702843, tr2=0.905542039903546

And the coefficients.

print("coefficients:")
for p in model.parameters():
    print(p)
coefficients:
Parameter containing:
tensor([[31.5076,  0.1727, -0.3712, -0.2093,  4.3239]], requires_grad=True)
Parameter containing:
tensor([-0.0265], requires_grad=True)
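
Up to the randomness of the optimization, these agree with the scikit-learn estimates above: the two informative features receive weights near 31.5 and 4.3, and the others stay close to zero.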

Conversion to ONNX

Let’s convert it to ONNX.

onx = to_onnx(model, (torch.Tensor(X_test[:2]),), input_names=["x"])

Let’s check it works.

sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
res = sess.run(None, {"x": X_test.astype(np.float32)[:2]})
print(res)
[array([[-49.87089 ],
       [ 52.269424]], dtype=float32)]
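
The ONNX output should match the torch model up to float32 rounding; a minimal sketch of that check (the tolerance is an assumption):

expected = model(torch.Tensor(X_test[:2])).detach().numpy()
np.testing.assert_allclose(expected, res[0], atol=1e-4)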

And the model, rendered with plot_dot:

[figure: ONNX graph of the exported model (plot torch linreg 101)]

With dynamic shapes

Dynamic shapes are consumed by torch.export.export() and must follow the convention described in its documentation.

onx = to_onnx(
    model,
    (torch.Tensor(X_test[:2]),),
    input_names=["x"],
    dynamic_shapes={"x": {0: torch.export.Dim("batch")}},
)

print(pretty_onnx(onx))
opset: domain='' version=18
doc_string: large_model=False, inline=False, external_threshold=102...
input: name='x' type=dtype('float32') shape=['batch', 5]
init: name='linear.weight' type=float32 shape=(1, 5)                  -- DynamoInterpret.placeholder.1/P(linear.weight)
init: name='linear.bias' type=float32 shape=(1,) -- array([-0.0265037], dtype=float32)-- DynamoInterpret.placeholder.1/P(linear.bias)
Transpose(linear.weight, perm=[1,0]) -> _onx_transpose0
  Transpose(_onx_transpose0, perm=[1,0]) -> GemmTransposePattern--_onx_transpose0
    Gemm(x, GemmTransposePattern--_onx_transpose0, linear.bias, transB=1) -> output_0
output: name='output_0' type=dtype('float32') shape=['batch', 1]
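
Thanks to the symbolic batch dimension, the same model now accepts any batch size. A quick sketch to verify (the batch sizes are arbitrary):

sess = InferenceSession(onx.SerializeToString(), providers=["CPUExecutionProvider"])
for n in (1, 7, len(X_test)):
    got = sess.run(None, {"x": X_test[:n].astype(np.float32)})[0]
    assert got.shape == (n, 1)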

Total running time of the script: (0 minutes 8.298 seconds)

Related examples

101: A custom backend for torch

101: Some dummy examples with torch.export.export

102: Convolution and Matrix Multiplication

Gallery generated by Sphinx-Gallery