Compare multiple versions of onnxruntime#

One important task is to check that onnxruntime does not run slower with any new version. The following tools were developed for that purpose.

Step 1: save a test#

We first need to save the model and the inputs onnxruntime must be evaluated on. This is done with function save_for_benchmark_or_test.

<<<

import os
import numpy as np
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor
from skl2onnx import to_onnx
from onnx_extended.tools.run_onnx import save_for_benchmark_or_test

# The dimension of the problem.
batch_size = 100
n_features = 10
n_trees = 2
max_depth = 3

# Let's create model.
X, y = make_regression(batch_size * 2, n_features=n_features, n_targets=1)
X, y = X.astype(np.float32), y.astype(np.float32)
model = RandomForestRegressor(n_trees, max_depth=max_depth, n_jobs=-1)
model.fit(X[:batch_size], y[:batch_size])

# target_opset is used to select an opset an old version of onnxruntime can process.
onx = to_onnx(model, X[:1], target_opset=17)

# Let's save the model and the inputs on disk.
folder = "test_ort_version"
if not os.path.exists(folder):
    os.mkdir(folder)

inputs = [X]
save_for_benchmark_or_test(folder, "rf", onx, inputs)

# Let's see what was saved.
for r, d, f in os.walk(folder):
    for name in f:
        full_name = os.path.join(r, name)
        print(f"{os.stat(full_name).st_size / 2 ** 10:1.1f} Kb: {full_name}")

>>>

    1.6 Kb: test_ort_version/rf/model.onnx
    7.8 Kb: test_ort_version/rf/test_data_set_0/input_0.pb
    0.8 Kb: test_ort_version/rf/test_data_set_0/output_0.pb

The outputs are not used to measure the performance but they can be used to evaluate the discrepancies.
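For instance, the following sketch reloads the saved tensors and measures the maximum absolute difference with a new run of the model. It assumes the file layout shown above and uses the python reference implementation from onnx as the baseline.

import os
import numpy as np
from onnx import load_tensor
from onnx.numpy_helper import to_array
from onnx.reference import ReferenceEvaluator

# Reload the tensors saved by save_for_benchmark_or_test.
folder = "test_ort_version"
data = os.path.join(folder, "rf", "test_data_set_0")
x = to_array(load_tensor(os.path.join(data, "input_0.pb")))
expected = to_array(load_tensor(os.path.join(data, "output_0.pb")))

# Run the model again, here with the python reference implementation of onnx.
sess = ReferenceEvaluator(os.path.join(folder, "rf", "model.onnx"))
got = sess.run(None, {sess.input_names[0]: x})[0]

# Maximum absolute discrepancy between the stored and the recomputed outputs.
diff = np.abs(np.asarray(got).ravel() - np.asarray(expected).ravel()).max()
print(f"maximum absolute discrepancy: {diff}")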

Step 2: evaluate multiple versions of onnxruntime#

This step relies on function bench_virtual. For every set of modules listed below, it installs them in a virtual environment and measures the execution time of the saved test with every requested runtime.

import os
from onnx_extended.tools.run_onnx import bench_virtual

# Folder where the model and the inputs were saved at step 1.
folder = os.path.abspath("test_ort_version/rf")
# Folder of the virtual environment used to install every configuration.
virtual_env = os.path.abspath("venv")

# The runtimes to compare.
runtimes = ["ReferenceEvaluator", "CReferenceEvaluator", "onnxruntime"]
# The sets of modules to install, one configuration per dictionary.
modules = [
    {"onnx-extended": "0.2.1", "onnx": "1.14.1", "onnxruntime": "1.16.0"},
    {"onnx-extended": "0.2.1", "onnx": "1.14.1", "onnxruntime": "1.15.1"},
    {"onnx-extended": "0.2.1", "onnx": "1.14.1", "onnxruntime": "1.14.1"},
    {"onnx-extended": "0.2.1", "onnx": "1.14.1", "onnxruntime": "1.13.1"},
    {"onnx-extended": "0.2.1", "onnx": "1.14.1", "onnxruntime": "1.12.1"},
]
# The python runtimes are only evaluated with onnxruntime 1.16.0,
# only runtime onnxruntime is measured with every version.
filter_fct = (
    lambda rt, modules: rt == "onnxruntime" or modules["onnxruntime"] == "1.16.0"
)

df = bench_virtual(
    folder,
    virtual_env,
    verbose=1,
    modules=modules,
    runtimes=runtimes,
    warmup=5,
    repeat=10,
    save_as_dataframe="results.csv",
    filter_fct=filter_fct,
)

# Show the average time per runtime and per onnxruntime version.
columns = ["runtime", "b_avg_time", "runtime", "v_onnxruntime"]
print(df[columns])

The output would look like:

[bench_virtual] 1/5 18:01:02 onnx==1.14.1 onnx-extended==0.2.1 onnxruntime==1.16.0
[bench_virtual] 2/5 18:01:06 onnx==1.14.1 onnx-extended==0.2.1 onnxruntime==1.15.1
[bench_virtual] 3/5 18:01:09 onnx==1.14.1 onnx-extended==0.2.1 onnxruntime==1.14.1
[bench_virtual] 4/5 18:01:12 onnx==1.14.1 onnx-extended==0.2.1 onnxruntime==1.13.1
[bench_virtual] 5/5 18:01:15 onnx==1.14.1 onnx-extended==0.2.1 onnxruntime==1.12.1
            runtime  b_avg_time              runtime v_onnxruntime
0   ReferenceEvaluator    0.001879   ReferenceEvaluator        1.16.0
1  CReferenceEvaluator    0.000042  CReferenceEvaluator        1.16.0
2          onnxruntime    0.000013          onnxruntime        1.16.0
3          onnxruntime    0.000012          onnxruntime        1.15.1
4          onnxruntime    0.000017          onnxruntime        1.14.1
5          onnxruntime    0.000012          onnxruntime        1.13.1
6          onnxruntime    0.000011          onnxruntime        1.12.1

The differences are not significant on such a small model, except for the python runtime.
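
To make a possible regression easier to spot, the dataframe returned by bench_virtual (or the file results.csv saved above) can be aggregated, for instance by averaging the time per runtime and per onnxruntime version. This is only a sketch assuming the column names shown in the output above.

import pandas as pd

# results.csv was written by bench_virtual through save_as_dataframe.
df = pd.read_csv("results.csv")

# Average time per runtime and per onnxruntime version,
# sorted so that a slower new version stands out at the bottom.
summary = (
    df[["runtime", "v_onnxruntime", "b_avg_time"]]
    .groupby(["runtime", "v_onnxruntime"])
    .mean()
    .sort_values("b_avg_time")
)
print(summary)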