Measuring onnxruntime performance against a cython binding¶

The following code compares the performance of onnxruntime's python bindings with that of a cython binding. The time spent in the binding itself is negligible when the computation is heavy, but it can be significant for small matrices.

import numpy
from pandas import DataFrame
import matplotlib.pyplot as plt
from tqdm import tqdm
from onnx import numpy_helper, TensorProto
from onnx.helper import (
    make_model,
    make_node,
    make_graph,
    make_tensor_value_info,
    make_opsetid,
)
from onnx.checker import check_model
from onnxruntime import InferenceSession
from onnx_extended.ortcy.wrap.ortinf import OrtSession
from onnx_extended.args import get_parsed_args
from onnx_extended.ext_test_case import measure_time, unit_test_going


# Default list of matrix sizes: a shorter one is used when unit tests are running.
_default_dims = "1,10" if unit_test_going() else "1,10,100,1000"

# Command-line options of the script; ``dims``, ``number`` and ``repeat``
# are read back further down in the benchmark section.
script_args = get_parsed_args(
    "plot_bench_cypy_ort",
    description=__doc__,
    dims=(_default_dims, "square matrix dimensions to try, comma separated values"),
    expose="repeat,number",
)

A simple onnx model¶

# Initializer "A": a one-element float32 tensor holding the value 1.
A = numpy_helper.from_array(numpy.array([1], dtype=numpy.float32), name="A")

# Input "X" and output "Y" are both float tensors with two unnamed dimensions.
X, Y = (
    make_tensor_value_info(tensor_name, TensorProto.FLOAT, [None, None])
    for tensor_name in ("X", "Y")
)

# Single node graph: Y = Add(X, A).
node1 = make_node("Add", inputs=["X", "A"], outputs=["Y"])
graph = make_graph([node1], "+1", [X], [Y], initializer=[A])

# Default domain pinned to opset 18, IR version 8; the checker validates the result.
onnx_model = make_model(
    graph,
    opset_imports=[make_opsetid("", 18)],
    ir_version=8,
)
check_model(onnx_model)

Two python bindings on CPU¶

# Both sessions are created from the same serialized model.
model_bytes = onnx_model.SerializeToString()
sess_ort = InferenceSession(model_bytes, providers=["CPUExecutionProvider"])
sess_ext = OrtSession(model_bytes)

# Random 10x10 float32 input and the expected result of the "+1" model.
x = numpy.random.randn(10, 10).astype(numpy.float32)
y = x + 1

# onnxruntime takes a dictionary of named inputs, the cython binding a list.
y_ort = sess_ort.run(None, {"X": x})[0]
y_ext = sess_ext.run([x])[0]

# Sum of absolute differences between each output and the expected one.
d_ort = numpy.sum(numpy.abs(y_ort - y))
d_ext = numpy.sum(numpy.abs(y_ext - y))
print(f"Discrepancies: d_ort={d_ort}, d_ext={d_ext}")

Discrepancies: d_ort=0.0, d_ext=0.0

Time measurement¶

`run_1_1` is a specialized implementation for the case where the model has exactly one input and one output.

# The three measurements share the same timing settings.
timing_options = {"number": 200, "repeat": 100}

# onnxruntime python binding.
t_ort = measure_time(lambda: sess_ort.run(None, {"X": x})[0], **timing_options)
print(f"t_ort={t_ort}")

# cython binding, generic entry point taking a list of inputs.
t_ext = measure_time(lambda: sess_ext.run([x])[0], **timing_options)
print(f"t_ext={t_ext}")

# cython binding, entry point for a single input and a single output.
t_ext2 = measure_time(lambda: sess_ext.run_1_1(x), **timing_options)
print(f"t_ext2={t_ext2}")

t_ort={'average': np.float64(1.4214167399404685e-05), 'deviation': np.float64(9.921318194458582e-06), 'min_exec': np.float64(8.368075032194611e-06), 'max_exec': np.float64(5.840254500071751e-05), 'repeat': 100, 'number': 200, 'ttime': np.float64(0.0014214167399404685), 'context_size': 64, 'warmup_time': 0.00023353099822998047}
t_ext={'average': np.float64(1.053838609768718e-05), 'deviation': np.float64(4.029739849211689e-06), 'min_exec': np.float64(7.578765034850221e-06), 'max_exec': np.float64(2.532805498049129e-05), 'repeat': 100, 'number': 200, 'ttime': np.float64(0.001053838609768718), 'context_size': 64, 'warmup_time': 0.00024365499848499894}
t_ext2={'average': np.float64(5.640346098516602e-06), 'deviation': np.float64(1.730949932722855e-06), 'min_exec': np.float64(4.160329990554601e-06), 'max_exec': np.float64(1.6278900002362206e-05), 'repeat': 100, 'number': 200, 'ttime': np.float64(0.0005640346098516602), 'context_size': 64, 'warmup_time': 5.1965005695819855e-05}

Benchmark¶

# Matrix sizes to benchmark, parsed from the comma separated option.
dims = list(map(int, script_args.dims.split(",")))

data = []
for dim in tqdm(dims):
    # Dimensions of 1000 and above run with five times more iterations.
    if dim >= 1000:
        number, repeat = script_args.number * 5, script_args.repeat * 5
    else:
        number, repeat = script_args.number, script_args.repeat

    x = numpy.random.randn(dim, dim).astype(numpy.float32)

    # ``x=x`` binds the current matrix as a default argument of each lambda.
    # NOTE(review): the onnxruntime measure uses a fixed repeat=50 while the
    # two others use the computed ``repeat`` - confirm this is intentional.
    t_ort = measure_time(
        lambda x=x: sess_ort.run(None, {"X": x})[0], number=number, repeat=50
    )
    t_ext = measure_time(lambda x=x: sess_ext.run([x])[0], number=number, repeat=repeat)
    t_ext2 = measure_time(lambda x=x: sess_ext.run_1_1(x), number=number, repeat=repeat)

    # Tag every measure with the binding name and the dimension, then store it.
    for label, stats in (("ort", t_ort), ("ext", t_ext), ("ext_1_1", t_ext2)):
        stats.update(name=label, dim=dim)
        data.append(stats)

    # Unit tests stop after the first dimension greater than or equal to 10.
    if unit_test_going() and dim >= 10:
        break


df = DataFrame(data)
df

  0%|          | 0/4 [00:00<?, ?it/s]
100%|██████████| 4/4 [00:02<00:00,  1.72it/s]
100%|██████████| 4/4 [00:02<00:00,  1.72it/s]

	average	deviation	min_exec	max_exec	repeat	number	ttime	context_size	warmup_time	name	dim
0	0.000008	2.067263e-06	0.000006	0.000017	50	10	0.000421	64	0.000155	ort	1
1	0.000007	1.885701e-06	0.000005	0.000011	10	10	0.000067	64	0.000263	ext	1
2	0.000006	2.638214e-06	0.000005	0.000014	10	10	0.000058	64	0.000158	ext_1_1	1
3	0.000006	1.905289e-06	0.000005	0.000019	50	10	0.000300	64	0.000104	ort	10
4	0.000006	8.355474e-07	0.000005	0.000008	10	10	0.000056	64	0.000106	ext	10
5	0.000005	2.660827e-06	0.000004	0.000013	10	10	0.000054	64	0.000019	ext_1_1	10
6	0.000008	1.033075e-06	0.000007	0.000012	50	10	0.000385	64	0.000120	ort	100
7	0.000008	1.613468e-07	0.000008	0.000008	10	10	0.000081	64	0.000107	ext	100
8	0.000007	1.018958e-06	0.000007	0.000010	10	10	0.000075	64	0.000017	ext_1_1	100
9	0.000062	1.096915e-05	0.000046	0.000094	50	50	0.003118	64	0.000901	ort	1000
10	0.000491	2.403298e-04	0.000330	0.001513	50	50	0.024552	64	0.007834	ext	1000
11	0.000356	2.816248e-05	0.000312	0.000436	50	50	0.017788	64	0.000352	ext_1_1	1000

Plots¶

# One row per dimension, one column per binding, cells hold the average time.
piv = df.pivot(index="dim", columns="name", values="average")

# Single plot with both axes on a logarithmic scale.
fig, ax = plt.subplots(nrows=1, ncols=1)
plot_options = {"title": "Binding Comparison", "logx": True, "logy": True}
piv.plot(ax=ax, **plot_options)
fig.tight_layout()
fig.savefig("plot_bench_ort.png")

Total running time of the script: (0 minutes 3.380 seconds)

Gallery generated by Sphinx-Gallery