Measuring onnxruntime performance against a cython binding

The following code compares the performance of the onnxruntime Python bindings with that of a Cython binding. The time spent in the binding itself is negligible when the computation is large, but it can be significant for small matrices.

import numpy
from pandas import DataFrame
import matplotlib.pyplot as plt
from tqdm import tqdm
from onnx import numpy_helper, TensorProto
from onnx.helper import (
    make_model,
    make_node,
    make_graph,
    make_tensor_value_info,
    make_opsetid,
)
from onnx.checker import check_model
from onnxruntime import InferenceSession
from onnx_extended.ortcy.wrap.ortinf import OrtSession
from onnx_extended.args import get_parsed_args
from onnx_extended.ext_test_case import measure_time, unit_test_going


# Command-line options of the example. A shorter default list of dimensions
# is used when unit_test_going() returns a true value.
_default_dims = "1,10" if unit_test_going() else "1,10,100,1000"
script_args = get_parsed_args(
    "plot_bench_cypy_ort",
    description=__doc__,
    dims=(_default_dims, "square matrix dimensions to try, comma separated values"),
    expose="repeat,number",
)

A simple onnx model

# One-node graph computing Y = X + A, where A is a float32 initializer
# holding the single value 1 and X, Y are 2D float tensors of any size.
A = numpy_helper.from_array(numpy.array([1], dtype=numpy.float32), name="A")
X = make_tensor_value_info("X", TensorProto.FLOAT, [None, None])
Y = make_tensor_value_info("Y", TensorProto.FLOAT, [None, None])
node1 = make_node(op_type="Add", inputs=["X", "A"], outputs=["Y"])
graph = make_graph(
    nodes=[node1],
    name="+1",
    inputs=[X],
    outputs=[Y],
    initializer=[A],
)
onnx_model = make_model(
    graph,
    opset_imports=[make_opsetid("", 18)],
    ir_version=8,
)
# Validate the model before handing it to any runtime.
check_model(onnx_model)

Two Python bindings on CPU

# Both sessions are created from the same serialized model.
model_bytes = onnx_model.SerializeToString()
sess_ort = InferenceSession(model_bytes, providers=["CPUExecutionProvider"])
sess_ext = OrtSession(model_bytes)

# Random input and the expected output computed with numpy.
x = numpy.random.randn(10, 10).astype(numpy.float32)
y = x + 1

# onnxruntime takes a dictionary of named inputs, OrtSession a list of arrays.
outputs_ort = sess_ort.run(None, {"X": x})
outputs_ext = sess_ext.run([x])
y_ort, y_ext = outputs_ort[0], outputs_ext[0]

# Sum of absolute differences with the expected output, per binding.
d_ort = numpy.abs(y_ort - y).sum()
d_ext = numpy.abs(y_ext - y).sum()
print(f"Discrepancies: d_ort={d_ort}, d_ext={d_ext}")

Discrepancies: d_ort=0.0, d_ext=0.0

Time measurement

run_1_1 is a specialized implementation for models with exactly one input and one output.

# The same measurement settings are shared by the three calls.
timing = {"number": 200, "repeat": 100}

# onnxruntime python binding
t_ort = measure_time(lambda: sess_ort.run(None, {"X": x})[0], **timing)
print(f"t_ort={t_ort}")

# cython binding, generic entry point
t_ext = measure_time(lambda: sess_ext.run([x])[0], **timing)
print(f"t_ext={t_ext}")

# cython binding, entry point taking a single array
t_ext2 = measure_time(lambda: sess_ext.run_1_1(x), **timing)
print(f"t_ext2={t_ext2}")

t_ort={'average': np.float64(5.4070025496912415e-06), 'deviation': np.float64(2.1811158847588845e-06), 'min_exec': np.float64(4.663300005631754e-06), 'max_exec': np.float64(2.5058569999600877e-05), 'repeat': 100, 'number': 200, 'ttime': np.float64(0.0005407002549691241), 'context_size': 64, 'warmup_time': 0.0001342309988103807}
t_ext={'average': np.float64(5.1815271996019875e-06), 'deviation': np.float64(4.304988726654559e-07), 'min_exec': np.float64(4.901085003439221e-06), 'max_exec': np.float64(7.735379995210678e-06), 'repeat': 100, 'number': 200, 'ttime': np.float64(0.0005181527199601988), 'context_size': 64, 'warmup_time': 8.35049995657755e-05}
t_ext2={'average': np.float64(4.72687039982702e-06), 'deviation': np.float64(5.308661812130688e-07), 'min_exec': np.float64(4.307794997657766e-06), 'max_exec': np.float64(7.513645005019498e-06), 'repeat': 100, 'number': 200, 'ttime': np.float64(0.00047268703998270205), 'context_size': 64, 'warmup_time': 2.5773000743356533e-05}

Benchmark

# Matrix dimensions to benchmark, parsed from the comma separated option.
# Empty tokens (e.g. a trailing comma) are skipped.
dims = [int(i) for i in script_args.dims.split(",") if i.strip()]

data = []
for dim in tqdm(dims):
    # Dimensions of 1000 and above are measured with five times more runs.
    if dim < 1000:
        number, repeat = script_args.number, script_args.repeat
    else:
        number, repeat = script_args.number * 5, script_args.repeat * 5
    x = numpy.random.randn(dim, dim).astype(numpy.float32)

    # `x=x` binds the current matrix to each lambda at definition time.
    bindings = {
        "ort": lambda x=x: sess_ort.run(None, {"X": x})[0],
        "ext": lambda x=x: sess_ext.run([x])[0],
        "ext_1_1": lambda x=x: sess_ext.run_1_1(x),
    }

    # Every binding is timed with the same `number` and `repeat` so that the
    # rows are comparable (the onnxruntime call used a hard-coded repeat=50).
    for name, fct in bindings.items():
        obs = measure_time(fct, number=number, repeat=repeat)
        obs["name"] = name
        obs["dim"] = dim
        data.append(obs)

    # Keep the unit tests short: stop once dimension 10 has been measured.
    if unit_test_going() and dim >= 10:
        break


# One row per (binding, dimension) pair.
df = DataFrame(data)
df

  0%|          | 0/4 [00:00<?, ?it/s]
100%|██████████| 4/4 [00:01<00:00,  2.38it/s]
100%|██████████| 4/4 [00:01<00:00,  2.38it/s]

	average	deviation	min_exec	max_exec	repeat	number	ttime	context_size	warmup_time	name	dim
0	0.000005	3.113291e-07	0.000005	0.000007	50	10	0.000254	64	0.000072	ort	1
1	0.000006	1.095015e-06	0.000005	0.000009	10	10	0.000056	64	0.000068	ext	1
2	0.000007	4.366461e-06	0.000004	0.000018	10	10	0.000071	64	0.000016	ext_1_1	1
3	0.000006	3.298409e-06	0.000005	0.000022	50	10	0.000288	64	0.000042	ort	10
4	0.000005	3.370656e-07	0.000005	0.000006	10	10	0.000053	64	0.000059	ext	10
5	0.000006	5.555920e-06	0.000004	0.000023	10	10	0.000064	64	0.000015	ext_1_1	10
6	0.000006	1.399302e-06	0.000006	0.000013	50	10	0.000313	64	0.000038	ort	100
7	0.000007	2.568428e-07	0.000007	0.000008	10	10	0.000071	64	0.000042	ext	100
8	0.000006	7.985796e-08	0.000006	0.000007	10	10	0.000064	64	0.000016	ext_1_1	100
9	0.000062	2.124362e-05	0.000035	0.000132	50	50	0.003124	64	0.005340	ort	1000
10	0.000294	5.240357e-05	0.000247	0.000469	50	50	0.014722	64	0.003543	ext	1000
11	0.000297	7.914858e-05	0.000241	0.000518	50	50	0.014847	64	0.000593	ext_1_1	1000

Plots

# One row per dimension, one column per binding, holding the average time.
piv = df.pivot(index="dim", columns="name", values="average")

# Single axes with both axes on a logarithmic scale.
fig, ax = plt.subplots()
piv.plot(title="Binding Comparison", logx=True, logy=True, ax=ax)
fig.tight_layout()
fig.savefig("plot_bench_ort.png")

Total running time of the script: (0 minutes 2.464 seconds)

Gallery generated by Sphinx-Gallery