Profiling onnxruntime execution

This example shows how to profile the execution of an ONNX model with onnxruntime and visualize the results with plot_ort_profile() and plot_ort_profile_timeline().

Build a small ONNX model

import os
import tempfile

import numpy as np
import onnx
import onnx.helper as oh
import onnx.numpy_helper as onh

# Toy network computing output = Relu(x @ W1 + b1) on a (2, 4) float input.
nodes = [
    oh.make_node("MatMul", ["x", "W1"], ["h"]),
    oh.make_node("Add", ["h", "b1"], ["relu_in"]),
    oh.make_node("Relu", ["relu_in"], ["output"]),
]
graph_inputs = [oh.make_tensor_value_info("x", onnx.TensorProto.FLOAT, (2, 4))]
graph_outputs = [oh.make_tensor_value_info("output", onnx.TensorProto.FLOAT, (2, 8))]

# Constant tensors stored in the graph: random (4, 8) weights and a zero bias of length 8.
initializers = [
    onh.from_array(np.random.randn(4, 8).astype(np.float32), name="W1"),
    onh.from_array(np.zeros(8, dtype=np.float32), name="b1"),
]

graph = oh.make_graph(nodes, "test_graph", graph_inputs, graph_outputs, initializers)

# Pin the default-domain opset and the IR version explicitly.
model = oh.make_model(
    graph,
    opset_imports=[oh.make_opsetid("", 18)],
    ir_version=8,
)

Run with onnxruntime profiling enabled

from onnxruntime import InferenceSession, SessionOptions

# Profiling is switched on through the session options; the trace file name
# starts with the given prefix, placed here inside a fresh temporary directory.
tmpdir = tempfile.mkdtemp()
opts = SessionOptions()
opts.profile_file_prefix = os.path.join(tmpdir, "ort_profile")
opts.enable_profiling = True

sess = InferenceSession(
    model.SerializeToString(),
    sess_options=opts,
    providers=["CPUExecutionProvider"],
)

# Feed the same random (2, 4) batch five times so the trace holds several runs.
x = np.random.randn(2, 4).astype(np.float32)
feeds = {"x": x}
for _ in range(5):
    sess.run(None, feeds)

# end_profiling() hands back the path of the trace file that was written.
profile_file = sess.end_profiling()
print("Profile written to:", profile_file)

Profile written to: /tmp/tmp4nk0w9wx/ort_profile_2026-05-06_10-56-11_701.json

Parse the profiling file into a DataFrame

from yaourt.tools.js_profile import js_profile_to_dataframe

# Load the trace into a DataFrame.
# NOTE(review): first_it_out=True appears to tag the first run with
# iteration == -1 so it can be told apart from later runs -- confirm against
# the js_profile_to_dataframe documentation.
df = js_profile_to_dataframe(profile_file, first_it_out=True)

# Show only the most informative columns for the first ten events.
columns = ["name", "event_name", "iteration", "dur"]
preview = df[columns].head(10)
print(preview.to_string())

                                 name                   event_name  iteration   dur
               model_loading_array          model_loading_array         -1   335
            session_initialization       session_initialization         -1  1012
fused /MatMulAddFusion_kernel_time                  kernel_time         -1   166
       SequentialExecutor::Execute  SequentialExecutor::Execute          0   186
                         model_run                    model_run          0   222
fused /MatMulAddFusion_kernel_time                  kernel_time          0    31
       SequentialExecutor::Execute  SequentialExecutor::Execute          1    37
                         model_run                    model_run          1    48
fused /MatMulAddFusion_kernel_time                  kernel_time          1    20
       SequentialExecutor::Execute  SequentialExecutor::Execute          2    26

Plot a summary by operator type

import matplotlib
import matplotlib.pyplot as plt

# Select the Agg backend (matplotlib's non-interactive raster backend) so the
# figures can be rendered without a display.
matplotlib.use("Agg")

from yaourt.tools.js_profile import plot_ort_profile

# Draw the per-operator summary onto an axis we create ourselves, so the
# figure size stays under our control.
fig, ax = plt.subplots(figsize=(8, 4))
plot_ort_profile(df, ax0=ax, title="Time per operator (µs)")
fig.tight_layout()
# Bare expression on the last line: Sphinx-Gallery captures and shows the figure.
fig

<Figure size 800x400 with 1 Axes>

Plot the execution timeline

from yaourt.tools.js_profile import plot_ort_profile_timeline

# Use a separate figure/axis pair for the timeline so it does not overwrite
# an existing figure; the helper draws onto the axis passed as ``ax``.
fig2, ax2 = plt.subplots(figsize=(6, 6))
plot_ort_profile_timeline(df, ax=ax2, title="Execution timeline")
fig2.tight_layout()
# Bare expression on the last line: Sphinx-Gallery captures and shows the figure.
fig2

<Figure size 600x600 with 1 Axes>

Cleanup

import shutil

# The profile file was written under tmpdir (its prefix was built with
# os.path.join(tmpdir, "ort_profile")), so removing the directory tree removes
# the file as well. A separate os.unlink() beforehand is redundant, and if it
# raised (e.g. the file is already gone) the directory would never be removed.
# Cleanup stays best-effort: errors are ignored.
shutil.rmtree(tmpdir, ignore_errors=True)

Total running time of the script: (0 minutes 0.531 seconds)

Gallery generated by Sphinx-Gallery