Note

Go to the end to download the full example code.

Export with dynamic dimensions in `{0,1}` into ONNX¶

This duplicates the example Export with dynamic dimensions in {0,1} but for torch.onnx.export(). It checks which input sets can be used to export the model, and with which input sets the exported model then runs correctly.

Available input sets¶

import itertools
from tqdm import tqdm
import numpy as np
import pandas
import torch
import onnxruntime
from onnx_diagnostic import doc
from onnx_diagnostic.helpers import max_diff, string_type, flatten_object
from onnx_diagnostic.helpers.torch_helper import torch_deepcopy
from onnx_diagnostic.helpers.rt_helper import make_feeds
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str
from onnx_diagnostic.torch_export_patches import (
    torch_export_patches,
    register_additional_serialization_functions,
)

# Build the untrained Tiny-LLM model together with its input sets and dynamic shapes.
data = get_untrained_model_with_inputs("arnir0/Tiny-LLM", add_second_input=True)
model = data["model"]
dynamic_shapes = data["dynamic_shapes"]

The trained model can be obtained with:

# Illustrative snippet: it relies on `import transformers`, which is not among the
# imports listed at the top of this example, and it rebinds `model`.
# NOTE(review): presumably shown for reference only and not executed here - confirm.
MODEL_NAME = "arnir0/Tiny-LLM"
# Load the tokenizer and the causal language model published under MODEL_NAME.
tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME)

# Keep only the entries of `data` whose key starts with "inputs".
input_sets = dict(filter(lambda item: item[0].startswith("inputs"), data.items()))

# Show the type and shape of every available input set.
for k, v in input_sets.items():
    described = string_type(v, with_shape=True)
    print(f"{k:20}: {described}")

inputs              : dict(input_ids:T7s2x3,attention_mask:T7s2x33,position_ids:T7s2x3,past_key_values:DynamicCache(key_cache=#1[T1s2x1x30x96], value_cache=#1[T1s2x1x30x96]))
inputs_prompt       : dict(input_ids:T7s1x11)
inputs2             : dict(input_ids:T7s3x4,attention_mask:T7s3x35,position_ids:T7s3x4,past_key_values:DynamicCache(key_cache=#1[T1s3x1x31x96], value_cache=#1[T1s3x1x31x96]))
inputs_empty_cache  : dict(input_ids:T7s2x3,attention_mask:T7s2x3,position_ids:T7s2x3,past_key_values:DynamicCache(key_cache=#1[T1s2x1x0x96], value_cache=#1[T1s2x1x0x96]))
inputs_batch1       : dict(input_ids:T7s1x3,attention_mask:T7s1x33,position_ids:T7s1x3,past_key_values:DynamicCache(key_cache=#1[T1s1x1x30x96], value_cache=#1[T1s1x1x30x96]))

The dynamic shapes are:

# Render the dynamic shapes as a compact string before printing them.
shapes_text = string_type(dynamic_shapes)
print(f"dynamic_shapes: {shapes_text}")

dynamic_shapes: dict(input_ids:{0:DYN(batch),1:DYN(seq_length)},attention_mask:{0:DYN(batch),1:DYN(cache+seq)},position_ids:{0:DYN(batch),1:DYN(seq_length)},past_key_values:#2[{0:DYN(batch),2:DYN(cache_length)},{0:DYN(batch),2:DYN(cache_length)}])

Let’s check that they all work and compute the expected values. We use a deep copy because caches are usually modified in place.

# Reference outputs of the torch model, keyed by input set name.
expected = {}
for k, v in input_sets.items():
    # The model receives a deep copy so that the stored input set stays untouched.
    outputs = model(**torch_deepcopy(v))
    expected[k] = outputs
    print(f"{k:20}: {string_type(outputs, with_shape=True)}")

inputs              : CausalLMOutputWithPast(logits:T1s2x3x32000,past_key_values:DynamicCache(key_cache=#1[T1s2x1x33x96], value_cache=#1[T1s2x1x33x96]))
inputs_prompt       : CausalLMOutputWithPast(logits:T1s1x11x32000,past_key_values:DynamicCache(key_cache=#1[T1s1x1x11x96], value_cache=#1[T1s1x1x11x96]))
inputs2             : CausalLMOutputWithPast(logits:T1s3x4x32000,past_key_values:DynamicCache(key_cache=#1[T1s3x1x35x96], value_cache=#1[T1s3x1x35x96]))
inputs_empty_cache  : CausalLMOutputWithPast(logits:T1s2x3x32000,past_key_values:DynamicCache(key_cache=#1[T1s2x1x3x96], value_cache=#1[T1s2x1x3x96]))
inputs_batch1       : CausalLMOutputWithPast(logits:T1s1x3x32000,past_key_values:DynamicCache(key_cache=#1[T1s1x1x33x96], value_cache=#1[T1s1x1x33x96]))

Export with options¶

We try to export with the following options:

cache registration: register cache serialization with onnx_diagnostic.torch_export_patches.register_additional_serialization_functions()
oblivious: an option to remove some of the exceptions raised by the exporter
rt: see prefer_deferred_runtime_asserts_over_guards in torch.export.export()
cache_patch: patches the model before exporting with onnx_diagnostic.torch_export_patches.torch_export_patches()

Some helper functions first.

def export_model(
    model, dynamic_shapes, inputs, cache=False, oblivious=False, rt=False, cache_patch=False
):
    """
    Exports ``model`` with :func:`torch.export.export`, then converts the
    exported program with :func:`torch.onnx.export`.

    Each enabled option opens its context manager and calls this function
    again with that option switched off, so the contexts end up nested
    around the final export.

    :param model: model to export
    :param dynamic_shapes: dynamic shapes, converted with ``use_dyn_not_str``
        for :func:`torch.export.export` and given unchanged to
        :func:`torch.onnx.export`
    :param inputs: keyword inputs used for both exports
    :param cache: exports inside ``register_additional_serialization_functions``,
        only used when ``cache_patch`` is not set
    :param oblivious: exports with ``backed_size_oblivious=True``
    :param rt: value of ``prefer_deferred_runtime_asserts_over_guards``
    :param cache_patch: exports inside ``torch_export_patches``; ``"all"``,
        ``True`` or ``1`` enables both patch families, ``"torch"`` or
        ``"transformers"`` enables only one of them
    :return: the output of :func:`torch.onnx.export`
    """
    if cache_patch:
        # Patches take precedence over the plain cache registration.
        with_torch = cache_patch in ("all", "torch", True, 1)
        with_transformers = cache_patch in ("all", "transformers", True, 1)
        with torch_export_patches(patch_torch=with_torch, patch_transformers=with_transformers):
            return export_model(model, dynamic_shapes, inputs, oblivious=oblivious, rt=rt)

    if cache:
        with register_additional_serialization_functions(patch_transformers=True):
            return export_model(model, dynamic_shapes, inputs, oblivious=oblivious, rt=rt)

    if oblivious:
        with torch.fx.experimental._config.patch(backed_size_oblivious=True):
            return export_model(model, dynamic_shapes, inputs, rt=rt)

    # No option left to apply: run the two exports.
    exported_program = torch.export.export(
        model,
        (),
        inputs,
        dynamic_shapes=use_dyn_not_str(dynamic_shapes),
        prefer_deferred_runtime_asserts_over_guards=rt,
    )
    return torch.onnx.export(
        exported_program, args=(), kwargs=inputs, dynamic_shapes=dynamic_shapes
    )


def try_export_model(
    model, dynamic_shapes, inputs, cache=False, oblivious=False, rt=False, cache_patch=False
):
    """
    Calls ``export_model`` with the same arguments and returns its result.
    Any exception raised by the export is returned instead of being raised,
    so the caller has to check the type of the returned value.
    """
    options = dict(cache=cache, oblivious=oblivious, rt=rt, cache_patch=cache_patch)
    try:
        outcome = export_model(model, dynamic_shapes, inputs, **options)
    except Exception as exc:
        outcome = exc
    return outcome


def validation(ep, input_sets, expected, catch_exception=True):
    """
    Runs the exported ONNX model with onnxruntime on every input set and
    yields one pair ``(name, outcome)`` per input set.

    :param ep: export result exposing ``model_proto``
    :param input_sets: dictionary ``{name: inputs}`` to run the model with
    :param expected: dictionary ``{name: expected outputs}``
    :param catch_exception: if True, an exception raised while building the
        feeds or running the session is yielded as the outcome,
        otherwise it is raised
    :return: iterator on ``(name, outcome)`` where outcome is either the result
        of ``max_diff`` or the caught exception
    """
    serialized = ep.model_proto.SerializeToString()
    sess = onnxruntime.InferenceSession(serialized, providers=["CPUExecutionProvider"])
    for name, one_set in input_sets.items():
        try:
            # The feeds are built from a deep copy, not from the stored input set.
            feeds = make_feeds(sess, torch_deepcopy(one_set), use_numpy=True)
            got = sess.run(None, feeds)
        except Exception as exc:
            if not catch_exception:
                raise
            yield name, exc
        else:
            flat_expected = flatten_object(expected[name], drop_keys=True)
            yield name, max_diff(flat_expected, got)

Let’s verify that an example known to be working actually works.

# Sanity check: export with cache_patch=True, then run the ONNX model on the
# very same input set; exceptions are not caught so any failure stops here.
reference_inputs = input_sets["inputs"]
ep = export_model(model, dynamic_shapes, torch_deepcopy(reference_inputs), cache_patch=True)
res = list(validation(ep, {"inputs": reference_inputs}, expected, catch_exception=False))
assert res[0][1]["abs"] < 1e-5, f"Unexpected issue with res={res}"

[torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

The main loop¶

# One observation (a dictionary) per failed export or per (export, run) pair.
results = []

# Grid of options, in this order: cache, cache_patch, oblivious, rt,
# then the name of the input set used to export.
possibilities = [*[[0, 1] for _ in range(4)], list(input_sets)]
possibilities[1] = [0, "all", "torch", "transformers"]
with tqdm(list(itertools.product(*possibilities))) as pbar:
    for cache, cache_patch, oblivious, rt, inputs in pbar:
        if cache_patch and not cache:
            # patches include caches.
            continue
        kwargs = dict(cache=cache, cache_patch=cache_patch, oblivious=oblivious, rt=rt)
        # Only the enabled options appear in the legend; non-integer values
        # (the kind of patch) are displayed as "name:value".
        legend = "-".join(
            (k if isinstance(v, int) else f"{k}:{v}") for k, v in kwargs.items() if v
        )
        legend = f"{legend}/{inputs}"
        pbar.set_description(f"{legend} EXPORT")

        # export
        ep = try_export_model(
            model, dynamic_shapes, torch_deepcopy(input_sets[inputs]), **kwargs
        )
        if isinstance(ep, Exception):
            # The export failed: keep the first line of the error message.
            obs = {
                **kwargs,
                "export_with": inputs,
                "EXPORT": 0,
                "ERR-EXPORT": str(ep).split("\n")[0],
            }
            results.append(obs)
            continue

        # The export succeeded: run the ONNX model with every input set.
        pbar.set_description(f"{legend} VALIDATE")
        common = {**kwargs, "export_with": inputs, "EXPORT": 1}
        for inp, res in validation(ep, input_sets, expected):
            if isinstance(res, Exception):
                obs = {
                    **common,
                    "run_with": inp,
                    "ERR-RUN": str(res).split("\n")[0],
                    "WORKS": 0,
                }
            else:
                # `not` instead of `~`: the bitwise invert only behaves as a
                # logical negation on numpy booleans, `not` is always correct.
                obs = {
                    **common,
                    "run_with": inp,
                    "WORKS": int(not np.isnan(res["abs"]) and res["abs"] < 1e-3),
                }
            results.append(obs)

  0%|          | 0/160 [00:00<?, ?it/s]
/inputs EXPORT:   0%|          | 0/160 [00:00<?, ?it/s][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

/inputs EXPORT:   1%|          | 1/160 [00:03<08:07,  3.07s/it]
/inputs_prompt EXPORT:   1%|          | 1/160 [00:03<08:07,  3.07s/it]
/inputs2 EXPORT:   1%|          | 1/160 [00:03<08:07,  3.07s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

/inputs2 EXPORT:   2%|▏         | 3/160 [00:06<05:10,  1.98s/it]
/inputs_empty_cache EXPORT:   2%|▏         | 3/160 [00:06<05:10,  1.98s/it]
/inputs_empty_cache EXPORT:   2%|▎         | 4/160 [00:07<04:07,  1.59s/it]
/inputs_batch1 EXPORT:   2%|▎         | 4/160 [00:07<04:07,  1.59s/it]
/inputs_batch1 EXPORT:   3%|▎         | 5/160 [00:07<03:23,  1.31s/it]
rt/inputs EXPORT:   3%|▎         | 5/160 [00:07<03:23,  1.31s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

rt/inputs EXPORT:   4%|▍         | 6/160 [00:11<04:58,  1.94s/it]
rt/inputs_prompt EXPORT:   4%|▍         | 6/160 [00:11<04:58,  1.94s/it]
rt/inputs2 EXPORT:   4%|▍         | 6/160 [00:11<04:58,  1.94s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

rt/inputs2 EXPORT:   5%|▌         | 8/160 [00:14<04:32,  1.79s/it]
rt/inputs_empty_cache EXPORT:   5%|▌         | 8/160 [00:14<04:32,  1.79s/it]
rt/inputs_empty_cache EXPORT:   6%|▌         | 9/160 [00:15<03:54,  1.56s/it]
rt/inputs_batch1 EXPORT:   6%|▌         | 9/160 [00:15<03:54,  1.56s/it]
rt/inputs_batch1 EXPORT:   6%|▋         | 10/160 [00:16<03:55,  1.57s/it]
oblivious/inputs EXPORT:   6%|▋         | 10/160 [00:16<03:55,  1.57s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious/inputs EXPORT:   7%|▋         | 11/160 [00:20<05:00,  2.02s/it]
oblivious/inputs_prompt EXPORT:   7%|▋         | 11/160 [00:20<05:00,  2.02s/it]
oblivious/inputs2 EXPORT:   7%|▋         | 11/160 [00:20<05:00,  2.02s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious/inputs2 EXPORT:   8%|▊         | 13/160 [00:22<04:20,  1.78s/it]
oblivious/inputs_empty_cache EXPORT:   8%|▊         | 13/160 [00:22<04:20,  1.78s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious/inputs_empty_cache EXPORT:   9%|▉         | 14/160 [00:25<04:57,  2.04s/it]
oblivious/inputs_batch1 EXPORT:   9%|▉         | 14/160 [00:25<04:57,  2.04s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious/inputs_batch1 EXPORT:   9%|▉         | 15/160 [00:29<05:41,  2.35s/it]
oblivious-rt/inputs EXPORT:   9%|▉         | 15/160 [00:29<05:41,  2.35s/it]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious-rt/inputs EXPORT:  10%|█         | 16/160 [00:32<06:01,  2.51s/it]
oblivious-rt/inputs_prompt EXPORT:  10%|█         | 16/160 [00:32<06:01,  2.51s/it]
oblivious-rt/inputs2 EXPORT:  10%|█         | 16/160 [00:32<06:01,  2.51s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious-rt/inputs2 EXPORT:  11%|█▏        | 18/160 [00:36<05:42,  2.41s/it]
oblivious-rt/inputs_empty_cache EXPORT:  11%|█▏        | 18/160 [00:36<05:42,  2.41s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious-rt/inputs_empty_cache EXPORT:  12%|█▏        | 19/160 [00:39<06:11,  2.63s/it]
oblivious-rt/inputs_batch1 EXPORT:  12%|█▏        | 19/160 [00:39<06:11,  2.63s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious-rt/inputs_batch1 EXPORT:  12%|█▎        | 20/160 [00:43<06:29,  2.78s/it]
cache/inputs EXPORT:  12%|█▎        | 20/160 [00:43<06:29,  2.78s/it]              [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache/inputs EXPORT:  51%|█████     | 81/160 [00:46<00:13,  5.78it/s]
cache/inputs_prompt EXPORT:  51%|█████     | 81/160 [00:46<00:13,  5.78it/s]
cache/inputs2 EXPORT:  51%|█████     | 81/160 [00:46<00:13,  5.78it/s]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache/inputs2 EXPORT:  52%|█████▏    | 83/160 [00:48<00:16,  4.77it/s]
cache/inputs_empty_cache EXPORT:  52%|█████▏    | 83/160 [00:48<00:16,  4.77it/s]
cache/inputs_empty_cache EXPORT:  52%|█████▎    | 84/160 [00:49<00:17,  4.39it/s]
cache/inputs_batch1 EXPORT:  52%|█████▎    | 84/160 [00:49<00:17,  4.39it/s]
cache/inputs_batch1 EXPORT:  53%|█████▎    | 85/160 [00:50<00:18,  4.05it/s]
cache-rt/inputs EXPORT:  53%|█████▎    | 85/160 [00:50<00:18,  4.05it/s]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-rt/inputs EXPORT:  54%|█████▍    | 86/160 [00:52<00:28,  2.58it/s]
cache-rt/inputs_prompt EXPORT:  54%|█████▍    | 86/160 [00:52<00:28,  2.58it/s]
cache-rt/inputs2 EXPORT:  54%|█████▍    | 86/160 [00:52<00:28,  2.58it/s]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-rt/inputs2 EXPORT:  55%|█████▌    | 88/160 [00:57<00:48,  1.48it/s]
cache-rt/inputs_empty_cache EXPORT:  55%|█████▌    | 88/160 [00:57<00:48,  1.48it/s]
cache-rt/inputs_empty_cache EXPORT:  56%|█████▌    | 89/160 [00:58<00:49,  1.43it/s]
cache-rt/inputs_batch1 EXPORT:  56%|█████▌    | 89/160 [00:58<00:49,  1.43it/s]
cache-rt/inputs_batch1 EXPORT:  56%|█████▋    | 90/160 [00:59<00:49,  1.42it/s]
cache-oblivious/inputs EXPORT:  56%|█████▋    | 90/160 [00:59<00:49,  1.42it/s][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious/inputs EXPORT:  57%|█████▋    | 91/160 [01:02<01:09,  1.00s/it]
cache-oblivious/inputs_prompt EXPORT:  57%|█████▋    | 91/160 [01:02<01:09,  1.00s/it]
cache-oblivious/inputs2 EXPORT:  57%|█████▋    | 91/160 [01:02<01:09,  1.00s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious/inputs2 EXPORT:  58%|█████▊    | 93/160 [01:05<01:15,  1.12s/it]
cache-oblivious/inputs_empty_cache EXPORT:  58%|█████▊    | 93/160 [01:05<01:15,  1.12s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious/inputs_empty_cache EXPORT:  59%|█████▉    | 94/160 [01:08<01:31,  1.39s/it]
cache-oblivious/inputs_batch1 EXPORT:  59%|█████▉    | 94/160 [01:08<01:31,  1.39s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious/inputs_batch1 EXPORT:  59%|█████▉    | 95/160 [01:10<01:47,  1.66s/it]
cache-oblivious-rt/inputs EXPORT:  59%|█████▉    | 95/160 [01:10<01:47,  1.66s/it]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious-rt/inputs EXPORT:  60%|██████    | 96/160 [01:13<02:02,  1.92s/it]
cache-oblivious-rt/inputs_prompt EXPORT:  60%|██████    | 96/160 [01:13<02:02,  1.92s/it]
cache-oblivious-rt/inputs2 EXPORT:  60%|██████    | 96/160 [01:13<02:02,  1.92s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious-rt/inputs2 EXPORT:  61%|██████▏   | 98/160 [01:16<01:48,  1.74s/it]
cache-oblivious-rt/inputs_empty_cache EXPORT:  61%|██████▏   | 98/160 [01:16<01:48,  1.74s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious-rt/inputs_empty_cache EXPORT:  62%|██████▏   | 99/160 [01:20<02:19,  2.28s/it]
cache-oblivious-rt/inputs_batch1 EXPORT:  62%|██████▏   | 99/160 [01:20<02:19,  2.28s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious-rt/inputs_batch1 EXPORT:  62%|██████▎   | 100/160 [01:24<02:30,  2.51s/it]
cache-cache_patch:all/inputs EXPORT:  62%|██████▎   | 100/160 [01:24<02:30,  2.51s/it]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all/inputs VALIDATE:  62%|██████▎   | 100/160 [01:27<02:30,  2.51s/it]
cache-cache_patch:all/inputs VALIDATE:  63%|██████▎   | 101/160 [01:28<02:49,  2.88s/it]
cache-cache_patch:all/inputs_prompt EXPORT:  63%|██████▎   | 101/160 [01:28<02:49,  2.88s/it]
cache-cache_patch:all/inputs2 EXPORT:  63%|██████▎   | 101/160 [01:28<02:49,  2.88s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all/inputs2 VALIDATE:  63%|██████▎   | 101/160 [01:31<02:49,  2.88s/it]
cache-cache_patch:all/inputs2 VALIDATE:  64%|██████▍   | 103/160 [01:32<02:22,  2.50s/it]
cache-cache_patch:all/inputs_empty_cache EXPORT:  64%|██████▍   | 103/160 [01:32<02:22,  2.50s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 43 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all/inputs_empty_cache VALIDATE:  64%|██████▍   | 103/160 [01:36<02:22,  2.50s/it]
cache-cache_patch:all/inputs_empty_cache VALIDATE:  65%|██████▌   | 104/160 [01:36<02:44,  2.93s/it]
cache-cache_patch:all/inputs_batch1 EXPORT:  65%|██████▌   | 104/160 [01:36<02:44,  2.93s/it]       [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 17 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all/inputs_batch1 VALIDATE:  65%|██████▌   | 104/160 [01:39<02:44,  2.93s/it]
cache-cache_patch:all/inputs_batch1 VALIDATE:  66%|██████▌   | 105/160 [01:39<02:43,  2.97s/it]
cache-cache_patch:all-rt/inputs EXPORT:  66%|██████▌   | 105/160 [01:39<02:43,  2.97s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-rt/inputs VALIDATE:  66%|██████▌   | 105/160 [01:45<02:43,  2.97s/it]
cache-cache_patch:all-rt/inputs VALIDATE:  66%|██████▋   | 106/160 [01:45<03:28,  3.86s/it]
cache-cache_patch:all-rt/inputs_prompt EXPORT:  66%|██████▋   | 106/160 [01:45<03:28,  3.86s/it]
cache-cache_patch:all-rt/inputs2 EXPORT:  66%|██████▋   | 106/160 [01:45<03:28,  3.86s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-rt/inputs2 VALIDATE:  66%|██████▋   | 106/160 [01:49<03:28,  3.86s/it]
cache-cache_patch:all-rt/inputs2 VALIDATE:  68%|██████▊   | 108/160 [01:50<02:41,  3.11s/it]
cache-cache_patch:all-rt/inputs_empty_cache EXPORT:  68%|██████▊   | 108/160 [01:50<02:41,  3.11s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 43 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-rt/inputs_empty_cache VALIDATE:  68%|██████▊   | 108/160 [01:52<02:41,  3.11s/it]
cache-cache_patch:all-rt/inputs_empty_cache VALIDATE:  68%|██████▊   | 109/160 [01:53<02:38,  3.10s/it]
cache-cache_patch:all-rt/inputs_batch1 EXPORT:  68%|██████▊   | 109/160 [01:53<02:38,  3.10s/it]       [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 17 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-rt/inputs_batch1 VALIDATE:  68%|██████▊   | 109/160 [01:55<02:38,  3.10s/it]
cache-cache_patch:all-rt/inputs_batch1 VALIDATE:  69%|██████▉   | 110/160 [01:56<02:34,  3.09s/it]
cache-cache_patch:all-oblivious/inputs EXPORT:  69%|██████▉   | 110/160 [01:56<02:34,  3.09s/it]  [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious/inputs VALIDATE:  69%|██████▉   | 110/160 [02:00<02:34,  3.09s/it]
cache-cache_patch:all-oblivious/inputs VALIDATE:  69%|██████▉   | 111/160 [02:00<02:45,  3.38s/it]
cache-cache_patch:all-oblivious/inputs_prompt EXPORT:  69%|██████▉   | 111/160 [02:00<02:45,  3.38s/it]
cache-cache_patch:all-oblivious/inputs2 EXPORT:  69%|██████▉   | 111/160 [02:00<02:45,  3.38s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious/inputs2 VALIDATE:  69%|██████▉   | 111/160 [02:04<02:45,  3.38s/it]
cache-cache_patch:all-oblivious/inputs2 VALIDATE:  71%|███████   | 113/160 [02:04<02:11,  2.79s/it]
cache-cache_patch:all-oblivious/inputs_empty_cache EXPORT:  71%|███████   | 113/160 [02:04<02:11,  2.79s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious/inputs_empty_cache VALIDATE:  71%|███████   | 113/160 [02:10<02:11,  2.79s/it]
cache-cache_patch:all-oblivious/inputs_empty_cache VALIDATE:  71%|███████▏  | 114/160 [02:11<02:51,  3.72s/it]
cache-cache_patch:all-oblivious/inputs_batch1 EXPORT:  71%|███████▏  | 114/160 [02:11<02:51,  3.72s/it]       [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 32 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious/inputs_batch1 VALIDATE:  71%|███████▏  | 114/160 [02:15<02:51,  3.72s/it]
cache-cache_patch:all-oblivious/inputs_batch1 VALIDATE:  72%|███████▏  | 115/160 [02:15<02:55,  3.90s/it]
cache-cache_patch:all-oblivious-rt/inputs EXPORT:  72%|███████▏  | 115/160 [02:15<02:55,  3.90s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 32 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious-rt/inputs VALIDATE:  72%|███████▏  | 115/160 [02:19<02:55,  3.90s/it]
cache-cache_patch:all-oblivious-rt/inputs VALIDATE:  72%|███████▎  | 116/160 [02:19<02:55,  3.98s/it]
cache-cache_patch:all-oblivious-rt/inputs_prompt EXPORT:  72%|███████▎  | 116/160 [02:19<02:55,  3.98s/it]
cache-cache_patch:all-oblivious-rt/inputs2 EXPORT:  72%|███████▎  | 116/160 [02:19<02:55,  3.98s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 32 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious-rt/inputs2 VALIDATE:  72%|███████▎  | 116/160 [02:22<02:55,  3.98s/it]
cache-cache_patch:all-oblivious-rt/inputs2 VALIDATE:  74%|███████▍  | 118/160 [02:22<02:02,  2.93s/it]
cache-cache_patch:all-oblivious-rt/inputs_empty_cache EXPORT:  74%|███████▍  | 118/160 [02:22<02:02,  2.93s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 32 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious-rt/inputs_empty_cache VALIDATE:  74%|███████▍  | 118/160 [02:26<02:02,  2.93s/it]
cache-cache_patch:all-oblivious-rt/inputs_empty_cache VALIDATE:  74%|███████▍  | 119/160 [02:26<02:10,  3.18s/it]
cache-cache_patch:all-oblivious-rt/inputs_batch1 EXPORT:  74%|███████▍  | 119/160 [02:26<02:10,  3.18s/it]       [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 31 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious-rt/inputs_batch1 VALIDATE:  74%|███████▍  | 119/160 [02:31<02:10,  3.18s/it]
cache-cache_patch:all-oblivious-rt/inputs_batch1 VALIDATE:  75%|███████▌  | 120/160 [02:31<02:20,  3.52s/it]
cache-cache_patch:torch/inputs EXPORT:  75%|███████▌  | 120/160 [02:31<02:20,  3.52s/it]                    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch/inputs EXPORT:  76%|███████▌  | 121/160 [02:37<02:43,  4.19s/it]
cache-cache_patch:torch/inputs_prompt EXPORT:  76%|███████▌  | 121/160 [02:37<02:43,  4.19s/it]
cache-cache_patch:torch/inputs2 EXPORT:  76%|███████▌  | 121/160 [02:37<02:43,  4.19s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch/inputs2 EXPORT:  77%|███████▋  | 123/160 [02:40<01:50,  3.00s/it]
cache-cache_patch:torch/inputs_empty_cache EXPORT:  77%|███████▋  | 123/160 [02:40<01:50,  3.00s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch/inputs_empty_cache EXPORT:  78%|███████▊  | 124/160 [02:43<01:46,  2.95s/it]
cache-cache_patch:torch/inputs_batch1 EXPORT:  78%|███████▊  | 124/160 [02:43<01:46,  2.95s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch/inputs_batch1 EXPORT:  78%|███████▊  | 125/160 [02:45<01:36,  2.77s/it]
cache-cache_patch:torch-rt/inputs EXPORT:  78%|███████▊  | 125/160 [02:45<01:36,  2.77s/it]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-rt/inputs EXPORT:  79%|███████▉  | 126/160 [02:48<01:35,  2.81s/it]
cache-cache_patch:torch-rt/inputs_prompt EXPORT:  79%|███████▉  | 126/160 [02:48<01:35,  2.81s/it]
cache-cache_patch:torch-rt/inputs2 EXPORT:  79%|███████▉  | 126/160 [02:48<01:35,  2.81s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-rt/inputs2 EXPORT:  80%|████████  | 128/160 [02:51<01:13,  2.29s/it]
cache-cache_patch:torch-rt/inputs_empty_cache EXPORT:  80%|████████  | 128/160 [02:51<01:13,  2.29s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-rt/inputs_empty_cache EXPORT:  81%|████████  | 129/160 [02:53<01:10,  2.27s/it]
cache-cache_patch:torch-rt/inputs_batch1 EXPORT:  81%|████████  | 129/160 [02:53<01:10,  2.27s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-rt/inputs_batch1 EXPORT:  81%|████████▏ | 130/160 [02:55<01:07,  2.25s/it]
cache-cache_patch:torch-oblivious/inputs EXPORT:  81%|████████▏ | 130/160 [02:55<01:07,  2.25s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious/inputs EXPORT:  82%|████████▏ | 131/160 [03:01<01:31,  3.16s/it]
cache-cache_patch:torch-oblivious/inputs_prompt EXPORT:  82%|████████▏ | 131/160 [03:01<01:31,  3.16s/it]
cache-cache_patch:torch-oblivious/inputs2 EXPORT:  82%|████████▏ | 131/160 [03:01<01:31,  3.16s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious/inputs2 EXPORT:  83%|████████▎ | 133/160 [03:04<01:06,  2.46s/it]
cache-cache_patch:torch-oblivious/inputs_empty_cache EXPORT:  83%|████████▎ | 133/160 [03:04<01:06,  2.46s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious/inputs_empty_cache EXPORT:  84%|████████▍ | 134/160 [03:07<01:06,  2.55s/it]
cache-cache_patch:torch-oblivious/inputs_batch1 EXPORT:  84%|████████▍ | 134/160 [03:07<01:06,  2.55s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious/inputs_batch1 EXPORT:  84%|████████▍ | 135/160 [03:10<01:06,  2.67s/it]
cache-cache_patch:torch-oblivious-rt/inputs EXPORT:  84%|████████▍ | 135/160 [03:10<01:06,  2.67s/it]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious-rt/inputs EXPORT:  85%|████████▌ | 136/160 [03:13<01:06,  2.75s/it]
cache-cache_patch:torch-oblivious-rt/inputs_prompt EXPORT:  85%|████████▌ | 136/160 [03:13<01:06,  2.75s/it]
cache-cache_patch:torch-oblivious-rt/inputs2 EXPORT:  85%|████████▌ | 136/160 [03:13<01:06,  2.75s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious-rt/inputs2 EXPORT:  86%|████████▋ | 138/160 [03:16<00:48,  2.20s/it]
cache-cache_patch:torch-oblivious-rt/inputs_empty_cache EXPORT:  86%|████████▋ | 138/160 [03:16<00:48,  2.20s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious-rt/inputs_empty_cache EXPORT:  87%|████████▋ | 139/160 [03:19<00:49,  2.35s/it]
cache-cache_patch:torch-oblivious-rt/inputs_batch1 EXPORT:  87%|████████▋ | 139/160 [03:19<00:49,  2.35s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious-rt/inputs_batch1 EXPORT:  88%|████████▊ | 140/160 [03:22<00:50,  2.51s/it]
cache-cache_patch:transformers/inputs EXPORT:  88%|████████▊ | 140/160 [03:22<00:50,  2.51s/it]             [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers/inputs VALIDATE:  88%|████████▊ | 140/160 [03:28<00:50,  2.51s/it]
cache-cache_patch:transformers/inputs VALIDATE:  88%|████████▊ | 141/160 [03:28<01:07,  3.57s/it]
cache-cache_patch:transformers/inputs_prompt EXPORT:  88%|████████▊ | 141/160 [03:28<01:07,  3.57s/it]
cache-cache_patch:transformers/inputs2 EXPORT:  88%|████████▊ | 141/160 [03:28<01:07,  3.57s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers/inputs2 VALIDATE:  88%|████████▊ | 141/160 [03:32<01:07,  3.57s/it]
cache-cache_patch:transformers/inputs2 VALIDATE:  89%|████████▉ | 143/160 [03:32<00:48,  2.88s/it]
cache-cache_patch:transformers/inputs_empty_cache EXPORT:  89%|████████▉ | 143/160 [03:32<00:48,  2.88s/it]
cache-cache_patch:transformers/inputs_empty_cache EXPORT:  90%|█████████ | 144/160 [03:33<00:38,  2.40s/it]
cache-cache_patch:transformers/inputs_batch1 EXPORT:  90%|█████████ | 144/160 [03:33<00:38,  2.40s/it]
cache-cache_patch:transformers/inputs_batch1 EXPORT:  91%|█████████ | 145/160 [03:34<00:29,  1.97s/it]
cache-cache_patch:transformers-rt/inputs EXPORT:  91%|█████████ | 145/160 [03:34<00:29,  1.97s/it]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-rt/inputs VALIDATE:  91%|█████████ | 145/160 [03:37<00:29,  1.97s/it]
cache-cache_patch:transformers-rt/inputs VALIDATE:  91%|█████████▏| 146/160 [03:38<00:34,  2.48s/it]
cache-cache_patch:transformers-rt/inputs_prompt EXPORT:  91%|█████████▏| 146/160 [03:38<00:34,  2.48s/it]
cache-cache_patch:transformers-rt/inputs2 EXPORT:  91%|█████████▏| 146/160 [03:38<00:34,  2.48s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-rt/inputs2 VALIDATE:  91%|█████████▏| 146/160 [03:40<00:34,  2.48s/it]
cache-cache_patch:transformers-rt/inputs2 VALIDATE:  92%|█████████▎| 148/160 [03:40<00:23,  1.97s/it]
cache-cache_patch:transformers-rt/inputs_empty_cache EXPORT:  92%|█████████▎| 148/160 [03:40<00:23,  1.97s/it]
cache-cache_patch:transformers-rt/inputs_empty_cache EXPORT:  93%|█████████▎| 149/160 [03:41<00:18,  1.71s/it]
cache-cache_patch:transformers-rt/inputs_batch1 EXPORT:  93%|█████████▎| 149/160 [03:41<00:18,  1.71s/it]
cache-cache_patch:transformers-rt/inputs_batch1 EXPORT:  94%|█████████▍| 150/160 [03:42<00:14,  1.44s/it]
cache-cache_patch:transformers-oblivious/inputs EXPORT:  94%|█████████▍| 150/160 [03:42<00:14,  1.44s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious/inputs VALIDATE:  94%|█████████▍| 150/160 [03:45<00:14,  1.44s/it]
cache-cache_patch:transformers-oblivious/inputs VALIDATE:  94%|█████████▍| 151/160 [03:45<00:18,  2.05s/it]
cache-cache_patch:transformers-oblivious/inputs_prompt EXPORT:  94%|█████████▍| 151/160 [03:45<00:18,  2.05s/it]
cache-cache_patch:transformers-oblivious/inputs2 EXPORT:  94%|█████████▍| 151/160 [03:45<00:18,  2.05s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious/inputs2 VALIDATE:  94%|█████████▍| 151/160 [03:49<00:18,  2.05s/it]
cache-cache_patch:transformers-oblivious/inputs2 VALIDATE:  96%|█████████▌| 153/160 [03:49<00:13,  1.99s/it]
cache-cache_patch:transformers-oblivious/inputs_empty_cache EXPORT:  96%|█████████▌| 153/160 [03:49<00:13,  1.99s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious/inputs_empty_cache VALIDATE:  96%|█████████▌| 153/160 [03:56<00:13,  1.99s/it]
cache-cache_patch:transformers-oblivious/inputs_empty_cache VALIDATE:  96%|█████████▋| 154/160 [03:56<00:19,  3.17s/it]
cache-cache_patch:transformers-oblivious/inputs_batch1 EXPORT:  96%|█████████▋| 154/160 [03:56<00:19,  3.17s/it]       [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 32 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious/inputs_batch1 VALIDATE:  96%|█████████▋| 154/160 [04:01<00:19,  3.17s/it]
cache-cache_patch:transformers-oblivious/inputs_batch1 VALIDATE:  97%|█████████▋| 155/160 [04:01<00:17,  3.59s/it]
cache-cache_patch:transformers-oblivious-rt/inputs EXPORT:  97%|█████████▋| 155/160 [04:01<00:17,  3.59s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious-rt/inputs VALIDATE:  97%|█████████▋| 155/160 [04:05<00:17,  3.59s/it]
cache-cache_patch:transformers-oblivious-rt/inputs VALIDATE:  98%|█████████▊| 156/160 [04:05<00:14,  3.72s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_prompt EXPORT:  98%|█████████▊| 156/160 [04:05<00:14,  3.72s/it]
cache-cache_patch:transformers-oblivious-rt/inputs2 EXPORT:  98%|█████████▊| 156/160 [04:05<00:14,  3.72s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious-rt/inputs2 VALIDATE:  98%|█████████▊| 156/160 [04:09<00:14,  3.72s/it]
cache-cache_patch:transformers-oblivious-rt/inputs2 VALIDATE:  99%|█████████▉| 158/160 [04:09<00:05,  2.94s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_empty_cache EXPORT:  99%|█████████▉| 158/160 [04:09<00:05,  2.94s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious-rt/inputs_empty_cache VALIDATE:  99%|█████████▉| 158/160 [04:12<00:05,  2.94s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_empty_cache VALIDATE:  99%|█████████▉| 159/160 [04:12<00:02,  2.95s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_batch1 EXPORT:  99%|█████████▉| 159/160 [04:12<00:02,  2.95s/it]       [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 32 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious-rt/inputs_batch1 VALIDATE:  99%|█████████▉| 159/160 [04:16<00:02,  2.95s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_batch1 VALIDATE: 100%|██████████| 160/160 [04:16<00:00,  3.24s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_batch1 VALIDATE: 100%|██████████| 160/160 [04:16<00:00,  1.60s/it]

Let’s save the results.

# Gather every entry of `results` (populated earlier in the script) into one table.
df = pandas.DataFrame(results)
# Keep a spreadsheet copy of the complete table.
df.to_excel("plot_export_tiny_llm_dim01_onnx.xlsx")
# Bare expression: lets the gallery render the table right after this cell.
df

	cache	cache_patch	oblivious	rt	export_with	EXPORT	ERR-EXPORT	run_with	WORKS	ERR-RUN
0	0	0	0	0	inputs	0	Failed to decompose the FX graph for ONNX comp...	NaN	NaN	NaN
1	0	0	0	0	inputs_prompt	0	When `dynamic_shapes` is specified as a dict, ...	NaN	NaN	NaN
2	0	0	0	0	inputs2	0	Failed to decompose the FX graph for ONNX comp...	NaN	NaN	NaN
3	0	0	0	0	inputs_empty_cache	0	Found the following conflicts between user-spe...	NaN	NaN	NaN
4	0	0	0	0	inputs_batch1	0	Found the following conflicts between user-spe...	NaN	NaN	NaN
...	...	...	...	...	...	...	...	...	...	...
207	1	transformers	1	1	inputs_batch1	1	NaN	inputs	1.0	NaN
208	1	transformers	1	1	inputs_batch1	1	NaN	inputs_prompt	0.0	Not the same number of given inputs 1 and the ...
209	1	transformers	1	1	inputs_batch1	1	NaN	inputs2	1.0	NaN
210	1	transformers	1	1	inputs_batch1	1	NaN	inputs_empty_cache	1.0	NaN
211	1	transformers	1	1	inputs_batch1	1	NaN	inputs_batch1	1.0	NaN

212 rows × 10 columns

# Keep only the rows whose export step failed (EXPORT == 0) and save them
# to a dedicated spreadsheet before displaying them.
no_export = df.loc[df["EXPORT"].eq(0)]
no_export.to_excel("plot_export_tiny_llm_dim01_onnx.no_export.xlsx")
no_export

	cache	cache_patch	oblivious	rt	export_with	EXPORT	ERR-EXPORT	run_with	WORKS	ERR-RUN
0	0	0	0	0	inputs	0	Failed to decompose the FX graph for ONNX comp...	NaN	NaN	NaN
1	0	0	0	0	inputs_prompt	0	When `dynamic_shapes` is specified as a dict, ...	NaN	NaN	NaN
2	0	0	0	0	inputs2	0	Failed to decompose the FX graph for ONNX comp...	NaN	NaN	NaN
3	0	0	0	0	inputs_empty_cache	0	Found the following conflicts between user-spe...	NaN	NaN	NaN
4	0	0	0	0	inputs_batch1	0	Found the following conflicts between user-spe...	NaN	NaN	NaN
...	...	...	...	...	...	...	...	...	...	...
162	1	transformers	0	1	inputs_prompt	0	When `dynamic_shapes` is specified as a dict, ...	NaN	NaN	NaN
168	1	transformers	0	1	inputs_empty_cache	0	Found the following conflicts between user-spe...	NaN	NaN	NaN
169	1	transformers	0	1	inputs_batch1	0	Found the following conflicts between user-spe...	NaN	NaN	NaN
175	1	transformers	1	0	inputs_prompt	0	When `dynamic_shapes` is specified as a dict, ...	NaN	NaN	NaN
196	1	transformers	1	1	inputs_prompt	0	When `dynamic_shapes` is specified as a dict, ...	NaN	NaN	NaN

72 rows × 10 columns

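The validation failures: configurations that were exported successfully (EXPORT == 1) but failed when run with a given input set (WORKS == 0).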

# Rows where the export succeeded (EXPORT == 1) but the validation run
# failed (WORKS == 0), reshaped so that each export configuration is a row
# and each `run_with` input set is a column, for both the WORKS flag and
# the ERR-RUN message.
invalid = df.loc[df["EXPORT"].eq(1) & df["WORKS"].eq(0)].pivot(
    values=["WORKS", "ERR-RUN"],
    columns=["run_with"],
    index=["cache", "cache_patch", "oblivious", "rt", "export_with"],
)
invalid.to_excel("plot_export_tiny_llm_dim01_onnx.invalid.xlsx")
invalid

					WORKS					ERR-RUN
				run_with	inputs	inputs2	inputs_batch1	inputs_empty_cache	inputs_prompt	inputs	inputs2	inputs_batch1	inputs_empty_cache	inputs_prompt
cache	cache_patch	oblivious	rt	export_with
1	all	0	0	inputs	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs2	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs_batch1	0.0	0.0	NaN	0.0	0.0	[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go...	[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go...	NaN	[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go...	Not the same number of given inputs 1 and the ...
				inputs_empty_cache	0.0	0.0	0.0	NaN	0.0	[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go...	[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go...	[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go...	NaN	Not the same number of given inputs 1 and the ...
			1	inputs	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs2	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs_batch1	0.0	0.0	NaN	0.0	0.0	[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go...	[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go...	NaN	[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go...	Not the same number of given inputs 1 and the ...
				inputs_empty_cache	0.0	0.0	0.0	NaN	0.0	[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go...	[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go...	[ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go...	NaN	Not the same number of given inputs 1 and the ...
		1	0	inputs	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs2	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs_batch1	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs_empty_cache	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
			1	inputs	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs2	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs_batch1	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs_empty_cache	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
	transformers	0	0	inputs	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
			0	inputs2	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
			1	inputs	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
			1	inputs2	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
		1	0	inputs	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs2	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs_batch1	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs_empty_cache	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
			1	inputs	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs2	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs_batch1	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...
				inputs_empty_cache	NaN	NaN	NaN	NaN	0.0	NaN	NaN	NaN	NaN	Not the same number of given inputs 1 and the ...

# Rows where both the export (EXPORT == 1) and the validation run
# (WORKS == 1) succeeded, reshaped so that each export configuration is a
# row and each `run_with` input set is a column.
success = df.loc[df["EXPORT"].eq(1) & df["WORKS"].eq(1)].pivot(
    values=["WORKS"],
    columns=["run_with"],
    index=["cache", "cache_patch", "oblivious", "rt", "export_with"],
)
success.to_excel("plot_export_tiny_llm_dim01_onnx.success.xlsx")
success

					WORKS
				run_with	inputs	inputs2	inputs_batch1	inputs_empty_cache
cache	cache_patch	oblivious	rt	export_with
1	all	0	0	inputs	1.0	1.0	1.0	1.0
				inputs2	1.0	1.0	1.0	1.0
				inputs_batch1	NaN	NaN	1.0	NaN
				inputs_empty_cache	NaN	NaN	NaN	1.0
			1	inputs	1.0	1.0	1.0	1.0
				inputs2	1.0	1.0	1.0	1.0
				inputs_batch1	NaN	NaN	1.0	NaN
				inputs_empty_cache	NaN	NaN	NaN	1.0
		1	0	inputs	1.0	1.0	1.0	1.0
				inputs2	1.0	1.0	1.0	1.0
				inputs_batch1	1.0	1.0	1.0	1.0
				inputs_empty_cache	1.0	1.0	1.0	1.0
			1	inputs	1.0	1.0	1.0	1.0
				inputs2	1.0	1.0	1.0	1.0
				inputs_batch1	1.0	1.0	1.0	1.0
				inputs_empty_cache	1.0	1.0	1.0	1.0
	transformers	0	0	inputs	1.0	1.0	1.0	1.0
			0	inputs2	1.0	1.0	1.0	1.0
			1	inputs	1.0	1.0	1.0	1.0
			1	inputs2	1.0	1.0	1.0	1.0
		1	0	inputs	1.0	1.0	1.0	1.0
				inputs2	1.0	1.0	1.0	1.0
				inputs_batch1	1.0	1.0	1.0	1.0
				inputs_empty_cache	1.0	1.0	1.0	1.0
			1	inputs	1.0	1.0	1.0	1.0
				inputs2	1.0	1.0	1.0	1.0
				inputs_batch1	1.0	1.0	1.0	1.0
				inputs_empty_cache	1.0	1.0	1.0	1.0

If you run into any error, have a look at the example Export Tiny-LLM with patches.

# NOTE(review): presumably draws the legend/thumbnail image for this gallery
# example (title text, exporter label, colour) — confirm against
# `onnx_diagnostic.doc.plot_legend`.
doc.plot_legend("Tiny-LLM\nexport with\ndimension in {0,1}", "torch.onnx.export", "tomato")

Total running time of the script: (4 minutes 22.009 seconds)

Related examples

Export with dynamic dimensions in {0,1} into ONNX (custom)

Export with dynamic dimensions in {0,1}

Export microsoft/phi-2

Gallery generated by Sphinx-Gallery

Export with dynamic dimensions in {0,1} into ONNX¶

Available input sets¶

Export with options¶

The main loop¶

Export with dynamic dimensions in `{0,1}` into ONNX¶