Export with dynamic dimensions in {0,1} into ONNX

This duplicates the example Export with dynamic dimensions in {0,1}, but for torch.onnx.export(). It checks which input sets can be used to export the model, and which input sets the exported model can then be run with.

Available input sets

import itertools
from tqdm import tqdm
import numpy as np
import pandas
import torch
import onnxruntime
from onnx_diagnostic import doc
from onnx_diagnostic.helpers import max_diff, string_type, flatten_object
from onnx_diagnostic.helpers.torch_helper import torch_deepcopy
from onnx_diagnostic.helpers.rt_helper import make_feeds
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str
from onnx_diagnostic.torch_export_patches import (
    torch_export_patches,
    register_additional_serialization_functions,
)

# The helper returns a dictionary holding the model, its dynamic shapes
# and several input sets (the keys starting with "inputs").
data = get_untrained_model_with_inputs("arnir0/Tiny-LLM", add_second_input=True)
model = data["model"]
dynamic_shapes = data["dynamic_shapes"]

The trained model can be obtained with:

# Loads the tokenizer and the model weights published under MODEL_NAME.
# NOTE(review): `transformers` is not among the imports shown at the top of
# this example; add `import transformers` before running this snippet.
# NOTE(review): this rebinds `model` -- presumably the snippet is only
# illustrative and the untrained model is the one used below; confirm.
MODEL_NAME = "arnir0/Tiny-LLM"
tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME)
# Keep only the entries of `data` whose key starts with "inputs".
input_sets = {}
for name, value in data.items():
    if name.startswith("inputs"):
        input_sets[name] = value

# One line per input set: the name padded to 20 characters, then its description.
for name, value in input_sets.items():
    print(f"{name:20}: {string_type(value, with_shape=True)}")
inputs              : dict(input_ids:T7s2x3,attention_mask:T7s2x33,position_ids:T7s2x3,past_key_values:DynamicCache(key_cache=#1[T1s2x1x30x96], value_cache=#1[T1s2x1x30x96]))
inputs_prompt       : dict(input_ids:T7s1x11)
inputs2             : dict(input_ids:T7s3x4,attention_mask:T7s3x35,position_ids:T7s3x4,past_key_values:DynamicCache(key_cache=#1[T1s3x1x31x96], value_cache=#1[T1s3x1x31x96]))
inputs_empty_cache  : dict(input_ids:T7s2x3,attention_mask:T7s2x3,position_ids:T7s2x3,past_key_values:DynamicCache(key_cache=#1[T1s2x1x0x96], value_cache=#1[T1s2x1x0x96]))
inputs_batch1       : dict(input_ids:T7s1x3,attention_mask:T7s1x33,position_ids:T7s1x3,past_key_values:DynamicCache(key_cache=#1[T1s1x1x30x96], value_cache=#1[T1s1x1x30x96]))

The dynamic shapes are:

# Compact text description of the dynamic shapes.
shapes_description = string_type(dynamic_shapes)
print(f"dynamic_shapes: {shapes_description}")
dynamic_shapes: dict(input_ids:{0:DYN(batch),1:DYN(seq_length)},attention_mask:{0:DYN(batch),1:DYN(cache+seq)},position_ids:{0:DYN(batch),1:DYN(seq_length)},past_key_values:#2[{0:DYN(batch),2:DYN(cache_length)},{0:DYN(batch),2:DYN(cache_length)}])

Let’s check that all input sets work with the model and compute the expected outputs. We use a deep copy of the inputs because caches are usually modified in place.

# Reference outputs of the torch model, keyed like `input_sets`.
expected = {}
for name, model_kwargs in input_sets.items():
    # The model is called on a copy, the stored input set is not passed directly.
    output = model(**torch_deepcopy(model_kwargs))
    expected[name] = output
    print(f"{name:20}: {string_type(output, with_shape=True)}")
inputs              : CausalLMOutputWithPast(logits:T1s2x3x32000,past_key_values:DynamicCache(key_cache=#1[T1s2x1x33x96], value_cache=#1[T1s2x1x33x96]))
inputs_prompt       : CausalLMOutputWithPast(logits:T1s1x11x32000,past_key_values:DynamicCache(key_cache=#1[T1s1x1x11x96], value_cache=#1[T1s1x1x11x96]))
inputs2             : CausalLMOutputWithPast(logits:T1s3x4x32000,past_key_values:DynamicCache(key_cache=#1[T1s3x1x35x96], value_cache=#1[T1s3x1x35x96]))
inputs_empty_cache  : CausalLMOutputWithPast(logits:T1s2x3x32000,past_key_values:DynamicCache(key_cache=#1[T1s2x1x3x96], value_cache=#1[T1s2x1x3x96]))
inputs_batch1       : CausalLMOutputWithPast(logits:T1s1x3x32000,past_key_values:DynamicCache(key_cache=#1[T1s1x1x33x96], value_cache=#1[T1s1x1x33x96]))

Export with options

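We try to export with the following options: `cache` (register the additional serialization functions), `cache_patch` (apply `torch_export_patches` to torch, to transformers, or to both), `oblivious` (export with `backed_size_oblivious=True`) and `rt` (passed as `prefer_deferred_runtime_asserts_over_guards`).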

Some helper functions first.

def export_model(
    model, dynamic_shapes, inputs, cache=False, oblivious=False, rt=False, cache_patch=False
):
    """
    Exports ``model`` with :func:`torch.export.export`, then converts the
    exported program with :func:`torch.onnx.export`.

    Each enabled option wraps the export in one context manager and calls
    this function again with that option switched off.

    :param model: model to export
    :param dynamic_shapes: dynamic shapes, given as is to ``torch.onnx.export``
        and through ``use_dyn_not_str`` to ``torch.export.export``
    :param inputs: keyword arguments used to call the model
    :param cache: exports under ``register_additional_serialization_functions``,
        ignored when ``cache_patch`` is set
    :param oblivious: exports with ``backed_size_oblivious=True``
    :param rt: value given to ``prefer_deferred_runtime_asserts_over_guards``
    :param cache_patch: exports under ``torch_export_patches``,
        ``"torch"`` or ``"transformers"`` selects one family of patches,
        ``"all"``, ``True`` or ``1`` selects both
    :return: the output of ``torch.onnx.export``
    """
    if cache_patch:
        # Patches take precedence over the plain serialization functions.
        patches = torch_export_patches(
            patch_torch=cache_patch in ("all", "torch", True, 1),
            patch_transformers=cache_patch in ("all", "transformers", True, 1),
        )
        with patches:
            return export_model(model, dynamic_shapes, inputs, oblivious=oblivious, rt=rt)

    if cache:
        with register_additional_serialization_functions(patch_transformers=True):
            return export_model(model, dynamic_shapes, inputs, oblivious=oblivious, rt=rt)

    if oblivious:
        with torch.fx.experimental._config.patch(backed_size_oblivious=True):
            return export_model(model, dynamic_shapes, inputs, rt=rt)

    # No option left except ``rt``: this is the actual export.
    exported_program = torch.export.export(
        model,
        (),
        inputs,
        dynamic_shapes=use_dyn_not_str(dynamic_shapes),
        prefer_deferred_runtime_asserts_over_guards=rt,
    )
    return torch.onnx.export(
        exported_program, args=(), kwargs=inputs, dynamic_shapes=dynamic_shapes
    )


def try_export_model(
    model, dynamic_shapes, inputs, cache=False, oblivious=False, rt=False, cache_patch=False
):
    """
    Calls ``export_model`` with the same arguments and returns the
    exception instead of raising it when the export fails.

    :return: the output of ``export_model`` or the caught exception
    """
    options = dict(cache=cache, oblivious=oblivious, rt=rt, cache_patch=cache_patch)
    try:
        exported = export_model(model, dynamic_shapes, inputs, **options)
    except Exception as exc:
        # The caller tells a failure from a success with isinstance(..., Exception).
        return exc
    return exported


def validation(ep, input_sets, expected, catch_exception=True):
    """
    Runs the exported model with onnxruntime on every input set and
    compares the outputs with the expected ones.

    :param ep: export result, ``ep.model_proto`` is serialized and loaded by onnxruntime
    :param input_sets: dictionary ``{name: inputs}``
    :param expected: dictionary ``{name: expected outputs}``, same keys as ``input_sets``
    :param catch_exception: if True, an exception raised while building the
        feeds or running the model is yielded instead of being raised
    :return: iterator on ``(name, result)``, result is either the output of
        ``max_diff`` or the caught exception
    """
    session = onnxruntime.InferenceSession(
        ep.model_proto.SerializeToString(), providers=["CPUExecutionProvider"]
    )
    for name, model_inputs in input_sets.items():
        try:
            # The feeds are built from a copy of the inputs.
            feeds = make_feeds(session, torch_deepcopy(model_inputs), use_numpy=True)
            got = session.run(None, feeds)
        except Exception as exc:
            if not catch_exception:
                raise
            yield name, exc
        else:
            # The comparison stays outside of the try: its errors are never caught.
            yield name, max_diff(flatten_object(expected[name], drop_keys=True), got)

Let’s verify that an example known to work actually does.

# Export with the patches enabled, using a copy of the first input set.
reference_inputs = input_sets["inputs"]
ep = export_model(
    model, dynamic_shapes, torch_deepcopy(reference_inputs), cache_patch=True
)
# catch_exception=False: any failure is raised rather than reported.
res = list(
    validation(ep, {"inputs": reference_inputs}, expected, catch_exception=False)
)
assert res[0][1]["abs"] < 1e-5, f"Unexpected issue with res={res}"
[torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

The main loop

# One observation (a dictionary) per failed export or per (export, run) pair.
results = []

# Every combination of the four options with every input set used for the export.
possibilities = [*[[0, 1] for _ in range(4)], list(input_sets)]
# Second option is cache_patch: disabled, or one of the patch families.
possibilities[1] = [0, "all", "torch", "transformers"]
with tqdm(list(itertools.product(*possibilities))) as pbar:
    for cache, cache_patch, oblivious, rt, inputs in pbar:
        if cache_patch and not cache:
            # patches include caches.
            continue
        kwargs = dict(cache=cache, cache_patch=cache_patch, oblivious=oblivious, rt=rt)
        # Only the enabled options appear in the legend, e.g. "cache-cache_patch:all-rt".
        legend = "-".join(
            (k if isinstance(v, int) else f"{k}:{v}") for k, v in kwargs.items() if v
        )
        legend = f"{legend}/{inputs}"
        pbar.set_description(f"{legend} EXPORT")

        # export
        ep = try_export_model(
            model, dynamic_shapes, torch_deepcopy(input_sets[inputs]), **kwargs
        )
        if isinstance(ep, Exception):
            # The export failed: keep the first line of the error message only.
            obs = {
                **kwargs,
                "export_with": inputs,
                "EXPORT": 0,
                "ERR-EXPORT": str(ep).split("\n")[0],
            }
            results.append(obs)
            continue

        pbar.set_description(f"{legend} VALIDATE")
        common = {**kwargs, "export_with": inputs, "EXPORT": 1}
        # The exported model is run with every input set, not only the one used to export.
        for inp, res in validation(ep, input_sets, expected):
            if isinstance(res, Exception):
                obs = {
                    **common,
                    "run_with": inp,
                    "ERR-RUN": str(res).split("\n")[0],
                    "WORKS": 0,
                }
            else:
                # `not` is the logical negation; the bitwise `~` only behaves
                # that way on NumPy booleans (on a Python bool it returns a
                # non-zero integer, which is always true).
                abs_diff = res["abs"]
                obs = {
                    **common,
                    "run_with": inp,
                    "WORKS": int(not np.isnan(abs_diff) and abs_diff < 1e-3),
                }
            results.append(obs)
  0%|          | 0/160 [00:00<?, ?it/s]
/inputs EXPORT:   0%|          | 0/160 [00:00<?, ?it/s][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

/inputs EXPORT:   1%|          | 1/160 [00:03<08:07,  3.07s/it]
/inputs_prompt EXPORT:   1%|          | 1/160 [00:03<08:07,  3.07s/it]
/inputs2 EXPORT:   1%|          | 1/160 [00:03<08:07,  3.07s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

/inputs2 EXPORT:   2%|▏         | 3/160 [00:06<05:10,  1.98s/it]
/inputs_empty_cache EXPORT:   2%|▏         | 3/160 [00:06<05:10,  1.98s/it]
/inputs_empty_cache EXPORT:   2%|▎         | 4/160 [00:07<04:07,  1.59s/it]
/inputs_batch1 EXPORT:   2%|▎         | 4/160 [00:07<04:07,  1.59s/it]
/inputs_batch1 EXPORT:   3%|▎         | 5/160 [00:07<03:23,  1.31s/it]
rt/inputs EXPORT:   3%|▎         | 5/160 [00:07<03:23,  1.31s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

rt/inputs EXPORT:   4%|▍         | 6/160 [00:11<04:58,  1.94s/it]
rt/inputs_prompt EXPORT:   4%|▍         | 6/160 [00:11<04:58,  1.94s/it]
rt/inputs2 EXPORT:   4%|▍         | 6/160 [00:11<04:58,  1.94s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

rt/inputs2 EXPORT:   5%|▌         | 8/160 [00:14<04:32,  1.79s/it]
rt/inputs_empty_cache EXPORT:   5%|▌         | 8/160 [00:14<04:32,  1.79s/it]
rt/inputs_empty_cache EXPORT:   6%|▌         | 9/160 [00:15<03:54,  1.56s/it]
rt/inputs_batch1 EXPORT:   6%|▌         | 9/160 [00:15<03:54,  1.56s/it]
rt/inputs_batch1 EXPORT:   6%|▋         | 10/160 [00:16<03:55,  1.57s/it]
oblivious/inputs EXPORT:   6%|▋         | 10/160 [00:16<03:55,  1.57s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious/inputs EXPORT:   7%|▋         | 11/160 [00:20<05:00,  2.02s/it]
oblivious/inputs_prompt EXPORT:   7%|▋         | 11/160 [00:20<05:00,  2.02s/it]
oblivious/inputs2 EXPORT:   7%|▋         | 11/160 [00:20<05:00,  2.02s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious/inputs2 EXPORT:   8%|▊         | 13/160 [00:22<04:20,  1.78s/it]
oblivious/inputs_empty_cache EXPORT:   8%|▊         | 13/160 [00:22<04:20,  1.78s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious/inputs_empty_cache EXPORT:   9%|▉         | 14/160 [00:25<04:57,  2.04s/it]
oblivious/inputs_batch1 EXPORT:   9%|▉         | 14/160 [00:25<04:57,  2.04s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious/inputs_batch1 EXPORT:   9%|▉         | 15/160 [00:29<05:41,  2.35s/it]
oblivious-rt/inputs EXPORT:   9%|▉         | 15/160 [00:29<05:41,  2.35s/it]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious-rt/inputs EXPORT:  10%|█         | 16/160 [00:32<06:01,  2.51s/it]
oblivious-rt/inputs_prompt EXPORT:  10%|█         | 16/160 [00:32<06:01,  2.51s/it]
oblivious-rt/inputs2 EXPORT:  10%|█         | 16/160 [00:32<06:01,  2.51s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious-rt/inputs2 EXPORT:  11%|█▏        | 18/160 [00:36<05:42,  2.41s/it]
oblivious-rt/inputs_empty_cache EXPORT:  11%|█▏        | 18/160 [00:36<05:42,  2.41s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious-rt/inputs_empty_cache EXPORT:  12%|█▏        | 19/160 [00:39<06:11,  2.63s/it]
oblivious-rt/inputs_batch1 EXPORT:  12%|█▏        | 19/160 [00:39<06:11,  2.63s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

oblivious-rt/inputs_batch1 EXPORT:  12%|█▎        | 20/160 [00:43<06:29,  2.78s/it]
cache/inputs EXPORT:  12%|█▎        | 20/160 [00:43<06:29,  2.78s/it]              [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache/inputs EXPORT:  51%|█████     | 81/160 [00:46<00:13,  5.78it/s]
cache/inputs_prompt EXPORT:  51%|█████     | 81/160 [00:46<00:13,  5.78it/s]
cache/inputs2 EXPORT:  51%|█████     | 81/160 [00:46<00:13,  5.78it/s]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache/inputs2 EXPORT:  52%|█████▏    | 83/160 [00:48<00:16,  4.77it/s]
cache/inputs_empty_cache EXPORT:  52%|█████▏    | 83/160 [00:48<00:16,  4.77it/s]
cache/inputs_empty_cache EXPORT:  52%|█████▎    | 84/160 [00:49<00:17,  4.39it/s]
cache/inputs_batch1 EXPORT:  52%|█████▎    | 84/160 [00:49<00:17,  4.39it/s]
cache/inputs_batch1 EXPORT:  53%|█████▎    | 85/160 [00:50<00:18,  4.05it/s]
cache-rt/inputs EXPORT:  53%|█████▎    | 85/160 [00:50<00:18,  4.05it/s]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-rt/inputs EXPORT:  54%|█████▍    | 86/160 [00:52<00:28,  2.58it/s]
cache-rt/inputs_prompt EXPORT:  54%|█████▍    | 86/160 [00:52<00:28,  2.58it/s]
cache-rt/inputs2 EXPORT:  54%|█████▍    | 86/160 [00:52<00:28,  2.58it/s]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-rt/inputs2 EXPORT:  55%|█████▌    | 88/160 [00:57<00:48,  1.48it/s]
cache-rt/inputs_empty_cache EXPORT:  55%|█████▌    | 88/160 [00:57<00:48,  1.48it/s]
cache-rt/inputs_empty_cache EXPORT:  56%|█████▌    | 89/160 [00:58<00:49,  1.43it/s]
cache-rt/inputs_batch1 EXPORT:  56%|█████▌    | 89/160 [00:58<00:49,  1.43it/s]
cache-rt/inputs_batch1 EXPORT:  56%|█████▋    | 90/160 [00:59<00:49,  1.42it/s]
cache-oblivious/inputs EXPORT:  56%|█████▋    | 90/160 [00:59<00:49,  1.42it/s][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious/inputs EXPORT:  57%|█████▋    | 91/160 [01:02<01:09,  1.00s/it]
cache-oblivious/inputs_prompt EXPORT:  57%|█████▋    | 91/160 [01:02<01:09,  1.00s/it]
cache-oblivious/inputs2 EXPORT:  57%|█████▋    | 91/160 [01:02<01:09,  1.00s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious/inputs2 EXPORT:  58%|█████▊    | 93/160 [01:05<01:15,  1.12s/it]
cache-oblivious/inputs_empty_cache EXPORT:  58%|█████▊    | 93/160 [01:05<01:15,  1.12s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious/inputs_empty_cache EXPORT:  59%|█████▉    | 94/160 [01:08<01:31,  1.39s/it]
cache-oblivious/inputs_batch1 EXPORT:  59%|█████▉    | 94/160 [01:08<01:31,  1.39s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious/inputs_batch1 EXPORT:  59%|█████▉    | 95/160 [01:10<01:47,  1.66s/it]
cache-oblivious-rt/inputs EXPORT:  59%|█████▉    | 95/160 [01:10<01:47,  1.66s/it]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious-rt/inputs EXPORT:  60%|██████    | 96/160 [01:13<02:02,  1.92s/it]
cache-oblivious-rt/inputs_prompt EXPORT:  60%|██████    | 96/160 [01:13<02:02,  1.92s/it]
cache-oblivious-rt/inputs2 EXPORT:  60%|██████    | 96/160 [01:13<02:02,  1.92s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious-rt/inputs2 EXPORT:  61%|██████▏   | 98/160 [01:16<01:48,  1.74s/it]
cache-oblivious-rt/inputs_empty_cache EXPORT:  61%|██████▏   | 98/160 [01:16<01:48,  1.74s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious-rt/inputs_empty_cache EXPORT:  62%|██████▏   | 99/160 [01:20<02:19,  2.28s/it]
cache-oblivious-rt/inputs_batch1 EXPORT:  62%|██████▏   | 99/160 [01:20<02:19,  2.28s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-oblivious-rt/inputs_batch1 EXPORT:  62%|██████▎   | 100/160 [01:24<02:30,  2.51s/it]
cache-cache_patch:all/inputs EXPORT:  62%|██████▎   | 100/160 [01:24<02:30,  2.51s/it]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all/inputs VALIDATE:  62%|██████▎   | 100/160 [01:27<02:30,  2.51s/it]
cache-cache_patch:all/inputs VALIDATE:  63%|██████▎   | 101/160 [01:28<02:49,  2.88s/it]
cache-cache_patch:all/inputs_prompt EXPORT:  63%|██████▎   | 101/160 [01:28<02:49,  2.88s/it]
cache-cache_patch:all/inputs2 EXPORT:  63%|██████▎   | 101/160 [01:28<02:49,  2.88s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all/inputs2 VALIDATE:  63%|██████▎   | 101/160 [01:31<02:49,  2.88s/it]
cache-cache_patch:all/inputs2 VALIDATE:  64%|██████▍   | 103/160 [01:32<02:22,  2.50s/it]
cache-cache_patch:all/inputs_empty_cache EXPORT:  64%|██████▍   | 103/160 [01:32<02:22,  2.50s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 43 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all/inputs_empty_cache VALIDATE:  64%|██████▍   | 103/160 [01:36<02:22,  2.50s/it]
cache-cache_patch:all/inputs_empty_cache VALIDATE:  65%|██████▌   | 104/160 [01:36<02:44,  2.93s/it]
cache-cache_patch:all/inputs_batch1 EXPORT:  65%|██████▌   | 104/160 [01:36<02:44,  2.93s/it]       [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 17 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all/inputs_batch1 VALIDATE:  65%|██████▌   | 104/160 [01:39<02:44,  2.93s/it]
cache-cache_patch:all/inputs_batch1 VALIDATE:  66%|██████▌   | 105/160 [01:39<02:43,  2.97s/it]
cache-cache_patch:all-rt/inputs EXPORT:  66%|██████▌   | 105/160 [01:39<02:43,  2.97s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-rt/inputs VALIDATE:  66%|██████▌   | 105/160 [01:45<02:43,  2.97s/it]
cache-cache_patch:all-rt/inputs VALIDATE:  66%|██████▋   | 106/160 [01:45<03:28,  3.86s/it]
cache-cache_patch:all-rt/inputs_prompt EXPORT:  66%|██████▋   | 106/160 [01:45<03:28,  3.86s/it]
cache-cache_patch:all-rt/inputs2 EXPORT:  66%|██████▋   | 106/160 [01:45<03:28,  3.86s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-rt/inputs2 VALIDATE:  66%|██████▋   | 106/160 [01:49<03:28,  3.86s/it]
cache-cache_patch:all-rt/inputs2 VALIDATE:  68%|██████▊   | 108/160 [01:50<02:41,  3.11s/it]
cache-cache_patch:all-rt/inputs_empty_cache EXPORT:  68%|██████▊   | 108/160 [01:50<02:41,  3.11s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 43 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-rt/inputs_empty_cache VALIDATE:  68%|██████▊   | 108/160 [01:52<02:41,  3.11s/it]
cache-cache_patch:all-rt/inputs_empty_cache VALIDATE:  68%|██████▊   | 109/160 [01:53<02:38,  3.10s/it]
cache-cache_patch:all-rt/inputs_batch1 EXPORT:  68%|██████▊   | 109/160 [01:53<02:38,  3.10s/it]       [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 17 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-rt/inputs_batch1 VALIDATE:  68%|██████▊   | 109/160 [01:55<02:38,  3.10s/it]
cache-cache_patch:all-rt/inputs_batch1 VALIDATE:  69%|██████▉   | 110/160 [01:56<02:34,  3.09s/it]
cache-cache_patch:all-oblivious/inputs EXPORT:  69%|██████▉   | 110/160 [01:56<02:34,  3.09s/it]  [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious/inputs VALIDATE:  69%|██████▉   | 110/160 [02:00<02:34,  3.09s/it]
cache-cache_patch:all-oblivious/inputs VALIDATE:  69%|██████▉   | 111/160 [02:00<02:45,  3.38s/it]
cache-cache_patch:all-oblivious/inputs_prompt EXPORT:  69%|██████▉   | 111/160 [02:00<02:45,  3.38s/it]
cache-cache_patch:all-oblivious/inputs2 EXPORT:  69%|██████▉   | 111/160 [02:00<02:45,  3.38s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious/inputs2 VALIDATE:  69%|██████▉   | 111/160 [02:04<02:45,  3.38s/it]
cache-cache_patch:all-oblivious/inputs2 VALIDATE:  71%|███████   | 113/160 [02:04<02:11,  2.79s/it]
cache-cache_patch:all-oblivious/inputs_empty_cache EXPORT:  71%|███████   | 113/160 [02:04<02:11,  2.79s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious/inputs_empty_cache VALIDATE:  71%|███████   | 113/160 [02:10<02:11,  2.79s/it]
cache-cache_patch:all-oblivious/inputs_empty_cache VALIDATE:  71%|███████▏  | 114/160 [02:11<02:51,  3.72s/it]
cache-cache_patch:all-oblivious/inputs_batch1 EXPORT:  71%|███████▏  | 114/160 [02:11<02:51,  3.72s/it]       [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 32 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious/inputs_batch1 VALIDATE:  71%|███████▏  | 114/160 [02:15<02:51,  3.72s/it]
cache-cache_patch:all-oblivious/inputs_batch1 VALIDATE:  72%|███████▏  | 115/160 [02:15<02:55,  3.90s/it]
cache-cache_patch:all-oblivious-rt/inputs EXPORT:  72%|███████▏  | 115/160 [02:15<02:55,  3.90s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 32 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious-rt/inputs VALIDATE:  72%|███████▏  | 115/160 [02:19<02:55,  3.90s/it]
cache-cache_patch:all-oblivious-rt/inputs VALIDATE:  72%|███████▎  | 116/160 [02:19<02:55,  3.98s/it]
cache-cache_patch:all-oblivious-rt/inputs_prompt EXPORT:  72%|███████▎  | 116/160 [02:19<02:55,  3.98s/it]
cache-cache_patch:all-oblivious-rt/inputs2 EXPORT:  72%|███████▎  | 116/160 [02:19<02:55,  3.98s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 32 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious-rt/inputs2 VALIDATE:  72%|███████▎  | 116/160 [02:22<02:55,  3.98s/it]
cache-cache_patch:all-oblivious-rt/inputs2 VALIDATE:  74%|███████▍  | 118/160 [02:22<02:02,  2.93s/it]
cache-cache_patch:all-oblivious-rt/inputs_empty_cache EXPORT:  74%|███████▍  | 118/160 [02:22<02:02,  2.93s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 32 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious-rt/inputs_empty_cache VALIDATE:  74%|███████▍  | 118/160 [02:26<02:02,  2.93s/it]
cache-cache_patch:all-oblivious-rt/inputs_empty_cache VALIDATE:  74%|███████▍  | 119/160 [02:26<02:10,  3.18s/it]
cache-cache_patch:all-oblivious-rt/inputs_batch1 EXPORT:  74%|███████▍  | 119/160 [02:26<02:10,  3.18s/it]       [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 31 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:all-oblivious-rt/inputs_batch1 VALIDATE:  74%|███████▍  | 119/160 [02:31<02:10,  3.18s/it]
cache-cache_patch:all-oblivious-rt/inputs_batch1 VALIDATE:  75%|███████▌  | 120/160 [02:31<02:20,  3.52s/it]
cache-cache_patch:torch/inputs EXPORT:  75%|███████▌  | 120/160 [02:31<02:20,  3.52s/it]                    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch/inputs EXPORT:  76%|███████▌  | 121/160 [02:37<02:43,  4.19s/it]
cache-cache_patch:torch/inputs_prompt EXPORT:  76%|███████▌  | 121/160 [02:37<02:43,  4.19s/it]
cache-cache_patch:torch/inputs2 EXPORT:  76%|███████▌  | 121/160 [02:37<02:43,  4.19s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch/inputs2 EXPORT:  77%|███████▋  | 123/160 [02:40<01:50,  3.00s/it]
cache-cache_patch:torch/inputs_empty_cache EXPORT:  77%|███████▋  | 123/160 [02:40<01:50,  3.00s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch/inputs_empty_cache EXPORT:  78%|███████▊  | 124/160 [02:43<01:46,  2.95s/it]
cache-cache_patch:torch/inputs_batch1 EXPORT:  78%|███████▊  | 124/160 [02:43<01:46,  2.95s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch/inputs_batch1 EXPORT:  78%|███████▊  | 125/160 [02:45<01:36,  2.77s/it]
cache-cache_patch:torch-rt/inputs EXPORT:  78%|███████▊  | 125/160 [02:45<01:36,  2.77s/it]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-rt/inputs EXPORT:  79%|███████▉  | 126/160 [02:48<01:35,  2.81s/it]
cache-cache_patch:torch-rt/inputs_prompt EXPORT:  79%|███████▉  | 126/160 [02:48<01:35,  2.81s/it]
cache-cache_patch:torch-rt/inputs2 EXPORT:  79%|███████▉  | 126/160 [02:48<01:35,  2.81s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-rt/inputs2 EXPORT:  80%|████████  | 128/160 [02:51<01:13,  2.29s/it]
cache-cache_patch:torch-rt/inputs_empty_cache EXPORT:  80%|████████  | 128/160 [02:51<01:13,  2.29s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-rt/inputs_empty_cache EXPORT:  81%|████████  | 129/160 [02:53<01:10,  2.27s/it]
cache-cache_patch:torch-rt/inputs_batch1 EXPORT:  81%|████████  | 129/160 [02:53<01:10,  2.27s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-rt/inputs_batch1 EXPORT:  81%|████████▏ | 130/160 [02:55<01:07,  2.25s/it]
cache-cache_patch:torch-oblivious/inputs EXPORT:  81%|████████▏ | 130/160 [02:55<01:07,  2.25s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious/inputs EXPORT:  82%|████████▏ | 131/160 [03:01<01:31,  3.16s/it]
cache-cache_patch:torch-oblivious/inputs_prompt EXPORT:  82%|████████▏ | 131/160 [03:01<01:31,  3.16s/it]
cache-cache_patch:torch-oblivious/inputs2 EXPORT:  82%|████████▏ | 131/160 [03:01<01:31,  3.16s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious/inputs2 EXPORT:  83%|████████▎ | 133/160 [03:04<01:06,  2.46s/it]
cache-cache_patch:torch-oblivious/inputs_empty_cache EXPORT:  83%|████████▎ | 133/160 [03:04<01:06,  2.46s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious/inputs_empty_cache EXPORT:  84%|████████▍ | 134/160 [03:07<01:06,  2.55s/it]
cache-cache_patch:torch-oblivious/inputs_batch1 EXPORT:  84%|████████▍ | 134/160 [03:07<01:06,  2.55s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious/inputs_batch1 EXPORT:  84%|████████▍ | 135/160 [03:10<01:06,  2.67s/it]
cache-cache_patch:torch-oblivious-rt/inputs EXPORT:  84%|████████▍ | 135/160 [03:10<01:06,  2.67s/it]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious-rt/inputs EXPORT:  85%|████████▌ | 136/160 [03:13<01:06,  2.75s/it]
cache-cache_patch:torch-oblivious-rt/inputs_prompt EXPORT:  85%|████████▌ | 136/160 [03:13<01:06,  2.75s/it]
cache-cache_patch:torch-oblivious-rt/inputs2 EXPORT:  85%|████████▌ | 136/160 [03:13<01:06,  2.75s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious-rt/inputs2 EXPORT:  86%|████████▋ | 138/160 [03:16<00:48,  2.20s/it]
cache-cache_patch:torch-oblivious-rt/inputs_empty_cache EXPORT:  86%|████████▋ | 138/160 [03:16<00:48,  2.20s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious-rt/inputs_empty_cache EXPORT:  87%|████████▋ | 139/160 [03:19<00:49,  2.35s/it]
cache-cache_patch:torch-oblivious-rt/inputs_batch1 EXPORT:  87%|████████▋ | 139/160 [03:19<00:49,  2.35s/it]     [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ❌

cache-cache_patch:torch-oblivious-rt/inputs_batch1 EXPORT:  88%|████████▊ | 140/160 [03:22<00:50,  2.51s/it]
cache-cache_patch:transformers/inputs EXPORT:  88%|████████▊ | 140/160 [03:22<00:50,  2.51s/it]             [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers/inputs VALIDATE:  88%|████████▊ | 140/160 [03:28<00:50,  2.51s/it]
cache-cache_patch:transformers/inputs VALIDATE:  88%|████████▊ | 141/160 [03:28<01:07,  3.57s/it]
cache-cache_patch:transformers/inputs_prompt EXPORT:  88%|████████▊ | 141/160 [03:28<01:07,  3.57s/it]
cache-cache_patch:transformers/inputs2 EXPORT:  88%|████████▊ | 141/160 [03:28<01:07,  3.57s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers/inputs2 VALIDATE:  88%|████████▊ | 141/160 [03:32<01:07,  3.57s/it]
cache-cache_patch:transformers/inputs2 VALIDATE:  89%|████████▉ | 143/160 [03:32<00:48,  2.88s/it]
cache-cache_patch:transformers/inputs_empty_cache EXPORT:  89%|████████▉ | 143/160 [03:32<00:48,  2.88s/it]
cache-cache_patch:transformers/inputs_empty_cache EXPORT:  90%|█████████ | 144/160 [03:33<00:38,  2.40s/it]
cache-cache_patch:transformers/inputs_batch1 EXPORT:  90%|█████████ | 144/160 [03:33<00:38,  2.40s/it]
cache-cache_patch:transformers/inputs_batch1 EXPORT:  91%|█████████ | 145/160 [03:34<00:29,  1.97s/it]
cache-cache_patch:transformers-rt/inputs EXPORT:  91%|█████████ | 145/160 [03:34<00:29,  1.97s/it]    [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-rt/inputs VALIDATE:  91%|█████████ | 145/160 [03:37<00:29,  1.97s/it]
cache-cache_patch:transformers-rt/inputs VALIDATE:  91%|█████████▏| 146/160 [03:38<00:34,  2.48s/it]
cache-cache_patch:transformers-rt/inputs_prompt EXPORT:  91%|█████████▏| 146/160 [03:38<00:34,  2.48s/it]
cache-cache_patch:transformers-rt/inputs2 EXPORT:  91%|█████████▏| 146/160 [03:38<00:34,  2.48s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-rt/inputs2 VALIDATE:  91%|█████████▏| 146/160 [03:40<00:34,  2.48s/it]
cache-cache_patch:transformers-rt/inputs2 VALIDATE:  92%|█████████▎| 148/160 [03:40<00:23,  1.97s/it]
cache-cache_patch:transformers-rt/inputs_empty_cache EXPORT:  92%|█████████▎| 148/160 [03:40<00:23,  1.97s/it]
cache-cache_patch:transformers-rt/inputs_empty_cache EXPORT:  93%|█████████▎| 149/160 [03:41<00:18,  1.71s/it]
cache-cache_patch:transformers-rt/inputs_batch1 EXPORT:  93%|█████████▎| 149/160 [03:41<00:18,  1.71s/it]
cache-cache_patch:transformers-rt/inputs_batch1 EXPORT:  94%|█████████▍| 150/160 [03:42<00:14,  1.44s/it]
cache-cache_patch:transformers-oblivious/inputs EXPORT:  94%|█████████▍| 150/160 [03:42<00:14,  1.44s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious/inputs VALIDATE:  94%|█████████▍| 150/160 [03:45<00:14,  1.44s/it]
cache-cache_patch:transformers-oblivious/inputs VALIDATE:  94%|█████████▍| 151/160 [03:45<00:18,  2.05s/it]
cache-cache_patch:transformers-oblivious/inputs_prompt EXPORT:  94%|█████████▍| 151/160 [03:45<00:18,  2.05s/it]
cache-cache_patch:transformers-oblivious/inputs2 EXPORT:  94%|█████████▍| 151/160 [03:45<00:18,  2.05s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious/inputs2 VALIDATE:  94%|█████████▍| 151/160 [03:49<00:18,  2.05s/it]
cache-cache_patch:transformers-oblivious/inputs2 VALIDATE:  96%|█████████▌| 153/160 [03:49<00:13,  1.99s/it]
cache-cache_patch:transformers-oblivious/inputs_empty_cache EXPORT:  96%|█████████▌| 153/160 [03:49<00:13,  1.99s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious/inputs_empty_cache VALIDATE:  96%|█████████▌| 153/160 [03:56<00:13,  1.99s/it]
cache-cache_patch:transformers-oblivious/inputs_empty_cache VALIDATE:  96%|█████████▋| 154/160 [03:56<00:19,  3.17s/it]
cache-cache_patch:transformers-oblivious/inputs_batch1 EXPORT:  96%|█████████▋| 154/160 [03:56<00:19,  3.17s/it]       [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 32 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious/inputs_batch1 VALIDATE:  96%|█████████▋| 154/160 [04:01<00:19,  3.17s/it]
cache-cache_patch:transformers-oblivious/inputs_batch1 VALIDATE:  97%|█████████▋| 155/160 [04:01<00:17,  3.59s/it]
cache-cache_patch:transformers-oblivious-rt/inputs EXPORT:  97%|█████████▋| 155/160 [04:01<00:17,  3.59s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious-rt/inputs VALIDATE:  97%|█████████▋| 155/160 [04:05<00:17,  3.59s/it]
cache-cache_patch:transformers-oblivious-rt/inputs VALIDATE:  98%|█████████▊| 156/160 [04:05<00:14,  3.72s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_prompt EXPORT:  98%|█████████▊| 156/160 [04:05<00:14,  3.72s/it]
cache-cache_patch:transformers-oblivious-rt/inputs2 EXPORT:  98%|█████████▊| 156/160 [04:05<00:14,  3.72s/it]      [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious-rt/inputs2 VALIDATE:  98%|█████████▊| 156/160 [04:09<00:14,  3.72s/it]
cache-cache_patch:transformers-oblivious-rt/inputs2 VALIDATE:  99%|█████████▉| 158/160 [04:09<00:05,  2.94s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_empty_cache EXPORT:  99%|█████████▉| 158/160 [04:09<00:05,  2.94s/it][torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 33 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious-rt/inputs_empty_cache VALIDATE:  99%|█████████▉| 158/160 [04:12<00:05,  2.94s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_empty_cache VALIDATE:  99%|█████████▉| 159/160 [04:12<00:02,  2.95s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_batch1 EXPORT:  99%|█████████▉| 159/160 [04:12<00:02,  2.95s/it]       [torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
Applied 32 of general pattern rewrite rules.
[torch.onnx] Optimize the ONNX graph... ✅

cache-cache_patch:transformers-oblivious-rt/inputs_batch1 VALIDATE:  99%|█████████▉| 159/160 [04:16<00:02,  2.95s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_batch1 VALIDATE: 100%|██████████| 160/160 [04:16<00:00,  3.24s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_batch1 VALIDATE: 100%|██████████| 160/160 [04:16<00:00,  1.60s/it]

Let’s save the results.

# Collect every recorded export/run outcome into a single table and keep a
# copy on disk as an Excel workbook; the trailing bare name displays the
# table in the rendered example.
df = pandas.DataFrame(data=results)
df.to_excel(excel_writer="plot_export_tiny_llm_dim01_onnx.xlsx")
df
cache cache_patch oblivious rt export_with EXPORT ERR-EXPORT run_with WORKS ERR-RUN
0 0 0 0 0 inputs 0 Failed to decompose the FX graph for ONNX comp... NaN NaN NaN
1 0 0 0 0 inputs_prompt 0 When `dynamic_shapes` is specified as a dict, ... NaN NaN NaN
2 0 0 0 0 inputs2 0 Failed to decompose the FX graph for ONNX comp... NaN NaN NaN
3 0 0 0 0 inputs_empty_cache 0 Found the following conflicts between user-spe... NaN NaN NaN
4 0 0 0 0 inputs_batch1 0 Found the following conflicts between user-spe... NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ...
207 1 transformers 1 1 inputs_batch1 1 NaN inputs 1.0 NaN
208 1 transformers 1 1 inputs_batch1 1 NaN inputs_prompt 0.0 Not the same number of given inputs 1 and the ...
209 1 transformers 1 1 inputs_batch1 1 NaN inputs2 1.0 NaN
210 1 transformers 1 1 inputs_batch1 1 NaN inputs_empty_cache 1.0 NaN
211 1 transformers 1 1 inputs_batch1 1 NaN inputs_batch1 1.0 NaN

212 rows × 10 columns



# Configurations whose export failed (EXPORT is 0); the reason is stored in
# the ERR-EXPORT column. Saved separately for inspection, then displayed.
no_export = df.loc[df["EXPORT"].eq(0)]
no_export.to_excel(excel_writer="plot_export_tiny_llm_dim01_onnx.no_export.xlsx")
no_export
cache cache_patch oblivious rt export_with EXPORT ERR-EXPORT run_with WORKS ERR-RUN
0 0 0 0 0 inputs 0 Failed to decompose the FX graph for ONNX comp... NaN NaN NaN
1 0 0 0 0 inputs_prompt 0 When `dynamic_shapes` is specified as a dict, ... NaN NaN NaN
2 0 0 0 0 inputs2 0 Failed to decompose the FX graph for ONNX comp... NaN NaN NaN
3 0 0 0 0 inputs_empty_cache 0 Found the following conflicts between user-spe... NaN NaN NaN
4 0 0 0 0 inputs_batch1 0 Found the following conflicts between user-spe... NaN NaN NaN
... ... ... ... ... ... ... ... ... ... ...
162 1 transformers 0 1 inputs_prompt 0 When `dynamic_shapes` is specified as a dict, ... NaN NaN NaN
168 1 transformers 0 1 inputs_empty_cache 0 Found the following conflicts between user-spe... NaN NaN NaN
169 1 transformers 0 1 inputs_batch1 0 Found the following conflicts between user-spe... NaN NaN NaN
175 1 transformers 1 0 inputs_prompt 0 When `dynamic_shapes` is specified as a dict, ... NaN NaN NaN
196 1 transformers 1 1 inputs_prompt 0 When `dynamic_shapes` is specified as a dict, ... NaN NaN NaN

72 rows × 10 columns



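The validation failures: configurations where the export succeeded but running the exported model with a given input set failed.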

# Runs where the export succeeded but the exported model failed with the
# inputs named in ``run_with``. One row per export configuration, one column
# per run-time input set, showing the WORKS flag and the error message.
invalid = df.loc[df["EXPORT"].eq(1) & df["WORKS"].eq(0)].pivot(
    values=["WORKS", "ERR-RUN"],
    columns=["run_with"],
    index=["cache", "cache_patch", "oblivious", "rt", "export_with"],
)
invalid.to_excel(excel_writer="plot_export_tiny_llm_dim01_onnx.invalid.xlsx")
invalid
WORKS ERR-RUN
run_with inputs inputs2 inputs_batch1 inputs_empty_cache inputs_prompt inputs inputs2 inputs_batch1 inputs_empty_cache inputs_prompt
cache cache_patch oblivious rt export_with
1 all 0 0 inputs NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs2 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs_batch1 0.0 0.0 NaN 0.0 0.0 [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go... [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go... NaN [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go... Not the same number of given inputs 1 and the ...
inputs_empty_cache 0.0 0.0 0.0 NaN 0.0 [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go... [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go... [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go... NaN Not the same number of given inputs 1 and the ...
1 inputs NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs2 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs_batch1 0.0 0.0 NaN 0.0 0.0 [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go... [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go... NaN [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go... Not the same number of given inputs 1 and the ...
inputs_empty_cache 0.0 0.0 0.0 NaN 0.0 [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go... [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go... [ONNXRuntimeError] : 2 : INVALID_ARGUMENT : Go... NaN Not the same number of given inputs 1 and the ...
1 0 inputs NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs2 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs_batch1 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs_empty_cache NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
1 inputs NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs2 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs_batch1 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs_empty_cache NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
transformers 0 0 inputs NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs2 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
1 inputs NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs2 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
1 0 inputs NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs2 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs_batch1 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs_empty_cache NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
1 inputs NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs2 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs_batch1 NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...
inputs_empty_cache NaN NaN NaN NaN 0.0 NaN NaN NaN NaN Not the same number of given inputs 1 and the ...


# Runs where the export succeeded and the exported model also ran with the
# inputs named in ``run_with``. One row per export configuration, one column
# per run-time input set.
success = df.loc[df["EXPORT"].eq(1) & df["WORKS"].eq(1)].pivot(
    values=["WORKS"],
    columns=["run_with"],
    index=["cache", "cache_patch", "oblivious", "rt", "export_with"],
)
success.to_excel(excel_writer="plot_export_tiny_llm_dim01_onnx.success.xlsx")
success
WORKS
run_with inputs inputs2 inputs_batch1 inputs_empty_cache
cache cache_patch oblivious rt export_with
1 all 0 0 inputs 1.0 1.0 1.0 1.0
inputs2 1.0 1.0 1.0 1.0
inputs_batch1 NaN NaN 1.0 NaN
inputs_empty_cache NaN NaN NaN 1.0
1 inputs 1.0 1.0 1.0 1.0
inputs2 1.0 1.0 1.0 1.0
inputs_batch1 NaN NaN 1.0 NaN
inputs_empty_cache NaN NaN NaN 1.0
1 0 inputs 1.0 1.0 1.0 1.0
inputs2 1.0 1.0 1.0 1.0
inputs_batch1 1.0 1.0 1.0 1.0
inputs_empty_cache 1.0 1.0 1.0 1.0
1 inputs 1.0 1.0 1.0 1.0
inputs2 1.0 1.0 1.0 1.0
inputs_batch1 1.0 1.0 1.0 1.0
inputs_empty_cache 1.0 1.0 1.0 1.0
transformers 0 0 inputs 1.0 1.0 1.0 1.0
inputs2 1.0 1.0 1.0 1.0
1 inputs 1.0 1.0 1.0 1.0
inputs2 1.0 1.0 1.0 1.0
1 0 inputs 1.0 1.0 1.0 1.0
inputs2 1.0 1.0 1.0 1.0
inputs_batch1 1.0 1.0 1.0 1.0
inputs_empty_cache 1.0 1.0 1.0 1.0
1 inputs 1.0 1.0 1.0 1.0
inputs2 1.0 1.0 1.0 1.0
inputs_batch1 1.0 1.0 1.0 1.0
inputs_empty_cache 1.0 1.0 1.0 1.0


If you run into any errors, see the example Export Tiny-LLM with patches.

# Draw the legend figure for this example.
# NOTE(review): the three arguments look like the main label, the exporter
# name and a color -- confirm against onnx_diagnostic.doc.plot_legend.
doc.plot_legend("Tiny-LLM\nexport with\ndimension in {0,1}", "torch.onnx.export", "tomato")
plot export tiny llm dim01 onnx

Total running time of the script: (4 minutes 22.009 seconds)

Related examples

Export with dynamic dimensions in {0,1} into ONNX (custom)

Export with dynamic dimensions in {0,1} into ONNX (custom)

Export with dynamic dimensions in {0,1}

Export with dynamic dimensions in {0,1}

Export microsoft/phi-2

Export microsoft/phi-2

Gallery generated by Sphinx-Gallery