Export with dynamic dimensions in {0,1} into ONNX
This duplicates the example Export with dynamic dimensions in {0,1}, but for
torch.onnx.export(). It checks which inputs can be used to export the model
and with which inputs the exported model works.
Available input sets
import itertools
from tqdm import tqdm
import numpy as np
import pandas
import torch
import onnxruntime
from onnx_diagnostic import doc
from onnx_diagnostic.helpers import max_diff, string_type, flatten_object
from onnx_diagnostic.helpers.torch_helper import torch_deepcopy
from onnx_diagnostic.helpers.rt_helper import make_feeds
from onnx_diagnostic.torch_models.hghub.model_inputs import get_untrained_model_with_inputs
from onnx_diagnostic.torch_export_patches.patch_inputs import use_dyn_not_str
from onnx_diagnostic.torch_export_patches import (
    torch_export_patches,
    register_additional_serialization_functions,
)
data = get_untrained_model_with_inputs("arnir0/Tiny-LLM", add_second_input=True)
model, dynamic_shapes = data["model"], data["dynamic_shapes"]
The trained model can be obtained with:
import transformers

MODEL_NAME = "arnir0/Tiny-LLM"
tokenizer = transformers.AutoTokenizer.from_pretrained(MODEL_NAME)
model = transformers.AutoModelForCausalLM.from_pretrained(MODEL_NAME)
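For reference, real inputs could then be built with the tokenizer rather than the synthetic ones used below. This is a hedged sketch, not part of the example: the prompt is arbitrary and only illustrates that a forward pass returns the same kind of DynamicCache as the generated input sets.

# Hypothetical usage of the trained model: tokenize a prompt and run one
# forward pass; the returned past_key_values is a DynamicCache similar to
# the synthetic inputs used in the rest of this example.
enc = tokenizer("Continue: it rains...", return_tensors="pt")
out = model(input_ids=enc["input_ids"], attention_mask=enc["attention_mask"])
print(out.logits.shape, type(out.past_key_values))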
input_sets = {k: v for k, v in data.items() if k.startswith("inputs")}
for k, v in input_sets.items():
    print(f"{k:20}: {string_type(v, with_shape=True)}")
inputs : dict(input_ids:T7s2x3,attention_mask:T7s2x33,position_ids:T7s2x3,past_key_values:DynamicCache(key_cache=#1[T1s2x1x30x96], value_cache=#1[T1s2x1x30x96]))
inputs2 : dict(input_ids:T7s3x4,attention_mask:T7s3x35,position_ids:T7s3x4,past_key_values:DynamicCache(key_cache=#1[T1s3x1x31x96], value_cache=#1[T1s3x1x31x96]))
inputs_empty_cache : dict(input_ids:T7s2x3,attention_mask:T7s2x3,position_ids:T7s2x3,past_key_values:DynamicCache(key_cache=#1[T1s2x1x0x96], value_cache=#1[T1s2x1x0x96]))
inputs_batch1 : dict(input_ids:T7s1x3,attention_mask:T7s1x33,position_ids:T7s1x3,past_key_values:DynamicCache(key_cache=#1[T1s1x1x30x96], value_cache=#1[T1s1x1x30x96]))
The dynamic shapes are:
print(f"dynamic_shapes: {string_type(dynamic_shapes)}")
dynamic_shapes: dict(input_ids:{0:DYN(batch),1:DYN(seq_length)},attention_mask:{0:DYN(batch),1:DYN(cache+seq)},position_ids:{0:DYN(batch),1:DYN(cache+seq)},past_key_values:#2[#1[{0:DYN(batch),2:DYN(cache_length)}],#1[{0:DYN(batch),2:DYN(cache_length)}]])
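The dimensions above are identified by string names. use_dyn_not_str is used below to turn this specification into something torch.export.export() accepts; roughly, it replaces every string with torch.export.Dim.DYNAMIC. The following is a simplified sketch of that idea, not the library implementation, and it assumes a recent torch where torch.export.Dim.DYNAMIC is available.

# Simplified, hypothetical equivalent of use_dyn_not_str: walk the
# dynamic_shapes structure and replace string axis names with
# torch.export.Dim.DYNAMIC.
def strings_to_dynamic(ds):
    if isinstance(ds, dict):
        return {k: strings_to_dynamic(v) for k, v in ds.items()}
    if isinstance(ds, (list, tuple)):
        return type(ds)(strings_to_dynamic(v) for v in ds)
    if isinstance(ds, str):
        return torch.export.Dim.DYNAMIC
    return ds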
Let’s check they all work and compute the expected values. We use deepcopy because caches are usually modified in place.
expected = {}
for k, v in input_sets.items():
    expected[k] = model(**torch_deepcopy(v))
    print(f"{k:20}: {string_type(expected[k], with_shape=True)}")
inputs : CausalLMOutputWithPast(logits:T1s2x3x32000,past_key_values:DynamicCache(key_cache=#1[T1s2x1x33x96], value_cache=#1[T1s2x1x33x96]))
inputs2 : CausalLMOutputWithPast(logits:T1s3x4x32000,past_key_values:DynamicCache(key_cache=#1[T1s3x1x35x96], value_cache=#1[T1s3x1x35x96]))
inputs_empty_cache : CausalLMOutputWithPast(logits:T1s2x3x32000,past_key_values:DynamicCache(key_cache=#1[T1s2x1x3x96], value_cache=#1[T1s2x1x3x96]))
inputs_batch1 : CausalLMOutputWithPast(logits:T1s1x3x32000,past_key_values:DynamicCache(key_cache=#1[T1s1x1x33x96], value_cache=#1[T1s1x1x33x96]))
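To see the in-place mutation the deepcopy protects against, one can probe a copy of the inputs. This is a small sketch using the objects defined above; the cache tensors grow after a forward call because the model updates the DynamicCache in place.

# Without torch_deepcopy, the next experiment would start from an already
# extended cache.
probe = torch_deepcopy(input_sets["inputs"])
before = probe["past_key_values"].key_cache[0].shape
model(**probe)
after = probe["past_key_values"].key_cache[0].shape
print(f"cache shape {before} -> {after}")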
Export with options
We try to export with the following options:
cache registration: register cache serialization with
onnx_diagnostic.torch_export_patches.register_additional_serialization_functions()
oblivious: an option to remove some of the exceptions raised by the exporter
rt: see prefer_deferred_runtime_asserts_over_guards
in torch.export.export()
cache_patch: patches the model before exporting with
onnx_diagnostic.torch_export_patches.torch_export_patches()
Some helper functions first.
def export_model(
    model, dynamic_shapes, inputs, cache=False, oblivious=False, rt=False, cache_patch=False
):
    if cache and not cache_patch:
        # cache registration: only register the cache serialization functions, then export
        with register_additional_serialization_functions(patch_transformers=True):
            return export_model(model, dynamic_shapes, inputs, oblivious=oblivious, rt=rt)
    if cache_patch:
        # cache_patch: patch torch and/or transformers before exporting
        # (the patches include cache serialization)
        with torch_export_patches(
            patch_torch=cache_patch in ("all", "torch", True, 1),
            patch_transformers=cache_patch in ("all", "transformers", True, 1),
        ):
            return export_model(model, dynamic_shapes, inputs, oblivious=oblivious, rt=rt)
    if oblivious:
        # oblivious: removes some of the exceptions raised by the exporter
        with torch.fx.experimental._config.patch(backed_size_oblivious=True):
            return export_model(model, dynamic_shapes, inputs, rt=rt)
    # export to an ExportedProgram first, then to ONNX
    ep = torch.export.export(
        model,
        (),
        inputs,
        dynamic_shapes=use_dyn_not_str(dynamic_shapes),
        prefer_deferred_runtime_asserts_over_guards=rt,
    )
    return torch.onnx.export(ep, args=(), kwargs=inputs, dynamic_shapes=dynamic_shapes)
def try_export_model(
    model, dynamic_shapes, inputs, cache=False, oblivious=False, rt=False, cache_patch=False
):
    # returns the exception instead of raising so the loop below can record it
    try:
        return export_model(
            model,
            dynamic_shapes,
            inputs,
            cache=cache,
            oblivious=oblivious,
            rt=rt,
            cache_patch=cache_patch,
        )
    except Exception as e:
        return e
def validation(ep, input_sets, expected, catch_exception=True):
    # runs the exported model with onnxruntime on every input set and yields
    # either the discrepancies (max_diff) or the raised exception
    sess = onnxruntime.InferenceSession(
        ep.model_proto.SerializeToString(), providers=["CPUExecutionProvider"]
    )
    for k, v in input_sets.items():
        feeds = make_feeds(sess, torch_deepcopy(v), use_numpy=True)
        try:
            got = sess.run(None, feeds)
        except Exception as e:
            if not catch_exception:
                raise
            yield k, e
            continue
        yield k, max_diff(flatten_object(expected[k], drop_keys=True), got)
Let's verify with an example known to be working.
ep = export_model(
    model,
    dynamic_shapes,
    torch_deepcopy(input_sets["inputs"]),
    cache_patch=True,
)
res = list(validation(ep, dict(inputs=input_sets["inputs"]), expected, catch_exception=False))
assert res[0][1]["abs"] < 1e-5, f"Unexpected issue with res={res}"
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
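The exported program can also be kept around for later use. Below is a minimal sketch (the file name is arbitrary) that serializes the model produced above and reopens it with onnxruntime; it only reuses calls already shown in this example.

# `ep` is the object returned by torch.onnx.export through export_model;
# its model_proto is a standard ONNX ModelProto.
with open("tiny_llm_dynamic.onnx", "wb") as f:
    f.write(ep.model_proto.SerializeToString())

sess = onnxruntime.InferenceSession(
    "tiny_llm_dynamic.onnx", providers=["CPUExecutionProvider"]
)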
The main loop
results = []
possibilities = [*[[0, 1] for _ in range(4)], list(input_sets)]
possibilities[1] = [0, "all", "torch", "transformers"]

with tqdm(list(itertools.product(*possibilities))) as pbar:
    for cache, cache_patch, oblivious, rt, inputs in pbar:
        if cache_patch and not cache:
            # patches include caches.
            continue
        kwargs = dict(cache=cache, cache_patch=cache_patch, oblivious=oblivious, rt=rt)
        legend = "-".join(
            (k if isinstance(v, int) else f"{k}:{v}") for k, v in kwargs.items() if v
        )
        legend = f"{legend}/{inputs}"
        pbar.set_description(f"{legend} EXPORT")

        # export
        ep = try_export_model(
            model, dynamic_shapes, torch_deepcopy(input_sets[inputs]), **kwargs
        )
        if isinstance(ep, Exception):
            # export failed, keep the first line of the error message
            obs = {
                **kwargs,
                "export_with": inputs,
                "EXPORT": 0,
                "ERR-EXPORT": str(ep).split("\n")[0],
            }
            results.append(obs)
            continue

        # validate the exported model against every input set
        pbar.set_description(f"{legend} VALIDATE")
        common = {**kwargs, "export_with": inputs, "EXPORT": 1}
        for inp, res in validation(ep, input_sets, expected):
            if isinstance(res, Exception):
                obs = {
                    **common,
                    "run_with": inp,
                    "ERR-RUN": str(res).split("\n")[0],
                    "WORKS": 0,
                }
            else:
                obs = {
                    **common,
                    "run_with": inp,
                    "WORKS": int(~np.isnan(res["abs"]) and res["abs"] < 1e-3),
                }
            results.append(obs)
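The progress output of the loop follows. Once it completes, the results list can be turned into a table with pandas (imported at the top); a short sketch:

# Each dictionary appended above becomes one row; rows recorded for a failed
# export simply leave the validation columns empty.
df = pandas.DataFrame(results)
print(df.head())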
0%| | 0/128 [00:00<?, ?it/s]
/inputs EXPORT: 0%| | 0/128 [00:00<?, ?it/s]
/inputs EXPORT: 1%| | 1/128 [00:00<00:24, 5.11it/s]
/inputs2 EXPORT: 1%| | 1/128 [00:00<00:24, 5.11it/s]
/inputs2 EXPORT: 2%|▏ | 2/128 [00:00<00:23, 5.35it/s]
/inputs_empty_cache EXPORT: 2%|▏ | 2/128 [00:00<00:23, 5.35it/s]
/inputs_empty_cache EXPORT: 2%|▏ | 3/128 [00:00<00:21, 5.77it/s]
/inputs_batch1 EXPORT: 2%|▏ | 3/128 [00:00<00:21, 5.77it/s]
/inputs_batch1 EXPORT: 3%|▎ | 4/128 [00:00<00:23, 5.23it/s]
rt/inputs EXPORT: 3%|▎ | 4/128 [00:00<00:23, 5.23it/s]
rt/inputs EXPORT: 4%|▍ | 5/128 [00:00<00:23, 5.33it/s]
rt/inputs2 EXPORT: 4%|▍ | 5/128 [00:00<00:23, 5.33it/s]
rt/inputs2 EXPORT: 5%|▍ | 6/128 [00:01<00:22, 5.40it/s]
rt/inputs_empty_cache EXPORT: 5%|▍ | 6/128 [00:01<00:22, 5.40it/s]
rt/inputs_empty_cache EXPORT: 5%|▌ | 7/128 [00:01<00:21, 5.66it/s]
rt/inputs_batch1 EXPORT: 5%|▌ | 7/128 [00:01<00:21, 5.66it/s]
rt/inputs_batch1 EXPORT: 6%|▋ | 8/128 [00:01<00:23, 5.09it/s]
oblivious/inputs EXPORT: 6%|▋ | 8/128 [00:01<00:23, 5.09it/s]
oblivious/inputs EXPORT: 7%|▋ | 9/128 [00:01<00:24, 4.89it/s]
oblivious/inputs2 EXPORT: 7%|▋ | 9/128 [00:01<00:24, 4.89it/s]
oblivious/inputs2 EXPORT: 8%|▊ | 10/128 [00:01<00:23, 4.96it/s]
oblivious/inputs_empty_cache EXPORT: 8%|▊ | 10/128 [00:01<00:23, 4.96it/s]
oblivious/inputs_empty_cache EXPORT: 9%|▊ | 11/128 [00:02<00:23, 5.01it/s]
oblivious/inputs_batch1 EXPORT: 9%|▊ | 11/128 [00:02<00:23, 5.01it/s]
oblivious/inputs_batch1 EXPORT: 9%|▉ | 12/128 [00:02<00:22, 5.13it/s]
oblivious-rt/inputs EXPORT: 9%|▉ | 12/128 [00:02<00:22, 5.13it/s]
oblivious-rt/inputs EXPORT: 10%|█ | 13/128 [00:02<00:21, 5.24it/s]
oblivious-rt/inputs2 EXPORT: 10%|█ | 13/128 [00:02<00:21, 5.24it/s]
oblivious-rt/inputs2 EXPORT: 11%|█ | 14/128 [00:02<00:21, 5.28it/s]
oblivious-rt/inputs_empty_cache EXPORT: 11%|█ | 14/128 [00:02<00:21, 5.28it/s]
oblivious-rt/inputs_empty_cache EXPORT: 12%|█▏ | 15/128 [00:02<00:21, 5.26it/s]
oblivious-rt/inputs_batch1 EXPORT: 12%|█▏ | 15/128 [00:02<00:21, 5.26it/s]
oblivious-rt/inputs_batch1 EXPORT: 12%|█▎ | 16/128 [00:03<00:21, 5.14it/s]
cache/inputs EXPORT: 12%|█▎ | 16/128 [00:03<00:21, 5.14it/s]
cache/inputs EXPORT: 51%|█████ | 65/128 [00:03<00:00, 80.44it/s]
cache/inputs2 EXPORT: 51%|█████ | 65/128 [00:03<00:00, 80.44it/s]
cache/inputs_empty_cache EXPORT: 51%|█████ | 65/128 [00:03<00:00, 80.44it/s]
cache/inputs_batch1 EXPORT: 51%|█████ | 65/128 [00:03<00:00, 80.44it/s]
cache-rt/inputs EXPORT: 51%|█████ | 65/128 [00:03<00:00, 80.44it/s]
cache-rt/inputs2 EXPORT: 51%|█████ | 65/128 [00:04<00:00, 80.44it/s]
cache-rt/inputs_empty_cache EXPORT: 51%|█████ | 65/128 [00:04<00:00, 80.44it/s]
cache-rt/inputs_batch1 EXPORT: 51%|█████ | 65/128 [00:04<00:00, 80.44it/s]
cache-oblivious/inputs EXPORT: 51%|█████ | 65/128 [00:04<00:00, 80.44it/s]
cache-oblivious/inputs2 EXPORT: 51%|█████ | 65/128 [00:04<00:00, 80.44it/s]
cache-oblivious/inputs2 EXPORT: 58%|█████▊ | 74/128 [00:04<00:02, 20.86it/s]
cache-oblivious/inputs_empty_cache EXPORT: 58%|█████▊ | 74/128 [00:04<00:02, 20.86it/s]
cache-oblivious/inputs_batch1 EXPORT: 58%|█████▊ | 74/128 [00:05<00:02, 20.86it/s]
cache-oblivious-rt/inputs EXPORT: 58%|█████▊ | 74/128 [00:05<00:02, 20.86it/s]
cache-oblivious-rt/inputs2 EXPORT: 58%|█████▊ | 74/128 [00:05<00:02, 20.86it/s]
cache-oblivious-rt/inputs_empty_cache EXPORT: 58%|█████▊ | 74/128 [00:06<00:02, 20.86it/s]
cache-oblivious-rt/inputs_batch1 EXPORT: 58%|█████▊ | 74/128 [00:07<00:02, 20.86it/s]
cache-oblivious-rt/inputs_batch1 EXPORT: 62%|██████▎ | 80/128 [00:07<00:04, 9.65it/s]
cache-cache_patch:all/inputs EXPORT: 62%|██████▎ | 80/128 [00:07<00:04, 9.65it/s] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:all/inputs VALIDATE: 62%|██████▎ | 80/128 [00:11<00:04, 9.65it/s]
cache-cache_patch:all/inputs2 EXPORT: 62%|██████▎ | 80/128 [00:11<00:04, 9.65it/s] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:all/inputs2 VALIDATE: 62%|██████▎ | 80/128 [00:15<00:04, 9.65it/s]
cache-cache_patch:all/inputs_empty_cache EXPORT: 62%|██████▎ | 80/128 [00:15<00:04, 9.65it/s][torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 47 of general pattern rewrite rules.
cache-cache_patch:all/inputs_empty_cache VALIDATE: 62%|██████▎ | 80/128 [00:20<00:04, 9.65it/s]
cache-cache_patch:all/inputs_batch1 EXPORT: 62%|██████▎ | 80/128 [00:20<00:04, 9.65it/s] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 17 of general pattern rewrite rules.
cache-cache_patch:all/inputs_batch1 VALIDATE: 62%|██████▎ | 80/128 [00:24<00:04, 9.65it/s]
cache-cache_patch:all-rt/inputs EXPORT: 62%|██████▎ | 80/128 [00:24<00:04, 9.65it/s] [torch.onnx] Run decomposition...
cache-cache_patch:all-rt/inputs EXPORT: 62%|██████▎ | 80/128 [00:25<00:04, 9.65it/s][torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:all-rt/inputs VALIDATE: 62%|██████▎ | 80/128 [00:26<00:04, 9.65it/s]
cache-cache_patch:all-rt/inputs VALIDATE: 66%|██████▋ | 85/128 [00:27<00:30, 1.39it/s]
cache-cache_patch:all-rt/inputs2 EXPORT: 66%|██████▋ | 85/128 [00:27<00:30, 1.39it/s] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:all-rt/inputs2 VALIDATE: 66%|██████▋ | 85/128 [00:31<00:30, 1.39it/s]
cache-cache_patch:all-rt/inputs2 VALIDATE: 67%|██████▋ | 86/128 [00:31<00:37, 1.13it/s]
cache-cache_patch:all-rt/inputs_empty_cache EXPORT: 67%|██████▋ | 86/128 [00:31<00:37, 1.13it/s][torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 47 of general pattern rewrite rules.
cache-cache_patch:all-rt/inputs_empty_cache VALIDATE: 67%|██████▋ | 86/128 [00:36<00:37, 1.13it/s]
cache-cache_patch:all-rt/inputs_batch1 EXPORT: 67%|██████▋ | 86/128 [00:37<00:37, 1.13it/s] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 17 of general pattern rewrite rules.
cache-cache_patch:all-rt/inputs_batch1 VALIDATE: 67%|██████▋ | 86/128 [00:40<00:37, 1.13it/s]
cache-cache_patch:all-oblivious/inputs EXPORT: 67%|██████▋ | 86/128 [00:40<00:37, 1.13it/s] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:all-oblivious/inputs VALIDATE: 67%|██████▋ | 86/128 [00:44<00:37, 1.13it/s]
cache-cache_patch:all-oblivious/inputs VALIDATE: 70%|██████▉ | 89/128 [00:44<00:56, 1.44s/it]
cache-cache_patch:all-oblivious/inputs2 EXPORT: 70%|██████▉ | 89/128 [00:44<00:56, 1.44s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:all-oblivious/inputs2 VALIDATE: 70%|██████▉ | 89/128 [00:50<00:56, 1.44s/it]
cache-cache_patch:all-oblivious/inputs2 VALIDATE: 70%|███████ | 90/128 [00:50<01:05, 1.73s/it]
cache-cache_patch:all-oblivious/inputs_empty_cache EXPORT: 70%|███████ | 90/128 [00:50<01:05, 1.73s/it][torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:all-oblivious/inputs_empty_cache VALIDATE: 70%|███████ | 90/128 [00:54<01:05, 1.73s/it]
cache-cache_patch:all-oblivious/inputs_batch1 EXPORT: 70%|███████ | 90/128 [00:54<01:05, 1.73s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ❌
cache-cache_patch:all-oblivious-rt/inputs EXPORT: 70%|███████ | 90/128 [00:56<01:05, 1.73s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:all-oblivious-rt/inputs VALIDATE: 70%|███████ | 90/128 [00:58<01:05, 1.73s/it]
cache-cache_patch:all-oblivious-rt/inputs VALIDATE: 73%|███████▎ | 93/128 [00:59<01:10, 2.02s/it]
cache-cache_patch:all-oblivious-rt/inputs2 EXPORT: 73%|███████▎ | 93/128 [00:59<01:10, 2.02s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:all-oblivious-rt/inputs2 VALIDATE: 73%|███████▎ | 93/128 [01:04<01:10, 2.02s/it]
cache-cache_patch:all-oblivious-rt/inputs_empty_cache EXPORT: 73%|███████▎ | 93/128 [01:05<01:10, 2.02s/it][torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:all-oblivious-rt/inputs_empty_cache VALIDATE: 73%|███████▎ | 93/128 [01:09<01:10, 2.02s/it]
cache-cache_patch:all-oblivious-rt/inputs_empty_cache VALIDATE: 74%|███████▍ | 95/128 [01:09<01:25, 2.60s/it]
cache-cache_patch:all-oblivious-rt/inputs_batch1 EXPORT: 74%|███████▍ | 95/128 [01:09<01:25, 2.60s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ❌
cache-cache_patch:all-oblivious-rt/inputs_batch1 EXPORT: 75%|███████▌ | 96/128 [01:13<01:29, 2.78s/it]
cache-cache_patch:torch/inputs EXPORT: 75%|███████▌ | 96/128 [01:13<01:29, 2.78s/it]
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[s72, s70]", arg14_1: "i64[s43, s53]", arg15_1: "i64[s44, s9]", arg16_1: "f32[s23, 1, s31, 96]", arg17_1: "f32[s4, 1, s11, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[s23, 1, s31, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = None
cat_1: "f32[s4, 1, s11, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[s72, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:371 in forward, code: past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
sym_numel_default: "Sym(96*s23*s31)" = torch.ops.aten.sym_numel.default(cat)
eq: "Sym(False)" = sym_numel_default == 0; eq = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s31)" = torch.ops.aten.sym_size.int(cat, 2); cat = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1)
add: "Sym(s31 + s70)" = sym_size_int + sym_size_int_1; sym_size_int_1 = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(sym_size_int, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[s43, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
eq_1: "Sym(False)" = sym_numel_default == 0; sym_numel_default = eq_1 = None
sym_size_int_2: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s31 + s70)" = sym_size_int + sym_size_int_2; sym_size_int = None
add_2: "Sym(s31 + s70)" = add_1 + 0
sym_size_int_3: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(s31 - s53 + s70)" = add_2 - sym_size_int_3; add_2 = None
gt: "Sym(s31 - s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s31 + s70)" = sym_size_int_3 > add_1; sym_size_int_3 = gt_1 = None
arange_1: "i64[s31 + s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s31 + s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
sym_size_int_4: "Sym(s72)" = torch.ops.aten.sym_size.int(arg13_1, 0); arg13_1 = None
arange_2: "i64[s72]" = torch.ops.aten.arange.default(sym_size_int_4, device = device(type='cpu'), pin_memory = False); sym_size_int_4 = None
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[s72]" = torch.ops.aten.movedim.int(arange_2, 0, 0); movedim = None
unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(arange_3, 0); arange_3 = None
sym_size_int_5: "Sym(s72)" = torch.ops.aten.sym_size.int(arange_2, 0); arange_2 = None
expand: "i64[s72, 1]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_5, 1]); unsqueeze = expand = None
unsqueeze_1: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(arange, 0); arange = None
expand_1: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_5, sym_size_int_2]); unsqueeze_1 = sym_size_int_2 = expand_1 = None
unsqueeze_2: "i64[1, s31 + s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_6: "Sym(s31 + s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s72, s31 + s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_5, sym_size_int_6]); unsqueeze_2 = sym_size_int_5 = sym_size_int_6 = expand_2 = None
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[s72, s70]", arg14_1: "i64[s43, s53]", arg15_1: "i64[s44, s9]", arg16_1: "f32[s23, 1, s31, 96]", arg17_1: "f32[s4, 1, s11, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[s23, 1, s31, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = None
cat_1: "f32[s4, 1, s11, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[s72, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:371 in forward, code: past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
sym_numel_default: "Sym(96*s23*s31)" = torch.ops.aten.sym_numel.default(cat)
eq: "Sym(False)" = sym_numel_default == 0; eq = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s31)" = torch.ops.aten.sym_size.int(cat, 2); cat = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1)
add: "Sym(s31 + s70)" = sym_size_int + sym_size_int_1; sym_size_int_1 = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(sym_size_int, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[s43, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
eq_1: "Sym(False)" = sym_numel_default == 0; sym_numel_default = eq_1 = None
sym_size_int_2: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s31 + s70)" = sym_size_int + sym_size_int_2; sym_size_int = None
add_2: "Sym(s31 + s70)" = add_1 + 0
sym_size_int_3: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(s31 - s53 + s70)" = add_2 - sym_size_int_3; add_2 = None
gt: "Sym(s31 - s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s31 + s70)" = sym_size_int_3 > add_1; sym_size_int_3 = gt_1 = None
arange_1: "i64[s31 + s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s31 + s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
sym_size_int_4: "Sym(s72)" = torch.ops.aten.sym_size.int(arg13_1, 0); arg13_1 = None
arange_2: "i64[s72]" = torch.ops.aten.arange.default(sym_size_int_4, device = device(type='cpu'), pin_memory = False); sym_size_int_4 = None
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[s72]" = torch.ops.aten.movedim.int(arange_2, 0, 0); movedim = None
unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(arange_3, 0); arange_3 = None
sym_size_int_5: "Sym(s72)" = torch.ops.aten.sym_size.int(arange_2, 0); arange_2 = None
expand: "i64[s72, 1]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_5, 1]); unsqueeze = expand = None
unsqueeze_1: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(arange, 0); arange = None
expand_1: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_5, sym_size_int_2]); unsqueeze_1 = sym_size_int_2 = expand_1 = None
unsqueeze_2: "i64[1, s31 + s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_6: "Sym(s31 + s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s72, s31 + s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_5, sym_size_int_6]); unsqueeze_2 = sym_size_int_5 = sym_size_int_6 = expand_2 = None
cache-cache_patch:torch/inputs2 EXPORT: 75%|███████▌ | 96/128 [01:14<01:29, 2.78s/it]
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[s72, s70]", arg14_1: "i64[s43, s53]", arg15_1: "i64[s44, s9]", arg16_1: "f32[s23, 1, s31, 96]", arg17_1: "f32[s4, 1, s11, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[s23, 1, s31, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = None
cat_1: "f32[s4, 1, s11, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[s72, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:371 in forward, code: past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
sym_numel_default: "Sym(96*s23*s31)" = torch.ops.aten.sym_numel.default(cat)
eq: "Sym(False)" = sym_numel_default == 0; eq = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s31)" = torch.ops.aten.sym_size.int(cat, 2); cat = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1)
add: "Sym(s31 + s70)" = sym_size_int + sym_size_int_1; sym_size_int_1 = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(sym_size_int, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[s43, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
eq_1: "Sym(False)" = sym_numel_default == 0; sym_numel_default = eq_1 = None
sym_size_int_2: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s31 + s70)" = sym_size_int + sym_size_int_2; sym_size_int = None
add_2: "Sym(s31 + s70)" = add_1 + 0
sym_size_int_3: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(s31 - s53 + s70)" = add_2 - sym_size_int_3; add_2 = None
gt: "Sym(s31 - s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s31 + s70)" = sym_size_int_3 > add_1; sym_size_int_3 = gt_1 = None
arange_1: "i64[s31 + s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s31 + s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
sym_size_int_4: "Sym(s72)" = torch.ops.aten.sym_size.int(arg13_1, 0); arg13_1 = None
arange_2: "i64[s72]" = torch.ops.aten.arange.default(sym_size_int_4, device = device(type='cpu'), pin_memory = False); sym_size_int_4 = None
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[s72]" = torch.ops.aten.movedim.int(arange_2, 0, 0); movedim = None
unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(arange_3, 0); arange_3 = None
sym_size_int_5: "Sym(s72)" = torch.ops.aten.sym_size.int(arange_2, 0); arange_2 = None
expand: "i64[s72, 1]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_5, 1]); unsqueeze = expand = None
unsqueeze_1: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(arange, 0); arange = None
expand_1: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_5, sym_size_int_2]); unsqueeze_1 = sym_size_int_2 = expand_1 = None
unsqueeze_2: "i64[1, s31 + s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_6: "Sym(s31 + s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s72, s31 + s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_5, sym_size_int_6]); unsqueeze_2 = sym_size_int_5 = sym_size_int_6 = expand_2 = None
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[s72, s70]", arg14_1: "i64[s43, s53]", arg15_1: "i64[s44, s9]", arg16_1: "f32[s23, 1, s31, 96]", arg17_1: "f32[s4, 1, s11, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[s23, 1, s31, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = None
cat_1: "f32[s4, 1, s11, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[s72, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:371 in forward, code: past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
sym_numel_default: "Sym(96*s23*s31)" = torch.ops.aten.sym_numel.default(cat)
eq: "Sym(False)" = sym_numel_default == 0; eq = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s31)" = torch.ops.aten.sym_size.int(cat, 2); cat = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1)
add: "Sym(s31 + s70)" = sym_size_int + sym_size_int_1; sym_size_int_1 = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(sym_size_int, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[s43, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
eq_1: "Sym(False)" = sym_numel_default == 0; sym_numel_default = eq_1 = None
sym_size_int_2: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s31 + s70)" = sym_size_int + sym_size_int_2; sym_size_int = None
add_2: "Sym(s31 + s70)" = add_1 + 0
sym_size_int_3: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(s31 - s53 + s70)" = add_2 - sym_size_int_3; add_2 = None
gt: "Sym(s31 - s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s31 + s70)" = sym_size_int_3 > add_1; sym_size_int_3 = gt_1 = None
arange_1: "i64[s31 + s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s31 + s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
sym_size_int_4: "Sym(s72)" = torch.ops.aten.sym_size.int(arg13_1, 0); arg13_1 = None
arange_2: "i64[s72]" = torch.ops.aten.arange.default(sym_size_int_4, device = device(type='cpu'), pin_memory = False); sym_size_int_4 = None
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[s72]" = torch.ops.aten.movedim.int(arange_2, 0, 0); movedim = None
unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(arange_3, 0); arange_3 = None
sym_size_int_5: "Sym(s72)" = torch.ops.aten.sym_size.int(arange_2, 0); arange_2 = None
expand: "i64[s72, 1]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_5, 1]); unsqueeze = expand = None
unsqueeze_1: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(arange, 0); arange = None
expand_1: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_5, sym_size_int_2]); unsqueeze_1 = sym_size_int_2 = expand_1 = None
unsqueeze_2: "i64[1, s31 + s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_6: "Sym(s31 + s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s72, s31 + s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_5, sym_size_int_6]); unsqueeze_2 = sym_size_int_5 = sym_size_int_6 = expand_2 = None
cache-cache_patch:torch/inputs2 EXPORT: 77%|███████▋ | 98/128 [01:14<01:04, 2.16s/it]
cache-cache_patch:torch/inputs_empty_cache EXPORT: 77%|███████▋ | 98/128 [01:14<01:04, 2.16s/it]
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[s72, s70]", arg14_1: "i64[s43, s53]", arg15_1: "i64[s44, s9]", arg16_1: "f32[s23, 1, 0, 96]", arg17_1: "f32[s4, 1, 0, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[s23, 1, 0, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = cat = None
cat_1: "f32[s4, 1, 0, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[s72, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1)
add: "Sym(s70)" = 0 + sym_size_int; sym_size_int = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(0, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[s43, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s70)" = 0 + sym_size_int_1
add_2: "Sym(s70)" = add_1 + 0
sym_size_int_2: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(-s53 + s70)" = add_2 - sym_size_int_2; add_2 = None
gt: "Sym(-s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s70)" = sym_size_int_2 > add_1; sym_size_int_2 = gt_1 = None
arange_1: "i64[s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
sym_size_int_3: "Sym(s72)" = torch.ops.aten.sym_size.int(arg13_1, 0); arg13_1 = None
arange_2: "i64[s72]" = torch.ops.aten.arange.default(sym_size_int_3, device = device(type='cpu'), pin_memory = False); sym_size_int_3 = None
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[s72]" = torch.ops.aten.movedim.int(arange_2, 0, 0); movedim = None
unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(arange_3, 0); arange_3 = None
sym_size_int_4: "Sym(s72)" = torch.ops.aten.sym_size.int(arange_2, 0); arange_2 = None
expand: "i64[s72, 1]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_4, 1]); unsqueeze = expand = None
unsqueeze_1: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(arange, 0); arange = None
expand_1: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_4, sym_size_int_1]); unsqueeze_1 = sym_size_int_1 = expand_1 = None
unsqueeze_2: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_5: "Sym(s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_4, sym_size_int_5]); unsqueeze_2 = sym_size_int_4 = sym_size_int_5 = expand_2 = None
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[s72, s70]", arg14_1: "i64[s43, s53]", arg15_1: "i64[s44, s9]", arg16_1: "f32[s23, 1, 0, 96]", arg17_1: "f32[s4, 1, 0, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[s23, 1, 0, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = cat = None
cat_1: "f32[s4, 1, 0, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[s72, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1)
add: "Sym(s70)" = 0 + sym_size_int; sym_size_int = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(0, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[s43, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s70)" = 0 + sym_size_int_1
add_2: "Sym(s70)" = add_1 + 0
sym_size_int_2: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(-s53 + s70)" = add_2 - sym_size_int_2; add_2 = None
gt: "Sym(-s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s70)" = sym_size_int_2 > add_1; sym_size_int_2 = gt_1 = None
arange_1: "i64[s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
sym_size_int_3: "Sym(s72)" = torch.ops.aten.sym_size.int(arg13_1, 0); arg13_1 = None
arange_2: "i64[s72]" = torch.ops.aten.arange.default(sym_size_int_3, device = device(type='cpu'), pin_memory = False); sym_size_int_3 = None
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[s72]" = torch.ops.aten.movedim.int(arange_2, 0, 0); movedim = None
unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(arange_3, 0); arange_3 = None
sym_size_int_4: "Sym(s72)" = torch.ops.aten.sym_size.int(arange_2, 0); arange_2 = None
expand: "i64[s72, 1]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_4, 1]); unsqueeze = expand = None
unsqueeze_1: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(arange, 0); arange = None
expand_1: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_4, sym_size_int_1]); unsqueeze_1 = sym_size_int_1 = expand_1 = None
unsqueeze_2: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_5: "Sym(s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_4, sym_size_int_5]); unsqueeze_2 = sym_size_int_4 = sym_size_int_5 = expand_2 = None
cache-cache_patch:torch/inputs_empty_cache EXPORT: 77%|███████▋ | 99/128 [01:14<00:54, 1.87s/it]
cache-cache_patch:torch/inputs_batch1 EXPORT: 77%|███████▋ | 99/128 [01:14<00:54, 1.87s/it]
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[1, s70]", arg14_1: "i64[1, s53]", arg15_1: "i64[1, s9]", arg16_1: "f32[1, 1, s31, 96]", arg17_1: "f32[1, 1, s11, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[1, 1, s31, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = None
cat_1: "f32[1, 1, s11, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[1, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:371 in forward, code: past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
sym_numel_default: "Sym(96*s31)" = torch.ops.aten.sym_numel.default(cat)
eq: "Sym(False)" = sym_numel_default == 0; eq = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s31)" = torch.ops.aten.sym_size.int(cat, 2); cat = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1); arg13_1 = None
add: "Sym(s31 + s70)" = sym_size_int + sym_size_int_1; sym_size_int_1 = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(sym_size_int, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[1, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
eq_1: "Sym(False)" = sym_numel_default == 0; sym_numel_default = eq_1 = None
sym_size_int_2: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s31 + s70)" = sym_size_int + sym_size_int_2; sym_size_int = None
add_2: "Sym(s31 + s70)" = add_1 + 0
sym_size_int_3: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(s31 - s53 + s70)" = add_2 - sym_size_int_3; add_2 = None
gt: "Sym(s31 - s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s31 + s70)" = sym_size_int_3 > add_1; sym_size_int_3 = gt_1 = None
arange_1: "i64[s31 + s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s31 + s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
arange_2: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[1]" = torch.ops.aten.movedim.int(arange_2, 0, 0); arange_2 = None
select: "i64[]" = torch.ops.aten.select.int(movedim, 0, 0); movedim = None
movedim_1: "i64[1]" = torch.ops.aten.movedim.int(arange_3, 0, 0); arange_3 = None
select_1: "i64[]" = torch.ops.aten.select.int(movedim_1, 0, 0); movedim_1 = None
movedim_2: "i64[s70]" = torch.ops.aten.movedim.int(arange, 0, 0); arange = movedim_2 = None
unsqueeze: "i64[1]" = torch.ops.aten.unsqueeze.default(select, 0); select = None
expand: "i64[s70]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_2]); unsqueeze = expand = None
unsqueeze_1: "i64[1]" = torch.ops.aten.unsqueeze.default(select_1, 0); select_1 = None
expand_1: "i64[s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_2]); unsqueeze_1 = expand_1 = None
unsqueeze_2: "i64[1, s31 + s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_4: "Sym(s31 + s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s70, s31 + s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_2, sym_size_int_4]); unsqueeze_2 = sym_size_int_2 = sym_size_int_4 = expand_2 = None
cache-cache_patch:torch/inputs_batch1 EXPORT: 78%|███████▊ | 100/128 [01:14<00:43, 1.57s/it]
cache-cache_patch:torch-rt/inputs EXPORT: 78%|███████▊ | 100/128 [01:14<00:43, 1.57s/it]
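The raw graphs above are easier to read when reproduced for a single configuration. The sketch below is illustrative only, not the exact call made by the benchmark loop: it reuses the model, input_sets, dynamic_shapes and the helpers imported at the top of this example (torch_deepcopy, use_dyn_not_str, torch_export_patches) to export one input set and print the graph with its symbolic dimensions.

# Illustrative sketch, assuming model, input_sets, dynamic_shapes and the
# helpers imported earlier in this example are in scope.
with torch_export_patches():  # apply the default export patches (cache serialization included)
    ep = torch.export.export(
        model,
        (),
        kwargs=torch_deepcopy(input_sets["inputs"]),
        dynamic_shapes=use_dyn_not_str(dynamic_shapes),
        strict=False,
    )
print(ep)  # the ExportedProgram shows symbolic dimensions like the s70, s31, ... named above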
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[s72, s70]", arg14_1: "i64[s43, s53]", arg15_1: "i64[s44, s9]", arg16_1: "f32[s23, 1, s31, 96]", arg17_1: "f32[s4, 1, s11, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[s23, 1, s31, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = None
cat_1: "f32[s4, 1, s11, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[s72, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:371 in forward, code: past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
sym_numel_default: "Sym(96*s23*s31)" = torch.ops.aten.sym_numel.default(cat)
eq: "Sym(False)" = sym_numel_default == 0; eq = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s31)" = torch.ops.aten.sym_size.int(cat, 2); cat = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1)
add: "Sym(s31 + s70)" = sym_size_int + sym_size_int_1; sym_size_int_1 = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(sym_size_int, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[s43, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
eq_1: "Sym(False)" = sym_numel_default == 0; sym_numel_default = eq_1 = None
sym_size_int_2: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s31 + s70)" = sym_size_int + sym_size_int_2; sym_size_int = None
add_2: "Sym(s31 + s70)" = add_1 + 0
sym_size_int_3: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(s31 - s53 + s70)" = add_2 - sym_size_int_3; add_2 = None
gt: "Sym(s31 - s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s31 + s70)" = sym_size_int_3 > add_1; sym_size_int_3 = gt_1 = None
arange_1: "i64[s31 + s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s31 + s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
sym_size_int_4: "Sym(s72)" = torch.ops.aten.sym_size.int(arg13_1, 0); arg13_1 = None
arange_2: "i64[s72]" = torch.ops.aten.arange.default(sym_size_int_4, device = device(type='cpu'), pin_memory = False); sym_size_int_4 = None
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[s72]" = torch.ops.aten.movedim.int(arange_2, 0, 0); movedim = None
unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(arange_3, 0); arange_3 = None
sym_size_int_5: "Sym(s72)" = torch.ops.aten.sym_size.int(arange_2, 0); arange_2 = None
expand: "i64[s72, 1]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_5, 1]); unsqueeze = expand = None
unsqueeze_1: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(arange, 0); arange = None
expand_1: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_5, sym_size_int_2]); unsqueeze_1 = sym_size_int_2 = expand_1 = None
unsqueeze_2: "i64[1, s31 + s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_6: "Sym(s31 + s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s72, s31 + s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_5, sym_size_int_6]); unsqueeze_2 = sym_size_int_5 = sym_size_int_6 = expand_2 = None
cache-cache_patch:torch-rt/inputs EXPORT: 79%|███████▉ | 101/128 [01:15<00:35, 1.30s/it]
cache-cache_patch:torch-rt/inputs2 EXPORT: 79%|███████▉ | 101/128 [01:15<00:35, 1.30s/it]
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[s72, s70]", arg14_1: "i64[s43, s53]", arg15_1: "i64[s44, s9]", arg16_1: "f32[s23, 1, s31, 96]", arg17_1: "f32[s4, 1, s11, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[s23, 1, s31, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = None
cat_1: "f32[s4, 1, s11, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[s72, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:371 in forward, code: past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
sym_numel_default: "Sym(96*s23*s31)" = torch.ops.aten.sym_numel.default(cat)
eq: "Sym(False)" = sym_numel_default == 0; eq = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s31)" = torch.ops.aten.sym_size.int(cat, 2); cat = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1)
add: "Sym(s31 + s70)" = sym_size_int + sym_size_int_1; sym_size_int_1 = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(sym_size_int, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[s43, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
eq_1: "Sym(False)" = sym_numel_default == 0; sym_numel_default = eq_1 = None
sym_size_int_2: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s31 + s70)" = sym_size_int + sym_size_int_2; sym_size_int = None
add_2: "Sym(s31 + s70)" = add_1 + 0
sym_size_int_3: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(s31 - s53 + s70)" = add_2 - sym_size_int_3; add_2 = None
gt: "Sym(s31 - s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s31 + s70)" = sym_size_int_3 > add_1; sym_size_int_3 = gt_1 = None
arange_1: "i64[s31 + s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s31 + s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
sym_size_int_4: "Sym(s72)" = torch.ops.aten.sym_size.int(arg13_1, 0); arg13_1 = None
arange_2: "i64[s72]" = torch.ops.aten.arange.default(sym_size_int_4, device = device(type='cpu'), pin_memory = False); sym_size_int_4 = None
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[s72]" = torch.ops.aten.movedim.int(arange_2, 0, 0); movedim = None
unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(arange_3, 0); arange_3 = None
sym_size_int_5: "Sym(s72)" = torch.ops.aten.sym_size.int(arange_2, 0); arange_2 = None
expand: "i64[s72, 1]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_5, 1]); unsqueeze = expand = None
unsqueeze_1: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(arange, 0); arange = None
expand_1: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_5, sym_size_int_2]); unsqueeze_1 = sym_size_int_2 = expand_1 = None
unsqueeze_2: "i64[1, s31 + s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_6: "Sym(s31 + s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s72, s31 + s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_5, sym_size_int_6]); unsqueeze_2 = sym_size_int_5 = sym_size_int_6 = expand_2 = None
cache-cache_patch:torch-rt/inputs2 EXPORT: 80%|███████▉ | 102/128 [01:15<00:27, 1.05s/it]
cache-cache_patch:torch-rt/inputs_empty_cache EXPORT: 80%|███████▉ | 102/128 [01:15<00:27, 1.05s/it]
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[s72, s70]", arg14_1: "i64[s43, s53]", arg15_1: "i64[s44, s9]", arg16_1: "f32[s23, 1, 0, 96]", arg17_1: "f32[s4, 1, 0, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[s23, 1, 0, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = cat = None
cat_1: "f32[s4, 1, 0, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[s72, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1)
add: "Sym(s70)" = 0 + sym_size_int; sym_size_int = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(0, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[s43, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s70)" = 0 + sym_size_int_1
add_2: "Sym(s70)" = add_1 + 0
sym_size_int_2: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(-s53 + s70)" = add_2 - sym_size_int_2; add_2 = None
gt: "Sym(-s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s70)" = sym_size_int_2 > add_1; sym_size_int_2 = gt_1 = None
arange_1: "i64[s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
sym_size_int_3: "Sym(s72)" = torch.ops.aten.sym_size.int(arg13_1, 0); arg13_1 = None
arange_2: "i64[s72]" = torch.ops.aten.arange.default(sym_size_int_3, device = device(type='cpu'), pin_memory = False); sym_size_int_3 = None
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[s72]" = torch.ops.aten.movedim.int(arange_2, 0, 0); movedim = None
unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(arange_3, 0); arange_3 = None
sym_size_int_4: "Sym(s72)" = torch.ops.aten.sym_size.int(arange_2, 0); arange_2 = None
expand: "i64[s72, 1]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_4, 1]); unsqueeze = expand = None
unsqueeze_1: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(arange, 0); arange = None
expand_1: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_4, sym_size_int_1]); unsqueeze_1 = sym_size_int_1 = expand_1 = None
unsqueeze_2: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_5: "Sym(s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_4, sym_size_int_5]); unsqueeze_2 = sym_size_int_4 = sym_size_int_5 = expand_2 = None
cache-cache_patch:torch-rt/inputs_empty_cache EXPORT: 80%|████████ | 103/128 [01:15<00:21, 1.18it/s]
cache-cache_patch:torch-rt/inputs_batch1 EXPORT: 80%|████████ | 103/128 [01:15<00:21, 1.18it/s]
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[1, s70]", arg14_1: "i64[1, s53]", arg15_1: "i64[1, s9]", arg16_1: "f32[1, 1, s31, 96]", arg17_1: "f32[1, 1, s11, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[1, 1, s31, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = None
cat_1: "f32[1, 1, s11, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[1, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:371 in forward, code: past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
sym_numel_default: "Sym(96*s31)" = torch.ops.aten.sym_numel.default(cat)
eq: "Sym(False)" = sym_numel_default == 0; eq = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s31)" = torch.ops.aten.sym_size.int(cat, 2); cat = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1); arg13_1 = None
add: "Sym(s31 + s70)" = sym_size_int + sym_size_int_1; sym_size_int_1 = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(sym_size_int, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[1, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
eq_1: "Sym(False)" = sym_numel_default == 0; sym_numel_default = eq_1 = None
sym_size_int_2: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s31 + s70)" = sym_size_int + sym_size_int_2; sym_size_int = None
add_2: "Sym(s31 + s70)" = add_1 + 0
sym_size_int_3: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(s31 - s53 + s70)" = add_2 - sym_size_int_3; add_2 = None
gt: "Sym(s31 - s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s31 + s70)" = sym_size_int_3 > add_1; sym_size_int_3 = gt_1 = None
arange_1: "i64[s31 + s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s31 + s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
arange_2: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[1]" = torch.ops.aten.movedim.int(arange_2, 0, 0); arange_2 = None
select: "i64[]" = torch.ops.aten.select.int(movedim, 0, 0); movedim = None
movedim_1: "i64[1]" = torch.ops.aten.movedim.int(arange_3, 0, 0); arange_3 = None
select_1: "i64[]" = torch.ops.aten.select.int(movedim_1, 0, 0); movedim_1 = None
movedim_2: "i64[s70]" = torch.ops.aten.movedim.int(arange, 0, 0); arange = movedim_2 = None
unsqueeze: "i64[1]" = torch.ops.aten.unsqueeze.default(select, 0); select = None
expand: "i64[s70]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_2]); unsqueeze = expand = None
unsqueeze_1: "i64[1]" = torch.ops.aten.unsqueeze.default(select_1, 0); select_1 = None
expand_1: "i64[s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_2]); unsqueeze_1 = expand_1 = None
unsqueeze_2: "i64[1, s31 + s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_4: "Sym(s31 + s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s70, s31 + s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_2, sym_size_int_4]); unsqueeze_2 = sym_size_int_2 = sym_size_int_4 = expand_2 = None
cache-cache_patch:torch-rt/inputs_batch1 EXPORT: 81%|████████▏ | 104/128 [01:15<00:16, 1.46it/s]
cache-cache_patch:torch-oblivious/inputs EXPORT: 81%|████████▏ | 104/128 [01:15<00:16, 1.46it/s]
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[s72, s70]", arg14_1: "i64[s43, s53]", arg15_1: "i64[s44, s9]", arg16_1: "f32[s23, 1, s31, 96]", arg17_1: "f32[s4, 1, s11, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[s23, 1, s31, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = None
cat_1: "f32[s4, 1, s11, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[s72, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:371 in forward, code: past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
sym_numel_default: "Sym(96*s23*s31)" = torch.ops.aten.sym_numel.default(cat)
eq: "Sym(False)" = sym_numel_default == 0; eq = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s31)" = torch.ops.aten.sym_size.int(cat, 2); cat = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1)
add: "Sym(s31 + s70)" = sym_size_int + sym_size_int_1; sym_size_int_1 = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(sym_size_int, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[s43, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
eq_1: "Sym(False)" = sym_numel_default == 0; sym_numel_default = eq_1 = None
sym_size_int_2: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s31 + s70)" = sym_size_int + sym_size_int_2; sym_size_int = None
add_2: "Sym(s31 + s70)" = add_1 + 0
sym_size_int_3: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(s31 - s53 + s70)" = add_2 - sym_size_int_3; add_2 = None
gt: "Sym(s31 - s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s31 + s70)" = sym_size_int_3 > add_1; sym_size_int_3 = gt_1 = None
arange_1: "i64[s31 + s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s31 + s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
sym_size_int_4: "Sym(s72)" = torch.ops.aten.sym_size.int(arg13_1, 0); arg13_1 = None
arange_2: "i64[s72]" = torch.ops.aten.arange.default(sym_size_int_4, device = device(type='cpu'), pin_memory = False); sym_size_int_4 = None
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[s72]" = torch.ops.aten.movedim.int(arange_2, 0, 0); movedim = None
unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(arange_3, 0); arange_3 = None
sym_size_int_5: "Sym(s72)" = torch.ops.aten.sym_size.int(arange_2, 0); arange_2 = None
expand: "i64[s72, 1]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_5, 1]); unsqueeze = expand = None
unsqueeze_1: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(arange, 0); arange = None
expand_1: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_5, sym_size_int_2]); unsqueeze_1 = sym_size_int_2 = expand_1 = None
unsqueeze_2: "i64[1, s31 + s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_6: "Sym(s31 + s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s72, s31 + s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_5, sym_size_int_6]); unsqueeze_2 = sym_size_int_5 = sym_size_int_6 = expand_2 = None
cache-cache_patch:torch-oblivious/inputs EXPORT: 82%|████████▏ | 105/128 [01:15<00:12, 1.79it/s]
cache-cache_patch:torch-oblivious/inputs2 EXPORT: 82%|████████▏ | 105/128 [01:15<00:12, 1.79it/s]
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[s72, s70]", arg14_1: "i64[s43, s53]", arg15_1: "i64[s44, s9]", arg16_1: "f32[s23, 1, s31, 96]", arg17_1: "f32[s4, 1, s11, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[s23, 1, s31, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = None
cat_1: "f32[s4, 1, s11, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[s72, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:371 in forward, code: past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
sym_numel_default: "Sym(96*s23*s31)" = torch.ops.aten.sym_numel.default(cat)
eq: "Sym(False)" = sym_numel_default == 0; eq = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s31)" = torch.ops.aten.sym_size.int(cat, 2); cat = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1)
add: "Sym(s31 + s70)" = sym_size_int + sym_size_int_1; sym_size_int_1 = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(sym_size_int, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[s43, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
eq_1: "Sym(False)" = sym_numel_default == 0; sym_numel_default = eq_1 = None
sym_size_int_2: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s31 + s70)" = sym_size_int + sym_size_int_2; sym_size_int = None
add_2: "Sym(s31 + s70)" = add_1 + 0
sym_size_int_3: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(s31 - s53 + s70)" = add_2 - sym_size_int_3; add_2 = None
gt: "Sym(s31 - s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s31 + s70)" = sym_size_int_3 > add_1; sym_size_int_3 = gt_1 = None
arange_1: "i64[s31 + s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s31 + s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
sym_size_int_4: "Sym(s72)" = torch.ops.aten.sym_size.int(arg13_1, 0); arg13_1 = None
arange_2: "i64[s72]" = torch.ops.aten.arange.default(sym_size_int_4, device = device(type='cpu'), pin_memory = False); sym_size_int_4 = None
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[s72]" = torch.ops.aten.movedim.int(arange_2, 0, 0); movedim = None
unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(arange_3, 0); arange_3 = None
sym_size_int_5: "Sym(s72)" = torch.ops.aten.sym_size.int(arange_2, 0); arange_2 = None
expand: "i64[s72, 1]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_5, 1]); unsqueeze = expand = None
unsqueeze_1: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(arange, 0); arange = None
expand_1: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_5, sym_size_int_2]); unsqueeze_1 = sym_size_int_2 = expand_1 = None
unsqueeze_2: "i64[1, s31 + s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_6: "Sym(s31 + s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s72, s31 + s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_5, sym_size_int_6]); unsqueeze_2 = sym_size_int_5 = sym_size_int_6 = expand_2 = None
The exporter prints the same FX graph again for each of the remaining export attempts.
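The dump above can also be reproduced outside of the benchmark loop. The snippet below is only a minimal sketch, not part of the original script: it assumes the model, input_sets and dynamic_shapes created at the beginning of this example, and the patch_transformers option of torch_export_patches is an assumption here.

import torch
from onnx_diagnostic.helpers.torch_helper import torch_deepcopy
from onnx_diagnostic.torch_export_patches import torch_export_patches

# Deep-copy the inputs because the DynamicCache is modified inplace,
# then export under the serialization patches provided by onnx_diagnostic.
with torch_export_patches(patch_transformers=True):
    ep = torch.export.export(
        model,
        (),
        kwargs=torch_deepcopy(input_sets["inputs"]),
        dynamic_shapes=dynamic_shapes,
    )

# Print the captured FX graph in the same readable form as the dump above.
ep.graph_module.print_readable()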
cache-cache_patch:torch-oblivious/inputs2 EXPORT: 83%|████████▎ | 106/128 [01:16<00:10, 2.16it/s]
cache-cache_patch:torch-oblivious/inputs_empty_cache EXPORT: 83%|████████▎ | 106/128 [01:16<00:10, 2.16it/s]
cache-cache_patch:torch-oblivious/inputs_empty_cache EXPORT: 84%|████████▎ | 107/128 [01:16<00:08, 2.52it/s]
cache-cache_patch:torch-oblivious/inputs_batch1 EXPORT: 84%|████████▎ | 107/128 [01:16<00:08, 2.52it/s]
cache-cache_patch:torch-oblivious/inputs_batch1 EXPORT: 84%|████████▍ | 108/128 [01:16<00:07, 2.84it/s]
cache-cache_patch:torch-oblivious-rt/inputs EXPORT: 84%|████████▍ | 108/128 [01:16<00:07, 2.84it/s]
cache-cache_patch:torch-oblivious-rt/inputs EXPORT: 85%|████████▌ | 109/128 [01:16<00:06, 3.07it/s]
cache-cache_patch:torch-oblivious-rt/inputs2 EXPORT: 85%|████████▌ | 109/128 [01:16<00:06, 3.07it/s]
cache-cache_patch:torch-oblivious-rt/inputs2 EXPORT: 86%|████████▌ | 110/128 [01:17<00:05, 3.35it/s]
cache-cache_patch:torch-oblivious-rt/inputs_empty_cache EXPORT: 86%|████████▌ | 110/128 [01:17<00:05, 3.35it/s]
cache-cache_patch:torch-oblivious-rt/inputs_empty_cache EXPORT: 87%|████████▋ | 111/128 [01:17<00:04, 3.56it/s]
cache-cache_patch:torch-oblivious-rt/inputs_batch1 EXPORT: 87%|████████▋ | 111/128 [01:17<00:04, 3.56it/s]
def forward(self, arg0_1: "f32[32000, 192]", arg1_1: "f32[192, 192]", arg2_1: "f32[96, 192]", arg3_1: "f32[96, 192]", arg4_1: "f32[192, 192]", arg5_1: "f32[1024, 192]", arg6_1: "f32[1024, 192]", arg7_1: "f32[192, 1024]", arg8_1: "f32[192]", arg9_1: "f32[192]", arg10_1: "f32[192]", arg11_1: "f32[32000, 192]", arg12_1: "f32[48]", arg13_1: "i64[s72, s70]", arg14_1: "i64[s43, s53]", arg15_1: "i64[s44, s9]", arg16_1: "f32[s23, 1, s31, 96]", arg17_1: "f32[s4, 1, s11, 96]"):
# No stacktrace found for following nodes
_tensor_constant0: "f32[0]" = self._tensor_constant0
lift_fresh_copy: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant0); _tensor_constant0 = None
detach_: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy); lift_fresh_copy = None
_tensor_constant1: "f32[0]" = self._tensor_constant1
lift_fresh_copy_1: "f32[0]" = torch.ops.aten.lift_fresh_copy.default(_tensor_constant1); _tensor_constant1 = None
detach__1: "f32[0]" = torch.ops.aten.detach_.default(lift_fresh_copy_1); lift_fresh_copy_1 = None
cat: "f32[s23, 1, s31, 96]" = torch.ops.aten.cat.default([detach_, arg16_1], -2); detach_ = arg16_1 = None
cat_1: "f32[s4, 1, s11, 96]" = torch.ops.aten.cat.default([detach__1, arg17_1], -2); detach__1 = arg17_1 = cat_1 = None
# File: ~/vv/this312/lib/python3.12/site-packages/torch/nn/modules/sparse.py:192 in forward, code: return F.embedding(
embedding: "f32[s72, s70, 192]" = torch.ops.aten.embedding.default(arg0_1, arg13_1); arg0_1 = embedding = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:371 in forward, code: past_seen_tokens = past_key_values.get_seq_length() if past_key_values is not None else 0
sym_numel_default: "Sym(96*s23*s31)" = torch.ops.aten.sym_numel.default(cat)
eq: "Sym(False)" = sym_numel_default == 0; eq = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:373 in forward, code: past_seen_tokens, past_seen_tokens + inputs_embeds.shape[1], device=inputs_embeds.device
sym_size_int: "Sym(s31)" = torch.ops.aten.sym_size.int(cat, 2); cat = None
sym_size_int_1: "Sym(s70)" = torch.ops.aten.sym_size.int(arg13_1, 1)
add: "Sym(s31 + s70)" = sym_size_int + sym_size_int_1; sym_size_int_1 = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:372 in forward, code: cache_position: torch.Tensor = torch.arange(
arange: "i64[s70]" = torch.ops.aten.arange.start(sym_size_int, add, device = device(type='cpu'), pin_memory = False); add = None
# File: ~/github/transformers/src/transformers/models/llama/modeling_llama.py:379 in forward, code: causal_mask = create_causal_mask(
to: "b8[s43, s53]" = torch.ops.aten.to.device(arg14_1, device(type='cpu'), torch.bool); to = None
eq_1: "Sym(False)" = sym_numel_default == 0; sym_numel_default = eq_1 = None
sym_size_int_2: "Sym(s70)" = torch.ops.aten.sym_size.int(arange, 0)
add_1: "Sym(s31 + s70)" = sym_size_int + sym_size_int_2; sym_size_int = None
add_2: "Sym(s31 + s70)" = add_1 + 0
sym_size_int_3: "Sym(s53)" = torch.ops.aten.sym_size.int(arg14_1, 1); arg14_1 = None
sub: "Sym(s31 - s53 + s70)" = add_2 - sym_size_int_3; add_2 = None
gt: "Sym(s31 - s53 + s70 > 0)" = sub > 0; sub = gt = None
gt_1: "Sym(s53 > s31 + s70)" = sym_size_int_3 > add_1; sym_size_int_3 = gt_1 = None
arange_1: "i64[s31 + s70]" = torch.ops.aten.arange.default(add_1, device = device(type='cpu'), pin_memory = False); add_1 = None
add_: "i64[s31 + s70]" = torch.ops.aten.add_.Tensor(arange_1, 0)
sym_size_int_4: "Sym(s72)" = torch.ops.aten.sym_size.int(arg13_1, 0); arg13_1 = None
arange_2: "i64[s72]" = torch.ops.aten.arange.default(sym_size_int_4, device = device(type='cpu'), pin_memory = False); sym_size_int_4 = None
arange_3: "i64[1]" = torch.ops.aten.arange.default(1, device = device(type='cpu'), pin_memory = False)
movedim: "i64[s72]" = torch.ops.aten.movedim.int(arange_2, 0, 0); movedim = None
unsqueeze: "i64[1, 1]" = torch.ops.aten.unsqueeze.default(arange_3, 0); arange_3 = None
sym_size_int_5: "Sym(s72)" = torch.ops.aten.sym_size.int(arange_2, 0); arange_2 = None
expand: "i64[s72, 1]" = torch.ops.aten.expand.default(unsqueeze, [sym_size_int_5, 1]); unsqueeze = expand = None
unsqueeze_1: "i64[1, s70]" = torch.ops.aten.unsqueeze.default(arange, 0); arange = None
expand_1: "i64[s72, s70]" = torch.ops.aten.expand.default(unsqueeze_1, [sym_size_int_5, sym_size_int_2]); unsqueeze_1 = sym_size_int_2 = expand_1 = None
unsqueeze_2: "i64[1, s31 + s70]" = torch.ops.aten.unsqueeze.default(add_, 0); add_ = None
sym_size_int_6: "Sym(s31 + s70)" = torch.ops.aten.sym_size.int(arange_1, 0); arange_1 = None
expand_2: "i64[s72, s31 + s70]" = torch.ops.aten.expand.default(unsqueeze_2, [sym_size_int_5, sym_size_int_6]); unsqueeze_2 = sym_size_int_5 = sym_size_int_6 = expand_2 = None
cache-cache_patch:torch-oblivious-rt/inputs_batch1 EXPORT: 88%|████████▊ | 112/128 [01:17<00:04, 3.80it/s]
cache-cache_patch:transformers/inputs EXPORT: 88%|████████▊ | 112/128 [01:17<00:04, 3.80it/s] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:transformers/inputs VALIDATE: 88%|████████▊ | 112/128 [01:23<00:04, 3.80it/s]
cache-cache_patch:transformers/inputs VALIDATE: 88%|████████▊ | 113/128 [01:23<00:29, 1.94s/it]
cache-cache_patch:transformers/inputs2 EXPORT: 88%|████████▊ | 113/128 [01:23<00:29, 1.94s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:transformers/inputs2 VALIDATE: 88%|████████▊ | 113/128 [01:25<00:29, 1.94s/it]
cache-cache_patch:transformers/inputs2 VALIDATE: 89%|████████▉ | 114/128 [01:26<00:30, 2.16s/it]
cache-cache_patch:transformers/inputs_empty_cache EXPORT: 89%|████████▉ | 114/128 [01:26<00:30, 2.16s/it]
cache-cache_patch:transformers/inputs_empty_cache EXPORT: 90%|████████▉ | 115/128 [01:27<00:23, 1.78s/it]
cache-cache_patch:transformers/inputs_batch1 EXPORT: 90%|████████▉ | 115/128 [01:27<00:23, 1.78s/it]
cache-cache_patch:transformers/inputs_batch1 EXPORT: 91%|█████████ | 116/128 [01:27<00:17, 1.46s/it]
cache-cache_patch:transformers-rt/inputs EXPORT: 91%|█████████ | 116/128 [01:27<00:17, 1.46s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:transformers-rt/inputs VALIDATE: 91%|█████████ | 116/128 [01:31<00:17, 1.46s/it]
cache-cache_patch:transformers-rt/inputs VALIDATE: 91%|█████████▏| 117/128 [01:31<00:25, 2.30s/it]
cache-cache_patch:transformers-rt/inputs2 EXPORT: 91%|█████████▏| 117/128 [01:31<00:25, 2.30s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:transformers-rt/inputs2 VALIDATE: 91%|█████████▏| 117/128 [01:37<00:25, 2.30s/it]
cache-cache_patch:transformers-rt/inputs2 VALIDATE: 92%|█████████▏| 118/128 [01:37<00:33, 3.33s/it]
cache-cache_patch:transformers-rt/inputs_empty_cache EXPORT: 92%|█████████▏| 118/128 [01:37<00:33, 3.33s/it]
cache-cache_patch:transformers-rt/inputs_empty_cache EXPORT: 93%|█████████▎| 119/128 [01:38<00:23, 2.56s/it]
cache-cache_patch:transformers-rt/inputs_batch1 EXPORT: 93%|█████████▎| 119/128 [01:38<00:23, 2.56s/it]
cache-cache_patch:transformers-rt/inputs_batch1 EXPORT: 94%|█████████▍| 120/128 [01:39<00:16, 2.01s/it]
cache-cache_patch:transformers-oblivious/inputs EXPORT: 94%|█████████▍| 120/128 [01:39<00:16, 2.01s/it][torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:transformers-oblivious/inputs VALIDATE: 94%|█████████▍| 120/128 [01:43<00:16, 2.01s/it]
cache-cache_patch:transformers-oblivious/inputs VALIDATE: 95%|█████████▍| 121/128 [01:43<00:19, 2.75s/it]
cache-cache_patch:transformers-oblivious/inputs2 EXPORT: 95%|█████████▍| 121/128 [01:43<00:19, 2.75s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:transformers-oblivious/inputs2 VALIDATE: 95%|█████████▍| 121/128 [01:47<00:19, 2.75s/it]
cache-cache_patch:transformers-oblivious/inputs2 VALIDATE: 95%|█████████▌| 122/128 [01:48<00:19, 3.28s/it]
cache-cache_patch:transformers-oblivious/inputs_empty_cache EXPORT: 95%|█████████▌| 122/128 [01:48<00:19, 3.28s/it][torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:transformers-oblivious/inputs_empty_cache VALIDATE: 95%|█████████▌| 122/128 [01:52<00:19, 3.28s/it]
cache-cache_patch:transformers-oblivious/inputs_empty_cache VALIDATE: 96%|█████████▌| 123/128 [01:52<00:18, 3.62s/it]
cache-cache_patch:transformers-oblivious/inputs_batch1 EXPORT: 96%|█████████▌| 123/128 [01:52<00:18, 3.62s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ❌
cache-cache_patch:transformers-oblivious/inputs_batch1 EXPORT: 97%|█████████▋| 124/128 [01:57<00:16, 4.11s/it]
cache-cache_patch:transformers-oblivious-rt/inputs EXPORT: 97%|█████████▋| 124/128 [01:57<00:16, 4.11s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:transformers-oblivious-rt/inputs VALIDATE: 97%|█████████▋| 124/128 [02:00<00:16, 4.11s/it]
cache-cache_patch:transformers-oblivious-rt/inputs VALIDATE: 98%|█████████▊| 125/128 [02:01<00:11, 3.87s/it]
cache-cache_patch:transformers-oblivious-rt/inputs2 EXPORT: 98%|█████████▊| 125/128 [02:01<00:11, 3.87s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:transformers-oblivious-rt/inputs2 VALIDATE: 98%|█████████▊| 125/128 [02:05<00:11, 3.87s/it]
cache-cache_patch:transformers-oblivious-rt/inputs2 VALIDATE: 98%|█████████▊| 126/128 [02:05<00:08, 4.09s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_empty_cache EXPORT: 98%|█████████▊| 126/128 [02:05<00:08, 4.09s/it][torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: batch will not be used, since it shares the same shape constraints with another axis: batch.
warnings.warn(
~/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_dynamic_shapes.py:264: UserWarning: # The axis name: cache+seq will not be used, since it shares the same shape constraints with another axis: seq_length.
warnings.warn(
Applied 39 of general pattern rewrite rules.
cache-cache_patch:transformers-oblivious-rt/inputs_empty_cache VALIDATE: 98%|█████████▊| 126/128 [02:09<00:08, 4.09s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_empty_cache VALIDATE: 99%|█████████▉| 127/128 [02:10<00:04, 4.20s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_batch1 EXPORT: 99%|█████████▉| 127/128 [02:10<00:04, 4.20s/it] [torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ❌
cache-cache_patch:transformers-oblivious-rt/inputs_batch1 EXPORT: 100%|██████████| 128/128 [02:15<00:00, 4.61s/it]
cache-cache_patch:transformers-oblivious-rt/inputs_batch1 EXPORT: 100%|██████████| 128/128 [02:15<00:00, 1.06s/it]
Let’s save the results.
# gather the export/validation status of every combination into one table and save it
df = pandas.DataFrame(results)
df.to_excel("plot_export_tiny_llm_dim01_onnx.xlsx")
df
# keep only the combinations for which the export itself failed
no_export = df[df.EXPORT == 0]
no_export.to_excel("plot_export_tiny_llm_dim01_onnx.no_export.xlsx")
no_export
And the validation failures, i.e. the combinations that exported but did not pass the validation step.
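A minimal sketch of how they can be extracted, assuming the results table stores a VALIDATE column with the same 0/1 convention as EXPORT; the output file name below is only illustrative.
no_validation = df[(df.EXPORT == 1) & (df.VALIDATE == 0)]
no_validation.to_excel("plot_export_tiny_llm_dim01_onnx.no_validation.xlsx")
no_validation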
If you run into any error, look at the example Export Tiny-LLM with patches.
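The VALIDATE step presumably runs each exported model with onnxruntime and compares it against the torch outputs computed at the beginning. A hedged sketch of how to reproduce that check manually, assuming the model was saved as tiny_llm_dim01.onnx and that make_feeds and max_diff accept the keyword arguments shown:
sess = onnxruntime.InferenceSession(
    "tiny_llm_dim01.onnx", providers=["CPUExecutionProvider"]
)
# build numpy feeds from a deep copy of the torch inputs
feeds = make_feeds(sess, torch_deepcopy(input_sets["inputs"]), use_numpy=True)
got = sess.run(None, feeds)
# compare against the reference outputs computed with the torch model
print(max_diff(expected["inputs"], got, flatten=True))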
doc.plot_legend("Tiny-LLM\nexport with\ndimension in {0,1}", "torch.onnx.export", "tomato")

Total running time of the script: (2 minutes 20.810 seconds)
Related examples

Export with dynamic dimensions in {0,1} into ONNX (custom)