Export Times¶
fx_mode¶
symbolic¶
<<<
# Benchmark: time taken by ``torch._dynamo.export`` with
# ``tracing_mode="symbolic"`` on a small Llama model.
#
# All heavy dependencies are imported before the timer starts, so import
# time is not part of the reported duration.
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_model

# Model creation is intentionally not timed: the previous version started a
# timer here but overwrote it before reading it (dead store).
print("creating model")
model, example_args_collection = get_llama_model(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=2,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# Reset dynamo before starting the clock so the reset is not measured.
torch._dynamo.reset()
begin = time.perf_counter()
# ``torch._dynamo.export`` returns a callable; the example inputs are
# unpacked into that callable, not into ``export`` itself.
torch._dynamo.export(model, tracing_mode="symbolic")(*example_args_collection[0])
print(f"time to export symbolic --- {time.perf_counter() - begin}")
>>>
creating model
time to export symbolic --- 1.557812183000351
fake¶
<<<
# Benchmark: time taken by ``torch._dynamo.export`` with
# ``tracing_mode="fake"`` on a small Llama model.
#
# All heavy dependencies are imported before the timer starts, so import
# time is not part of the reported duration.
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_model

# Model creation is intentionally not timed: the previous version started a
# timer here but overwrote it before reading it (dead store).
print("creating model")
model, example_args_collection = get_llama_model(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=2,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# Reset dynamo before starting the clock so the reset is not measured.
torch._dynamo.reset()
begin = time.perf_counter()
# ``torch._dynamo.export`` returns a callable; the example inputs are
# unpacked into that callable, not into ``export`` itself.
torch._dynamo.export(model, tracing_mode="fake")(*example_args_collection[0])
print(f"time to export fake --- {time.perf_counter() - begin}")
>>>
creating model
time to export fake --- 0.6968330859999696
Custom Exporter¶
With a very simple model:
<<<
# Benchmark: export a one-layer model twice with the custom exporter and
# print the duration of each call.
import time
from experimental_experiment.checks import print_import_time

# Must run before ``import torch`` below: the import timings are reported
# first, then the remaining modules are imported.
print_import_time()

import torch
import experimental_experiment.torch_interpreter


class Neuron(torch.nn.Module):
    """Single linear layer followed by a sigmoid."""

    def __init__(self, n_dims: int, n_targets: int):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        projected = self.linear(x)
        return torch.sigmoid(projected)


model = Neuron(3, 1)
x = torch.rand(5, 3)
export_args = (x,)

# First export.
started = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, export_args)
elapsed = time.perf_counter() - started
print(f"time to export 1x --- {elapsed}")

# Second export of the same model with the same inputs.
started = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, export_args)
elapsed = time.perf_counter() - started
print(f"time to export 2x --- {elapsed}")
>>>
time to import onnx --- 0.5862550989995725
time to import onnx_array_api --- 0.00013660400054504862
time to import torch --- 2.431166301000303
'torch.export' already imported
time to import torch.export --- 3.4679997042985633e-06
time to import onnxscript --- 0.15701776999958383
time to import onnxruntime --- 2.6400526240004183
'torch.onnx' already imported
time to import torch.onnx --- 2.3029997464618646e-06
time to import torch._dynamo --- 1.1766042910003307
time to import experimental_experiment.torch_interpreter --- 0.0207702479992804
time to import experimental_experiment.torch_interpreter.aten_functions --- 0.007149897000090277
time to export 1x --- 0.285166689000107
time to export 2x --- 0.0748094359996685
With a bigger model:
<<<
# Benchmark: export a one-layer Llama model twice with the custom exporter
# and print the duration of each call.
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_model

model, example_args_collection = get_llama_model(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=1,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# First export.
started = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, example_args_collection[0])
elapsed = time.perf_counter() - started
print(f"time to export 1x --- {elapsed}")

# Second export of the same model with the same inputs.
started = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, example_args_collection[0])
elapsed = time.perf_counter() - started
print(f"time to export 2x --- {elapsed}")
>>>
time to export 1x --- 5.302668850000373
time to export 2x --- 3.419302964000053
Dynamo Exporter¶
<<<
# Benchmark: export a one-layer model twice with ``torch.onnx.export`` using
# ``dynamo=True`` and print the duration of each call.
import time
import warnings
from experimental_experiment.checks import print_import_time

# Must run before ``import torch`` below: the import timings are reported
# first, then the remaining modules are imported.
print_import_time()

import torch
import experimental_experiment.torch_interpreter


class Neuron(torch.nn.Module):
    """Single linear layer followed by a sigmoid."""

    def __init__(self, n_dims: int, n_targets: int):
        super(Neuron, self).__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))


model = Neuron(3, 1)
x = torch.rand(5, 3)

# NOTE(review): the original indentation was lost; both exports are assumed
# to run inside the warning filter -- confirm against the page source.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    begin = time.perf_counter()
    # The example inputs are passed as a tuple, the same form used with the
    # custom exporter on this page.
    onx = torch.onnx.export(model, (x,), dynamo=True)
    print(f"time to export 1x --- {time.perf_counter() - begin}")

    begin = time.perf_counter()
    onx = torch.onnx.export(model, (x,), dynamo=True)
    print(f"time to export 2x --- {time.perf_counter() - begin}")
>>>
time to import onnx --- 0.5168091929999719
time to import onnx_array_api --- 0.00013621500056615332
time to import torch --- 2.3385872409999138
'torch.export' already imported
time to import torch.export --- 3.6450001061894e-06
time to import onnxscript --- 0.1515186510005151
time to import onnxruntime --- 2.1815903690003324
'torch.onnx' already imported
time to import torch.onnx --- 2.5079998522414826e-06
time to import torch._dynamo --- 1.0840408919993934
time to import experimental_experiment.torch_interpreter --- 0.01964940400011983
time to import experimental_experiment.torch_interpreter.aten_functions --- 0.006008702999679372
[torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
time to export 1x --- 1.9180186139992657
[torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decomposition...
[torch.onnx] Run decomposition... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
time to export 2x --- 0.6619092529999762
With a bigger model:
<<<
# Benchmark: export a one-layer Llama model twice with ``torch.onnx.export``
# using ``dynamo=True`` and print the duration of each call.
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_model

model, example_args_collection = get_llama_model(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=1,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# ``torch.onnx.export`` takes all example inputs as ONE ``args`` tuple.
# Unpacking them with ``*`` made only the first input part of ``args`` and
# pushed the second one into the next positional parameter, which failed
# with "missing a required argument: 'attention_mask'".
export_args = tuple(example_args_collection[0])

# NOTE(review): the original indentation was lost; both exports are assumed
# to run inside the warning filter -- confirm against the page source.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    begin = time.perf_counter()
    onx = torch.onnx.export(model, export_args, dynamo=True)
    print(f"time to export 1x --- {time.perf_counter() - begin}")

    begin = time.perf_counter()
    onx = torch.onnx.export(model, export_args, dynamo=True)
    print(f"time to export 2x --- {time.perf_counter() - begin}")
>>>
[torch.onnx] Obtain model graph for `LlamaModelWrapper([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `LlamaModelWrapper([...]` with `torch.export.export(..., strict=False)`... ❌
[torch.onnx] Obtain model graph for `LlamaModelWrapper([...]` with `torch.export.export`...
[torch.onnx] Obtain model graph for `LlamaModelWrapper([...]` with `torch.export.export`... ❌
[torch.onnx] Obtain model graph for `LlamaModelWrapper([...]` with Torch Script...
[torch.onnx] Obtain model graph for `LlamaModelWrapper([...]` with Torch Script... ❌
[runpythonerror]
`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
Traceback (most recent call last):
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_capture_strategies.py", line 110, in __call__
exported_program = self._capture(model, args, kwargs, dynamic_shapes)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_capture_strategies.py", line 190, in _capture
return torch.export.export(
^^^^^^^^^^^^^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/__init__.py", line 370, in export
return _export(
^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 1047, in wrapper
raise e
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 1020, in wrapper
ep = fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/exported_program.py", line 121, in wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 2083, in _export
ep = _export_for_training(
^^^^^^^^^^^^^^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 1047, in wrapper
raise e
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 1020, in wrapper
ep = fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/exported_program.py", line 121, in wrapper
return fn(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 1946, in _export_for_training
export_artifact = export_func( # type: ignore[operator]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 1843, in _non_strict_export
) = make_fake_inputs(
^^^^^^^^^^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/_export/non_strict_utils.py", line 154, in make_fake_inputs
combined_args = _combine_args(nn_module, args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/dynamic_shapes.py", line 597, in _combine_args
return signature.bind(*args, **kwargs).arguments
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/inspect.py", line 3277, in bind
return self._bind(args, kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^
File "/usr/lib/python3.12/inspect.py", line 3190, in _bind
raise TypeError(msg) from None
TypeError: missing a required argument: 'attention_mask'
The above exception was the direct cause of the following exception:
Traceback (most recent call last):
File "<stdin>", line 39, in <module>
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/onnx/__init__.py", line 364, in export
return _compat.export_compat(
^^^^^^^^^^^^^^^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_compat.py", line 118, in export_compat
onnx_program = _core.export(
^^^^^^^^^^^^^
File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_core.py", line 1296, in export
raise _errors.TorchExportError(
torch.onnx._internal.exporter._errors.TorchExportError: Failed to export the model with torch.export. This is step 1/3 of exporting the model to ONNX. Next steps:
- Modify the model code for `torch.export.export` to succeed. Refer to https://pytorch.org/docs/stable/generated/exportdb/index.html for more information.
- Debug `torch.export.export` and summit a PR to PyTorch.
- Create an issue in the PyTorch GitHub repository against the *torch.export* component and attach the full error stack as well as reproduction scripts.
## Exception summary
<class 'TypeError'>: missing a required argument: 'attention_mask'
(Refer to the full stack trace above for more information.)