Export Times

This page measures how long different exporters take, first on a Llama model
built with get_llama_model and then, for the custom and dynamo exporters, on a
small feed-forward network as well.

fx_mode

torch._dynamo.export accepts a tracing_mode argument; the two runs below time
its "symbolic" and "fake" modes on the same model.

symbolic

<<<

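# every dependency is imported upfront so that import time is excluded
# from the export timings measured below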
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_model

print("creating model")
model, example_args_collection = get_llama_model(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=2,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

torch._dynamo.reset()
begin = time.perf_counter()
torch._dynamo.export(model, tracing_mode="symbolic")(*example_args_collection[0])
print(f"time to export symbolic --- {time.perf_counter() - begin}")

>>>

    creating model
    time to export symbolic --- 2.3687471320008626

fake

The same export with tracing_mode="fake", which traces with fake tensors and
finishes here in less than half the time:

<<<

import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_model

print("creating model")
model, example_args_collection = get_llama_model(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=2,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

torch._dynamo.reset()
begin = time.perf_counter()
torch._dynamo.export(model, tracing_mode="fake")(*example_args_collection[0])
print(f"time to export fake --- {time.perf_counter() - begin}")

>>>

    creating model
    time to export fake --- 1.095509246999427
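
The two runs above differ only in the tracing_mode argument. A single loop can
produce both measurements (a sketch reusing model and example_args_collection
from the snippets above):

<<<

import time
import torch
import torch._dynamo

# `model` and `example_args_collection` as returned by get_llama_model above
for mode in ["symbolic", "fake"]:
    torch._dynamo.reset()  # drop cached graphs so both runs start cold
    begin = time.perf_counter()
    torch._dynamo.export(model, tracing_mode=mode)(*example_args_collection[0])
    print(f"time to export {mode} --- {time.perf_counter() - begin}")

>>>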

Custom Exporter

The custom exporter is experimental_experiment.torch_interpreter.to_onnx.
With a very simple model:

<<<

import time
from experimental_experiment.checks import print_import_time

print_import_time()

import torch
import experimental_experiment.torch_interpreter


class Neuron(torch.nn.Module):
    def __init__(self, n_dims: int, n_targets: int):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))


model = Neuron(3, 1)
x = torch.rand(5, 3)

begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, (x,))
print(f"time to export 1x --- {time.perf_counter() - begin}")

begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, (x,))
print(f"time to export 2x --- {time.perf_counter() - begin}")

>>>

    time to import onnx --- 0.544474567999714
    time to import onnx_array_api --- 0.00013185399802750908
    time to import torch --- 2.774635442001454
    'torch.export' already imported
    time to import torch.export --- 4.248999175615609e-06
    time to import onnxscript --- 0.15933950300313882
    time to import onnxruntime --- 2.745195068000612
    'torch.onnx' already imported
    time to import torch.onnx --- 1.5670011634938419e-06
    time to import torch._dynamo --- 1.0455590430028678
    time to import experimental_experiment.torch_interpreter --- 0.014880498001730302
    time to import experimental_experiment.torch_interpreter.aten_functions --- 0.004884293997747591
    time to export 1x --- 0.1767761920018529
    time to export 2x --- 0.045833503998437664
    [runpythonerror]
    /home/xadupre/vv/this312/lib/python3.12/site-packages/torch/backends/mkldnn/__init__.py:78: UserWarning: TF32 acceleration on top of oneDNN is available for Intel GPUs. The current Torch version does not have Intel GPU Support. (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:148.)
      torch._C._set_onednn_allow_tf32(_allow_tf32)
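
The second call is noticeably faster once everything is initialized. To check
that the exported graph is usable, the ModelProto returned by to_onnx can be
compared against the eager model with onnxruntime (a minimal sketch, assuming
onx, x and model come from the snippet above and that to_onnx returns an
onnx.ModelProto):

<<<

import numpy as np
import onnxruntime

# `onx` (onnx.ModelProto), `x` and `model` come from the snippet above
sess = onnxruntime.InferenceSession(
    onx.SerializeToString(), providers=["CPUExecutionProvider"]
)
feeds = {sess.get_inputs()[0].name: x.numpy()}
got = sess.run(None, feeds)[0]
expected = model(x).detach().numpy()
# the ONNX output should match eager PyTorch up to float precision
np.testing.assert_allclose(expected, got, atol=1e-5)

>>>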

With a bigger model:

<<<

import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_model

model, example_args_collection = get_llama_model(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=1,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(
    model, example_args_collection[0]
)
print(f"time to export 1x --- {time.perf_counter() - begin}")

begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(
    model, example_args_collection[0]
)
print(f"time to export 2x --- {time.perf_counter() - begin}")

>>>

    time to export 1x --- 6.919692307998048
    time to export 2x --- 4.759556262000842
    [runpythonerror]
    /home/xadupre/vv/this312/lib/python3.12/site-packages/torch/backends/mkldnn/__init__.py:78: UserWarning: TF32 acceleration on top of oneDNN is available for Intel GPUs. The current Torch version does not have Intel GPU Support. (Triggered internally at /pytorch/aten/src/ATen/Context.cpp:148.)
      torch._C._set_onednn_allow_tf32(_allow_tf32)
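
A single measurement is noisy; averaging a few repetitions gives a steadier
figure (a sketch reusing model and example_args_collection from above):

<<<

import time
import numpy as np
import experimental_experiment.torch_interpreter

# `model` and `example_args_collection` as returned by get_llama_model above
times = []
for _ in range(3):
    begin = time.perf_counter()
    experimental_experiment.torch_interpreter.to_onnx(
        model, example_args_collection[0]
    )
    times.append(time.perf_counter() - begin)
print(f"average export time --- {np.mean(times)} +/- {np.std(times)}")

>>>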

Dynamo Exporter

The same measurements with torch.onnx.export and dynamo=True, starting again
with the small model:

<<<

import time
import warnings

from experimental_experiment.checks import print_import_time

print_import_time()

import torch
import experimental_experiment.torch_interpreter


class Neuron(torch.nn.Module):
    def __init__(self, n_dims: int, n_targets: int):
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))


model = Neuron(3, 1)
x = torch.rand(5, 3)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    begin = time.perf_counter()
    onx = torch.onnx.export(model, x, dynamo=True)
    print(f"time to export 1x --- {time.perf_counter() - begin}")

    begin = time.perf_counter()
    onx = torch.onnx.export(model, x, dynamo=True)
    print(f"time to export 2x --- {time.perf_counter() - begin}")

>>>

    time to import onnx --- 0.712857456997881
    time to import onnx_array_api --- 0.0001580500029376708
    time to import torch --- 3.357364586001495
    'torch.export' already imported
    time to import torch.export --- 8.7690023065079e-06
    time to import onnxscript --- 0.1631262090013479
    time to import onnxruntime --- 2.878802179999184
    'torch.onnx' already imported
    time to import torch.onnx --- 1.7029997252393514e-06
    time to import torch._dynamo --- 1.2133234329994593
    time to import experimental_experiment.torch_interpreter --- 0.027549766000447562
    time to import experimental_experiment.torch_interpreter.aten_functions --- 0.012157610999565804
    [torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`...
    [torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`... ✅
    [torch.onnx] Run decomposition...
    [torch.onnx] Run decomposition... ✅
    [torch.onnx] Translate the graph into ONNX...
    [torch.onnx] Translate the graph into ONNX... ✅
    time to export 1x --- 1.17246131399952
    [torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`...
    [torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`... ✅
    [torch.onnx] Run decomposition...
    [torch.onnx] Run decomposition... ✅
    [torch.onnx] Translate the graph into ONNX...
    [torch.onnx] Translate the graph into ONNX... ✅
    time to export 2x --- 0.5689955099987856
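
With dynamo=True, torch.onnx.export returns a torch.onnx.ONNXProgram instead
of writing a file; the result can be saved to disk or turned into a ModelProto
afterwards (a short sketch based on the run above):

<<<

# `onx` is the torch.onnx.ONNXProgram returned by torch.onnx.export above
onx.save("neuron_dynamo.onnx")  # serialize the exported graph to disk
proto = onx.model_proto  # the underlying onnx.ModelProto
print(type(proto))

>>>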

With a bigger model:

<<<

import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_model

model, example_args_collection = get_llama_model(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=1,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    begin = time.perf_counter()
    onx = torch.onnx.export(model, *example_args_collection[0], dynamo=True)
    print(f"time to export 1x --- {time.perf_counter() - begin}")

    begin = time.perf_counter()
    onx = torch.onnx.export(model, *example_args_collection[0], dynamo=True)
    print(f"time to export 2x --- {time.perf_counter() - begin}")

>>>

    [torch.onnx] Obtain model graph for `LlamaModelWrapper([...]` with `torch.export.export(..., strict=False)`...
    [torch.onnx] Obtain model graph for `LlamaModelWrapper([...]` with `torch.export.export(..., strict=False)`... ❌
    [torch.onnx] Obtain model graph for `LlamaModelWrapper([...]` with `torch.export.export`...
    [torch.onnx] Obtain model graph for `LlamaModelWrapper([...]` with `torch.export.export`... ❌
    [torch.onnx] Obtain model graph for `LlamaModelWrapper([...]` with Torch Script...
    [torch.onnx] Obtain model graph for `LlamaModelWrapper([...]` with Torch Script... ❌
    [runpythonerror]
    `loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.
    Traceback (most recent call last):
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_capture_strategies.py", line 110, in __call__
        exported_program = self._capture(model, args, kwargs, dynamic_shapes)
                           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_capture_strategies.py", line 190, in _capture
        return torch.export.export(
               ^^^^^^^^^^^^^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/__init__.py", line 370, in export
        return _export(
               ^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 1047, in wrapper
        raise e
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 1020, in wrapper
        ep = fn(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/exported_program.py", line 121, in wrapper
        return fn(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 2083, in _export
        ep = _export_for_training(
             ^^^^^^^^^^^^^^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 1047, in wrapper
        raise e
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 1020, in wrapper
        ep = fn(*args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/exported_program.py", line 121, in wrapper
        return fn(*args, **kwargs)
               ^^^^^^^^^^^^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 1946, in _export_for_training
        export_artifact = export_func(  # type: ignore[operator]
                          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/_trace.py", line 1843, in _non_strict_export
        ) = make_fake_inputs(
            ^^^^^^^^^^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/_export/non_strict_utils.py", line 154, in make_fake_inputs
        combined_args = _combine_args(nn_module, args, kwargs)
                        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/export/dynamic_shapes.py", line 597, in _combine_args
        return signature.bind(*args, **kwargs).arguments
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/usr/lib/python3.12/inspect.py", line 3277, in bind
        return self._bind(args, kwargs)
               ^^^^^^^^^^^^^^^^^^^^^^^^
      File "/usr/lib/python3.12/inspect.py", line 3190, in _bind
        raise TypeError(msg) from None
    TypeError: missing a required argument: 'attention_mask'
    
    The above exception was the direct cause of the following exception:
    
    Traceback (most recent call last):
      File "<stdin>", line 39, in <module>
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/onnx/__init__.py", line 364, in export
        return _compat.export_compat(
               ^^^^^^^^^^^^^^^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_compat.py", line 119, in export_compat
        onnx_program = _core.export(
                       ^^^^^^^^^^^^^
      File "/home/xadupre/vv/this312/lib/python3.12/site-packages/torch/onnx/_internal/exporter/_core.py", line 1291, in export
        raise _errors.TorchExportError(
    torch.onnx._internal.exporter._errors.TorchExportError: Failed to export the model with torch.export. This is step 1/3 of exporting the model to ONNX. Next steps:
    - Modify the model code for `torch.export.export` to succeed. Refer to https://pytorch.org/docs/stable/generated/exportdb/index.html for more information.
    - Debug `torch.export.export` and summit a PR to PyTorch.
    - Create an issue in the PyTorch GitHub repository against the *torch.export* component and attach the full error stack as well as reproduction scripts.
    
    ## Exception summary
    
    <class 'TypeError'>: missing a required argument: 'attention_mask'
    
    (Refer to the full stack trace above for more information.)
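
The failure happens before any tracing starts: torch.onnx.export expects the
example inputs as a single tuple, so unpacking example_args_collection[0]
passes input_ids as args and lets attention_mask be consumed by the next
positional parameter instead of reaching the model's forward. A likely fix,
not rerun here, is to pass the tuple as-is, as the custom exporter snippets
above do:

<<<

# pass the argument tuple without unpacking so both input_ids and
# attention_mask are bound to the model's forward (untested sketch)
onx = torch.onnx.export(model, example_args_collection[0], dynamo=True)

>>>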