Export Times
Custom Exporter
With a very simple model:
<<<
import time

# Measure and report the import time of onnx/torch/... before anything
# else is loaded; the remaining imports are deliberately placed *after*
# this call so their cost is captured in the report, not hidden.
from experimental_experiment.checks import print_import_time

print_import_time()

import torch
import experimental_experiment.torch_interpreter


class Neuron(torch.nn.Module):
    """A minimal model: one linear layer followed by a sigmoid."""

    def __init__(self, n_dims: int, n_targets: int):
        super(Neuron, self).__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))


model = Neuron(3, 1)
x = torch.rand(5, 3)

# First export: includes one-time warm-up cost (tracing machinery, caches).
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, (x,))
print(f"time to export 1x --- {time.perf_counter() - begin}")

# Second export: measures the steady-state cost once everything is warm.
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, (x,))
print(f"time to export 2x --- {time.perf_counter() - begin}")
>>>
time to import onnx --- 1.2714726599951973
time to import onnx_array_api --- 0.0006885809998493642
time to import torch --- 4.143693677004194
'torch.export' already imported
time to import torch.export --- 4.556000931188464e-06
time to import onnxscript --- 1.1564491159952013
time to import onnxruntime --- 0.06228341600217391
time to import torch.onnx --- 0.1244188690034207
time to import torch._dynamo --- 1.6226410349918297
time to import experimental_experiment.torch_interpreter --- 4.138014933007071
time to import experimental_experiment.torch_interpreter.aten_functions --- 0.020298338000429794
time to export 1x --- 8.078760484000668
time to export 2x --- 0.08728941599838436
[runpythonerror]
use_kernel_func_from_hub is not available in the installed kernels version. Please upgrade kernels to use this feature.
With a bigger model:
<<<
import time
import warnings

# All heavy modules are imported up front so that the timed section below
# measures only the export itself, not first-import cost.
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.ext_test_case import get_llama_model

# NOTE(review): the traceback captured below shows get_llama_model()
# rejecting the ``input_dims`` keyword — verify these arguments against
# the current signature of experimental_experiment.ext_test_case.get_llama_model.
model, example_args_collection = get_llama_model(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=1,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# First export: warm-up included.
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(
    model, example_args_collection[0]
)
print(f"time to export 1x --- {time.perf_counter() - begin}")

# Second export: steady-state timing.
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(
    model, example_args_collection[0]
)
print(f"time to export 2x --- {time.perf_counter() - begin}")
>>>
[runpythonerror]
use_kernel_func_from_hub is not available in the installed kernels version. Please upgrade kernels to use this feature.
Traceback (most recent call last):
File "<stdin>", line 26, in <module>
TypeError: get_llama_model() got an unexpected keyword argument 'input_dims'
Dynamo Exporter
<<<
import time
import warnings

# Report import times first; the remaining imports are deliberately placed
# after this call so their cost shows up in the report.
from experimental_experiment.checks import print_import_time

print_import_time()

import torch
import experimental_experiment.torch_interpreter


class Neuron(torch.nn.Module):
    """A minimal model: one linear layer followed by a sigmoid."""

    def __init__(self, n_dims: int, n_targets: int):
        super(Neuron, self).__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))


model = Neuron(3, 1)
x = torch.rand(5, 3)

# Silence exporter warnings so the timing output stays readable.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # First export with the dynamo-based exporter: includes warm-up cost.
    begin = time.perf_counter()
    onx = torch.onnx.export(model, x, dynamo=True)
    print(f"time to export 1x --- {time.perf_counter() - begin}")

    # Second export: steady-state timing.
    begin = time.perf_counter()
    onx = torch.onnx.export(model, x, dynamo=True)
    print(f"time to export 2x --- {time.perf_counter() - begin}")
>>>
time to import onnx --- 0.8959575120097725
time to import onnx_array_api --- 0.00032108101004268974
time to import torch --- 4.476751504000276
'torch.export' already imported
time to import torch.export --- 5.131994839757681e-06
time to import onnxscript --- 1.3144609239971032
time to import onnxruntime --- 0.043791325006168336
time to import torch.onnx --- 0.09236413000326138
time to import torch._dynamo --- 1.9695585519948509
time to import experimental_experiment.torch_interpreter --- 4.034774877000018
time to import experimental_experiment.torch_interpreter.aten_functions --- 0.012122507992899045
[torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
[torch.onnx] Optimize the ONNX graph... ✅
time to export 1x --- 3.7234111340076197
[torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`...
[torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`... ✅
[torch.onnx] Run decompositions...
[torch.onnx] Run decompositions... ✅
[torch.onnx] Translate the graph into ONNX...
[torch.onnx] Translate the graph into ONNX... ✅
[torch.onnx] Optimize the ONNX graph...
[torch.onnx] Optimize the ONNX graph... ✅
time to export 2x --- 1.174936727999011
With a bigger model:
<<<
import time
import warnings

# All heavy modules are imported up front so that the timed section below
# measures only the export itself, not first-import cost.
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.ext_test_case import get_llama_model

# NOTE(review): the traceback captured below shows get_llama_model()
# rejecting the ``input_dims`` keyword — verify these arguments against
# the current signature of experimental_experiment.ext_test_case.get_llama_model.
model, example_args_collection = get_llama_model(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=1,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# Silence exporter warnings so the timing output stays readable.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # First export with the dynamo-based exporter: includes warm-up cost.
    begin = time.perf_counter()
    onx = torch.onnx.export(model, *example_args_collection[0], dynamo=True)
    print(f"time to export 1x --- {time.perf_counter() - begin}")

    # Second export: steady-state timing.
    begin = time.perf_counter()
    onx = torch.onnx.export(model, *example_args_collection[0], dynamo=True)
    print(f"time to export 2x --- {time.perf_counter() - begin}")
>>>
[runpythonerror]
use_kernel_func_from_hub is not available in the installed kernels version. Please upgrade kernels to use this feature.
Traceback (most recent call last):
File "<stdin>", line 26, in <module>
TypeError: get_llama_model() got an unexpected keyword argument 'input_dims'