Export Times
fx_mode
symbolic
<<<
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_decoder

# Build a small two-layer Llama decoder with a fixed input shape.
# All heavy modules are imported above so import time is excluded
# from the export measurement below.
print("creating model")
model, example_args_collection = get_llama_decoder(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=2,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# Clear any cached compilation state so the timing is not skewed
# by earlier runs.
torch._dynamo.reset()

# Time only the export itself (model creation is excluded).
begin = time.perf_counter()
torch._dynamo.export(model, tracing_mode="symbolic")(*example_args_collection[0])
print(f"time to export symbolic --- {time.perf_counter() - begin}")
>>>
creating model
time to export symbolic --- 1.0814032009984658
fake
<<<
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_decoder

# Build a small two-layer Llama decoder with a fixed input shape.
# All heavy modules are imported above so import time is excluded
# from the export measurement below.
print("creating model")
model, example_args_collection = get_llama_decoder(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=2,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# Clear any cached compilation state so the timing is not skewed
# by earlier runs.
torch._dynamo.reset()

# Time only the export itself (model creation is excluded).
begin = time.perf_counter()
torch._dynamo.export(model, tracing_mode="fake")(*example_args_collection[0])
print(f"time to export fake --- {time.perf_counter() - begin}")
>>>
creating model
time to export fake --- 0.36824091900052736
Custom Exporter
With a very simple model:
<<<
import time

from experimental_experiment.checks import print_import_time

# Report how long each heavy dependency takes to import; this runs
# before the remaining imports so they are measured, not preloaded.
print_import_time()

import torch
import experimental_experiment.torch_interpreter


class Neuron(torch.nn.Module):
    """A single linear layer followed by a sigmoid activation."""

    def __init__(self, n_dims: int, n_targets: int):
        # Python 3 zero-argument form of super().
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))


model = Neuron(3, 1)
x = torch.rand(5, 3)

# First export: includes one-time warm-up costs.
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, (x,))
print(f"time to export 1x --- {time.perf_counter() - begin}")

# Second export: measures the warm path.
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, (x,))
print(f"time to export 2x --- {time.perf_counter() - begin}")
>>>
time to import onnx --- 0.24227722399882623
time to import onnx_array_api --- 0.0002272649981023278
time to import torch --- 1.6072333219999564
'torch.export' already imported
time to import torch.export --- 2.340002538403496e-06
time to import onnxscript --- 0.177370534001966
time to import onnxruntime --- 1.5473279760008154
'torch.onnx' already imported
time to import torch.onnx --- 4.593002813635394e-06
time to import torch._dynamo --- 0.6422698300011689
time to import experimental_experiment.torch_interpreter --- 0.012472520000301301
time to import experimental_experiment.torch_interpreter.aten_functions --- 0.005323658999259351
time to export 1x --- 0.25747910700010834
time to export 2x --- 0.11925635800071177
With a bigger model:
<<<
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_decoder

# One-layer Llama decoder used to benchmark the custom exporter.
model, example_args_collection = get_llama_decoder(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=1,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)


def _timed_export(label):
    """Export the decoder once, print the elapsed time, return the model."""
    start = time.perf_counter()
    result = experimental_experiment.torch_interpreter.to_onnx(
        model, example_args_collection[0]
    )
    print(f"time to export {label} --- {time.perf_counter() - start}")
    return result


# First run pays the warm-up costs; second run measures the warm path.
onx = _timed_export("1x")
onx = _timed_export("2x")
>>>
time to export 1x --- 2.257415819000016
time to export 2x --- 1.9129424070015375
Dynamo Exporter
<<<
import time
import warnings

from experimental_experiment.checks import print_import_time

# Report how long each heavy dependency takes to import; this runs
# before the remaining imports so they are measured, not preloaded.
print_import_time()

import torch
import experimental_experiment.torch_interpreter


class Neuron(torch.nn.Module):
    """A single linear layer followed by a sigmoid activation."""

    def __init__(self, n_dims: int, n_targets: int):
        # Python 3 zero-argument form of super().
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))


model = Neuron(3, 1)
x = torch.rand(5, 3)

# Silence exporter warnings so only the timings are printed.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # First export: includes one-time warm-up costs.
    begin = time.perf_counter()
    onx = torch.onnx.dynamo_export(model, x)
    print(f"time to export 1x --- {time.perf_counter() - begin}")

    # Second export: measures the warm path.
    begin = time.perf_counter()
    onx = torch.onnx.dynamo_export(model, x)
    print(f"time to export 2x --- {time.perf_counter() - begin}")
>>>
time to import onnx --- 0.16292905500085908
time to import onnx_array_api --- 0.00022411799727706239
time to import torch --- 2.457190165998327
'torch.export' already imported
time to import torch.export --- 5.894999048905447e-06
time to import onnxscript --- 0.2898647820002225
time to import onnxruntime --- 1.4620527329971083
'torch.onnx' already imported
time to import torch.onnx --- 1.6049998521339148e-06
time to import torch._dynamo --- 0.6435438029984653
time to import experimental_experiment.torch_interpreter --- 0.013799967000522884
time to import experimental_experiment.torch_interpreter.aten_functions --- 0.005678852001437917
time to export 1x --- 1.83391523300088
time to export 2x --- 0.25887864099786384
With a bigger model:
<<<
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_decoder

# One-layer Llama decoder used to benchmark torch.onnx.dynamo_export.
model, example_args_collection = get_llama_decoder(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=1,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)


def _timed_export(label):
    """Export the decoder once with dynamo_export and print the elapsed time."""
    start = time.perf_counter()
    result = torch.onnx.dynamo_export(model, *example_args_collection[0])
    print(f"time to export {label} --- {time.perf_counter() - start}")
    return result


# Silence exporter warnings so only the timings are printed.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # First run pays the warm-up costs; second run measures the warm path.
    onx = _timed_export("1x")
    onx = _timed_export("2x")
>>>
Applied 16 of general pattern rewrite rules.
time to export 1x --- 12.531696005000413
Applied 16 of general pattern rewrite rules.
time to export 2x --- 9.280181377002009