Times¶
fx_mode¶
symbolic¶
<<<
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_decoder
# Build the model and its example inputs with get_llama_decoder
# (two hidden layers, eager attention implementation).
print("creating model")
model, example_args_collection = get_llama_decoder(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=2,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# Reset dynamo right before starting the timer; only the export call below
# is measured (model creation is deliberately outside the timed region).
torch._dynamo.reset()
begin = time.perf_counter()
torch._dynamo.export(model, tracing_mode="symbolic")(*example_args_collection[0])
print(f"time to export symbolic --- {time.perf_counter() - begin}")
>>>
creating model
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:206: DeprecationWarning: torch.get_autocast_cpu_dtype() is deprecated. Please use torch.get_autocast_dtype('cpu') instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:728.)
self.fast_dtype = torch.get_autocast_cpu_dtype()
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:327: DeprecationWarning: torch.is_autocast_cpu_enabled() is deprecated. Please use torch.is_autocast_enabled('cpu') instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:593.)
self.prev = torch.is_autocast_cpu_enabled()
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:328: DeprecationWarning: torch.get_autocast_cpu_dtype() is deprecated. Please use torch.get_autocast_dtype('cpu') instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:728.)
self.prev_fastdtype = torch.get_autocast_cpu_dtype()
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:329: DeprecationWarning: torch.set_autocast_cpu_enabled(enabled) is deprecated. Please use torch.set_autocast_enabled('cpu', enabled) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:584.)
torch.set_autocast_cpu_enabled(self._enabled)
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:330: DeprecationWarning: torch.set_autocast_cpu_dtype(dtype) is deprecated. Please use torch.set_autocast_dtype('cpu', dtype) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:677.)
torch.set_autocast_cpu_dtype(self.fast_dtype) # type: ignore[arg-type]
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:378: DeprecationWarning: torch.set_autocast_cpu_enabled(enabled) is deprecated. Please use torch.set_autocast_enabled('cpu', enabled) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:584.)
torch.set_autocast_cpu_enabled(self.prev)
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:379: DeprecationWarning: torch.set_autocast_cpu_dtype(dtype) is deprecated. Please use torch.set_autocast_dtype('cpu', dtype) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:677.)
torch.set_autocast_cpu_dtype(self.prev_fastdtype)
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:206: DeprecationWarning: torch.get_autocast_cpu_dtype() is deprecated. Please use torch.get_autocast_dtype('cpu') instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:728.)
self.fast_dtype = torch.get_autocast_cpu_dtype()
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:327: DeprecationWarning: torch.is_autocast_cpu_enabled() is deprecated. Please use torch.is_autocast_enabled('cpu') instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:593.)
self.prev = torch.is_autocast_cpu_enabled()
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:328: DeprecationWarning: torch.get_autocast_cpu_dtype() is deprecated. Please use torch.get_autocast_dtype('cpu') instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:728.)
self.prev_fastdtype = torch.get_autocast_cpu_dtype()
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:329: DeprecationWarning: torch.set_autocast_cpu_enabled(enabled) is deprecated. Please use torch.set_autocast_enabled('cpu', enabled) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:584.)
torch.set_autocast_cpu_enabled(self._enabled)
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:330: DeprecationWarning: torch.set_autocast_cpu_dtype(dtype) is deprecated. Please use torch.set_autocast_dtype('cpu', dtype) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:677.)
torch.set_autocast_cpu_dtype(self.fast_dtype) # type: ignore[arg-type]
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:378: DeprecationWarning: torch.set_autocast_cpu_enabled(enabled) is deprecated. Please use torch.set_autocast_enabled('cpu', enabled) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:584.)
torch.set_autocast_cpu_enabled(self.prev)
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:379: DeprecationWarning: torch.set_autocast_cpu_dtype(dtype) is deprecated. Please use torch.set_autocast_dtype('cpu', dtype) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:677.)
torch.set_autocast_cpu_dtype(self.prev_fastdtype)
time to export symbolic --- 0.9746730999977444
fake¶
<<<
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_decoder
# Build the model and its example inputs with get_llama_decoder
# (two hidden layers, eager attention implementation).
print("creating model")
model, example_args_collection = get_llama_decoder(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=2,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# Reset dynamo right before starting the timer; only the export call below
# is measured (model creation is deliberately outside the timed region).
torch._dynamo.reset()
begin = time.perf_counter()
torch._dynamo.export(model, tracing_mode="fake")(*example_args_collection[0])
print(f"time to export fake --- {time.perf_counter() - begin}")
>>>
creating model
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:206: DeprecationWarning: torch.get_autocast_cpu_dtype() is deprecated. Please use torch.get_autocast_dtype('cpu') instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:728.)
self.fast_dtype = torch.get_autocast_cpu_dtype()
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:327: DeprecationWarning: torch.is_autocast_cpu_enabled() is deprecated. Please use torch.is_autocast_enabled('cpu') instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:593.)
self.prev = torch.is_autocast_cpu_enabled()
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:328: DeprecationWarning: torch.get_autocast_cpu_dtype() is deprecated. Please use torch.get_autocast_dtype('cpu') instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:728.)
self.prev_fastdtype = torch.get_autocast_cpu_dtype()
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:329: DeprecationWarning: torch.set_autocast_cpu_enabled(enabled) is deprecated. Please use torch.set_autocast_enabled('cpu', enabled) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:584.)
torch.set_autocast_cpu_enabled(self._enabled)
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:330: DeprecationWarning: torch.set_autocast_cpu_dtype(dtype) is deprecated. Please use torch.set_autocast_dtype('cpu', dtype) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:677.)
torch.set_autocast_cpu_dtype(self.fast_dtype) # type: ignore[arg-type]
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:378: DeprecationWarning: torch.set_autocast_cpu_enabled(enabled) is deprecated. Please use torch.set_autocast_enabled('cpu', enabled) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:584.)
torch.set_autocast_cpu_enabled(self.prev)
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:379: DeprecationWarning: torch.set_autocast_cpu_dtype(dtype) is deprecated. Please use torch.set_autocast_dtype('cpu', dtype) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:677.)
torch.set_autocast_cpu_dtype(self.prev_fastdtype)
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:206: DeprecationWarning: torch.get_autocast_cpu_dtype() is deprecated. Please use torch.get_autocast_dtype('cpu') instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:728.)
self.fast_dtype = torch.get_autocast_cpu_dtype()
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:327: DeprecationWarning: torch.is_autocast_cpu_enabled() is deprecated. Please use torch.is_autocast_enabled('cpu') instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:593.)
self.prev = torch.is_autocast_cpu_enabled()
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:328: DeprecationWarning: torch.get_autocast_cpu_dtype() is deprecated. Please use torch.get_autocast_dtype('cpu') instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:728.)
self.prev_fastdtype = torch.get_autocast_cpu_dtype()
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:329: DeprecationWarning: torch.set_autocast_cpu_enabled(enabled) is deprecated. Please use torch.set_autocast_enabled('cpu', enabled) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:584.)
torch.set_autocast_cpu_enabled(self._enabled)
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:330: DeprecationWarning: torch.set_autocast_cpu_dtype(dtype) is deprecated. Please use torch.set_autocast_dtype('cpu', dtype) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:677.)
torch.set_autocast_cpu_dtype(self.fast_dtype) # type: ignore[arg-type]
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:378: DeprecationWarning: torch.set_autocast_cpu_enabled(enabled) is deprecated. Please use torch.set_autocast_enabled('cpu', enabled) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:584.)
torch.set_autocast_cpu_enabled(self.prev)
/home/xadupre/.local/lib/python3.10/site-packages/torch/amp/autocast_mode.py:379: DeprecationWarning: torch.set_autocast_cpu_dtype(dtype) is deprecated. Please use torch.set_autocast_dtype('cpu', dtype) instead. (Triggered internally at ../torch/csrc/autograd/init.cpp:677.)
torch.set_autocast_cpu_dtype(self.prev_fastdtype)
time to export fake --- 0.22533040000416804
Custom Exporter¶
With a very simple model:
<<<
import time
from experimental_experiment.checks import print_import_time
print_import_time()
import torch
import experimental_experiment.torch_interpreter
class Neuron(torch.nn.Module):
    """Minimal module: one linear layer followed by a sigmoid."""

    def __init__(self, n_dims: int, n_targets: int):
        """Create the linear layer mapping ``n_dims`` inputs to ``n_targets`` outputs."""
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        """Return ``sigmoid(linear(x))``."""
        return torch.sigmoid(self.linear(x))
# Model with 3 input features and 1 target, and a random input of shape (5, 3).
model = Neuron(3, 1)
x = torch.rand(5, 3)
# First export of the model with the custom exporter, timed on its own.
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, (x,))
print(f"time to export 1x --- {time.perf_counter() - begin}")
# Second export of the same model and input, timed again so both durations
# can be compared in the printed output.
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, (x,))
print(f"time to export 2x --- {time.perf_counter() - begin}")
>>>
time to import onnx --- 0.18910039999900619
time to import onnx_array_api --- 0.0001554000045871362
time to import torch --- 1.0526032999987365
'torch.export' already imported
time to import torch.export --- 1.8999999156221747e-06
time to import onnxscript --- 0.0835092999986955
[2024-05-08 14:07:30,451] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
time to import onnxruntime --- 2.1038609000024735
'torch.onnx' already imported
time to import torch.onnx --- 2.3999964469112456e-06
time to import torch._dynamo --- 0.1279768000022159
time to import experimental_experiment.torch_interpreter --- 0.008113299998512957
time to import experimental_experiment.torch_interpreter.aten_functions --- 0.0021956999989924952
time to export 1x --- 0.3059434999959194
time to export 2x --- 0.04171910000150092
With a bigger model:
<<<
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_decoder
# Build the model and its example inputs with get_llama_decoder
# (a single hidden layer here, eager attention implementation).
model, example_args_collection = get_llama_decoder(
input_dims=[(2, 1024)],
hidden_size=4096,
num_hidden_layers=1,
vocab_size=32000,
intermediate_size=11008,
max_position_embeddings=2048,
num_attention_heads=32,
_attn_implementation="eager",
)
# First export with the custom exporter, timed on its own.
# NOTE(review): the recorded run of this snippet stops here with a
# RuntimeError raised from torch.export ("Encountered autograd state manager
# op ... _set_grad_enabled"); the error message itself suggests wrapping the
# export call in a torch.no_grad() context -- to be confirmed and the output
# regenerated.
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(
model, example_args_collection[0]
)
print(f"time to export 1x --- {time.perf_counter() - begin}")
# Second export of the same model and inputs, timed again for comparison.
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(
model, example_args_collection[0]
)
print(f"time to export 2x --- {time.perf_counter() - begin}")
>>>
[2024-05-08 14:07:36,450] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[runpythonerror]
Traceback (most recent call last):
File "<stdin>", line 37, in <module>
File "/home/xadupre/github/experimental-experiment/experimental_experiment/torch_interpreter/onnx_export.py", line 321, in to_onnx
graph_module, builder, interpreter = _make_builder_interpreter(
File "/home/xadupre/github/experimental-experiment/experimental_experiment/torch_interpreter/onnx_export.py", line 175, in _make_builder_interpreter
exported_mod = _export(
File "/home/xadupre/github/experimental-experiment/experimental_experiment/torch_interpreter/onnx_export.py", line 102, in _export
exported_mod = torch.export.export(mod, args, dynamic_shapes=dynamic_shapes)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/export/__init__.py", line 174, in export
return _export(
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/export/_trace.py", line 833, in wrapper
raise e
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/export/_trace.py", line 816, in wrapper
ep = fn(*args, **kwargs)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/export/exported_program.py", line 85, in wrapper
return fn(*args, **kwargs)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/export/_trace.py", line 1168, in _export
ep_non_strict = _export_non_strict(
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/export/_trace.py", line 520, in _export_non_strict
gm, graph_signature = transform(aot_export_module)(
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 1135, in aot_export_module
fx_g, metadata, in_spec, out_spec = _aot_export_function(
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 1354, in _aot_export_function
fx_g, meta = create_aot_dispatcher_function(
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_dynamo/utils.py", line 268, in time_wrapper
r = func(*args, **kwargs)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 685, in create_aot_dispatcher_function
compiled_fn = compiler_fn(
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 470, in aot_wrapper_dedupe
return compiler_fn(flat_fn, leaf_flat_args, aot_config, fw_metadata=fw_metadata)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 672, in aot_wrapper_synthetic_base
return compiler_fn(flat_fn, flat_args, aot_config, fw_metadata=fw_metadata)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py", line 134, in aot_dispatch_base_graph
fw_module = _create_graph(
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/dispatch_and_compile_graph.py", line 43, in _create_graph
fx_g = make_fx(
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/fx/experimental/proxy_tensor.py", line 1276, in wrapped
t = dispatch_trace(wrap_key(func, args, fx_tracer, pre_dispatch), tracer=fx_tracer, concrete_args=tuple(phs))
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_compile.py", line 24, in inner
return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 403, in _fn
return fn(*args, **kwargs)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 36, in inner
return fn(*args, **kwargs)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/fx/experimental/proxy_tensor.py", line 658, in dispatch_trace
graph = tracer.trace(root, concrete_args)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/fx/experimental/proxy_tensor.py", line 1028, in trace
res = super().trace(root, concrete_args)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 403, in _fn
return fn(*args, **kwargs)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 36, in inner
return fn(*args, **kwargs)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/fx/_symbolic_trace.py", line 793, in trace
(self.create_arg(fn(*args)),),
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/fx/experimental/proxy_tensor.py", line 676, in wrapped
out = f(*tensors)
File "<string>", line 1, in <lambda>
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/traced_function_transforms.py", line 387, in _functionalized_f_helper
f_outs = fn(*f_args)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/traced_function_transforms.py", line 71, in inner_fn
outs = fn(*args)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/utils.py", line 176, in flat_fn
tree_out = fn(*args, **kwargs)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/traced_function_transforms.py", line 695, in functional_call
out = PropagateUnbackedSymInts(mod).run(
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/fx/interpreter.py", line 145, in run
self.env[node] = self.run_node(node)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/fx/experimental/symbolic_shapes.py", line 4835, in run_node
result = super().run_node(n)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/fx/interpreter.py", line 202, in run_node
return getattr(self, n.op)(n.target, args, kwargs)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/fx/interpreter.py", line 274, in call_function
return target(*args, **kwargs)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/fx/experimental/proxy_tensor.py", line 721, in __torch_function__
return func(*args, **kwargs)
File "/home/xadupre/.local/lib/python3.10/site-packages/torch/export/_safeguard.py", line 37, in __torch_function__
raise RuntimeError(
RuntimeError: Encountered autograd state manager op <built-in function _set_grad_enabled> trying to change global autograd state while exporting. This is unsafe because we don't capture this op in torch.export today, hence we can't reflect the user intention soundly. You can fix this by adding a torch.no_grad() context around the export call.
While executing %_set_grad_enabled_1 : [num_users=0] = call_function[target=torch._C._set_grad_enabled](args = (True,), kwargs = {})
Original traceback:
None
Dynamo Exporter¶
<<<
import time
import warnings
from experimental_experiment.checks import print_import_time
print_import_time()
import torch
import experimental_experiment.torch_interpreter
class Neuron(torch.nn.Module):
    """Minimal module: one linear layer followed by a sigmoid."""

    def __init__(self, n_dims: int, n_targets: int):
        """Create the linear layer mapping ``n_dims`` inputs to ``n_targets`` outputs."""
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        """Return ``sigmoid(linear(x))``."""
        return torch.sigmoid(self.linear(x))
# Model with 3 input features and 1 target, and a random input of shape (5, 3).
model = Neuron(3, 1)
x = torch.rand(5, 3)

# Both exports run inside the same catch_warnings context so that every
# warning emitted during export is ignored and only the timings are printed.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # First export with torch.onnx.dynamo_export, timed on its own.
    begin = time.perf_counter()
    onx = torch.onnx.dynamo_export(model, x)
    print(f"time to export 1x --- {time.perf_counter() - begin}")

    # Second export of the same model and input, timed again for comparison.
    begin = time.perf_counter()
    onx = torch.onnx.dynamo_export(model, x)
    print(f"time to export 2x --- {time.perf_counter() - begin}")
>>>
time to import onnx --- 0.19956049999746028
time to import onnx_array_api --- 0.00016119999781949446
time to import torch --- 1.07216960000369
'torch.export' already imported
time to import torch.export --- 2.0000006770715117e-06
time to import onnxscript --- 0.09584490000270307
[2024-05-08 14:08:07,020] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
time to import onnxruntime --- 2.1254180999967502
'torch.onnx' already imported
time to import torch.onnx --- 1.5999976312741637e-06
time to import torch._dynamo --- 0.10234369999670889
time to import experimental_experiment.torch_interpreter --- 0.007185000002209563
time to import experimental_experiment.torch_interpreter.aten_functions --- 0.0016757000048528425
Applied 0 of general pattern rewrite rules.
Applied 0 of general pattern rewrite rules.
time to export 1x --- 0.8329499000028591
Applied 0 of general pattern rewrite rules.
Applied 0 of general pattern rewrite rules.
time to export 2x --- 0.07871480000176234
With a bigger model:
<<<
import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.torch_models.llama_helper import get_llama_decoder
# Build the model and its example inputs with get_llama_decoder
# (a single hidden layer here, eager attention implementation).
model, example_args_collection = get_llama_decoder(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=1,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# Both exports run inside the same catch_warnings context so that every
# Python warning emitted during export is ignored.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # First export with torch.onnx.dynamo_export, timed on its own.
    begin = time.perf_counter()
    onx = torch.onnx.dynamo_export(model, *example_args_collection[0])
    print(f"time to export 1x --- {time.perf_counter() - begin}")

    # Second export of the same model and inputs, timed again for comparison.
    begin = time.perf_counter()
    onx = torch.onnx.dynamo_export(model, *example_args_collection[0])
    print(f"time to export 2x --- {time.perf_counter() - begin}")
>>>
[2024-05-08 14:08:13,681] [INFO] [real_accelerator.py:158:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[runpythonerror]
2024-05-08 14:09:18,568 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue result_1 due to large size 67108864.
2024-05-08 14:09:18,575 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue t due to large size 67108864.
2024-05-08 14:09:18,583 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue result_1 due to large size 67108864.
2024-05-08 14:09:18,583 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue t_1 due to large size 67108864.
2024-05-08 14:09:18,589 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue result_1 due to large size 67108864.
2024-05-08 14:09:18,589 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue t_2 due to large size 67108864.
2024-05-08 14:09:18,734 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue result_1 due to large size 67108864.
2024-05-08 14:09:18,734 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue t_3 due to large size 67108864.
2024-05-08 14:09:18,748 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue result_1 due to large size 180355072.
2024-05-08 14:09:18,748 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue t_4 due to large size 180355072.
2024-05-08 14:09:18,752 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue result_1 due to large size 180355072.
2024-05-08 14:09:18,752 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue t_5 due to large size 180355072.
2024-05-08 14:09:18,756 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue result_1 due to large size 180355072.
2024-05-08 14:09:18,756 onnxscript.optimizer.constant_folding [WARNING] - Skip storing constant folded nvalue t_6 due to large size 180355072.