Export Times

Custom Exporter

With a very simple model:

<<<

import time
from experimental_experiment.checks import print_import_time

# Report how long each heavy dependency (onnx, torch, onnxruntime, ...)
# takes to import, so import cost can be told apart from export cost below.
print_import_time()

import torch
import experimental_experiment.torch_interpreter


class Neuron(torch.nn.Module):
    """Minimal one-layer model: a linear projection followed by a sigmoid."""

    def __init__(self, n_dims: int, n_targets: int):
        # Modern zero-argument super() (Python 3) instead of super(Neuron, self).
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))


model = Neuron(3, 1)
x = torch.rand(5, 3)

# First export pays one-time warm-up costs (tracing machinery, caches).
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, (x,))
print(f"time to export 1x --- {time.perf_counter() - begin}")

# Second export of the same model measures the warm path.
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(model, (x,))
print(f"time to export 2x --- {time.perf_counter() - begin}")

>>>

    time to import onnx --- 0.5991993869974976
    time to import onnx_array_api --- 0.00012086800052202307
    time to import torch --- 1.9300789539993275
    'torch.export' already imported
    time to import torch.export --- 2.230000973213464e-06
    time to import onnxscript --- 0.13854226599869435
    time to import onnxruntime --- 0.03265998099959688
    time to import torch.onnx --- 0.03169962599713472
    time to import torch._dynamo --- 1.41928641299819
    time to import experimental_experiment.torch_interpreter --- 2.7358014860001276
    time to import experimental_experiment.torch_interpreter.aten_functions --- 0.007763280998915434
    time to export 1x --- 3.4872656509978697
    time to export 2x --- 0.02234246900115977
    [runpythonerror]
    use_kernel_func_from_hub is not available in the installed kernels version. Please upgrade kernels to use this feature.

With a bigger model:

<<<

import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.ext_test_case import get_llama_model

# presumably every heavy module is imported up front so its import cost is
# excluded from the export timings below -- several of these names
# (np, LlamaConfig, LlamaModel, onnx, onnxscript, ...) are otherwise unused here.
# NOTE(review): the captured output of this snippet shows this call failing with
# "TypeError: get_llama_model() got an unexpected keyword argument 'input_dims'"
# -- these keyword arguments no longer match the installed get_llama_model
# signature; update them against the current API.
model, example_args_collection = get_llama_model(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=1,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# First export: includes one-time warm-up cost.
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(
    model, example_args_collection[0]
)
print(f"time to export 1x --- {time.perf_counter() - begin}")

# Second export of the same model: measures the warm path.
begin = time.perf_counter()
onx = experimental_experiment.torch_interpreter.to_onnx(
    model, example_args_collection[0]
)
print(f"time to export 2x --- {time.perf_counter() - begin}")

>>>

    
    [runpythonerror]
    use_kernel_func_from_hub is not available in the installed kernels version. Please upgrade kernels to use this feature.
    Traceback (most recent call last):
      File "<stdin>", line 26, in <module>
    TypeError: get_llama_model() got an unexpected keyword argument 'input_dims'

Dynamo Exporter

<<<

import time
import warnings

from experimental_experiment.checks import print_import_time

# Print per-module import durations so import overhead can be told apart
# from the export timings measured below.
print_import_time()

import torch
import experimental_experiment.torch_interpreter


class Neuron(torch.nn.Module):
    """Minimal one-layer model: a linear projection followed by a sigmoid."""

    def __init__(self, n_dims: int, n_targets: int):
        # Modern zero-argument super() (Python 3) instead of super(Neuron, self).
        super().__init__()
        self.linear = torch.nn.Linear(n_dims, n_targets)

    def forward(self, x):
        return torch.sigmoid(self.linear(x))


model = Neuron(3, 1)
x = torch.rand(5, 3)

# Silence exporter warnings so only the timing lines are printed.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # First export includes one-time warm-up (tracing, decompositions).
    begin = time.perf_counter()
    onx = torch.onnx.export(model, x, dynamo=True)
    print(f"time to export 1x --- {time.perf_counter() - begin}")

    # Second export benefits from warm caches.
    begin = time.perf_counter()
    onx = torch.onnx.export(model, x, dynamo=True)
    print(f"time to export 2x --- {time.perf_counter() - begin}")

>>>

    time to import onnx --- 0.5539983979979297
    time to import onnx_array_api --- 0.0003355670014570933
    time to import torch --- 1.8434638290018484
    'torch.export' already imported
    time to import torch.export --- 2.3649990907870233e-06
    time to import onnxscript --- 0.12674686499667587
    time to import onnxruntime --- 0.025429938999877777
    time to import torch.onnx --- 0.03100906799954828
    time to import torch._dynamo --- 1.1340352190018166
    time to import experimental_experiment.torch_interpreter --- 3.0285749489994487
    time to import experimental_experiment.torch_interpreter.aten_functions --- 0.0069390030002978165
    [torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`...
    [torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`... ✅
    [torch.onnx] Run decompositions...
    [torch.onnx] Run decompositions... ✅
    [torch.onnx] Translate the graph into ONNX...
    [torch.onnx] Translate the graph into ONNX... ✅
    [torch.onnx] Optimize the ONNX graph...
    [torch.onnx] Optimize the ONNX graph... ✅
    time to export 1x --- 1.9132904530015367
    [torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`...
    [torch.onnx] Obtain model graph for `Neuron([...]` with `torch.export.export(..., strict=False)`... ✅
    [torch.onnx] Run decompositions...
    [torch.onnx] Run decompositions... ✅
    [torch.onnx] Translate the graph into ONNX...
    [torch.onnx] Translate the graph into ONNX... ✅
    [torch.onnx] Optimize the ONNX graph...
    [torch.onnx] Optimize the ONNX graph... ✅
    time to export 2x --- 0.8022989689998212

With a bigger model:

<<<

import time
import warnings
import numpy as np
from transformers import LlamaConfig
from transformers.models.llama.modeling_llama import LlamaModel
import onnx
import onnxruntime
import torch
import torch._dynamo
import torch.export
import onnxscript
import torch.onnx
import experimental_experiment
import experimental_experiment.torch_interpreter
import experimental_experiment.torch_interpreter.aten_functions
from experimental_experiment.ext_test_case import get_llama_model

# presumably every heavy module is imported up front so its import cost is
# excluded from the export timings below -- several of these names
# (np, LlamaConfig, LlamaModel, onnx, onnxscript, ...) are otherwise unused here.
# NOTE(review): the captured output of this snippet shows this call failing with
# "TypeError: get_llama_model() got an unexpected keyword argument 'input_dims'"
# -- these keyword arguments no longer match the installed get_llama_model
# signature; update them against the current API.
model, example_args_collection = get_llama_model(
    input_dims=[(2, 1024)],
    hidden_size=4096,
    num_hidden_layers=1,
    vocab_size=32000,
    intermediate_size=11008,
    max_position_embeddings=2048,
    num_attention_heads=32,
    _attn_implementation="eager",
)

# Silence exporter warnings so only the timing lines are printed.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # NOTE(review): *example_args_collection[0] spreads the example inputs as
    # separate positional arguments to torch.onnx.export; if the tuple holds
    # more than one tensor, the extras shift into later positional parameters
    # (e.g. the output file argument). The other snippet passes the args tuple
    # unpacked to to_onnx only -- confirm this star-unpack is intended.
    begin = time.perf_counter()
    onx = torch.onnx.export(model, *example_args_collection[0], dynamo=True)
    print(f"time to export 1x --- {time.perf_counter() - begin}")

    begin = time.perf_counter()
    onx = torch.onnx.export(model, *example_args_collection[0], dynamo=True)
    print(f"time to export 2x --- {time.perf_counter() - begin}")

>>>

    
    [runpythonerror]
    use_kernel_func_from_hub is not available in the installed kernels version. Please upgrade kernels to use this feature.
    Traceback (most recent call last):
      File "<stdin>", line 26, in <module>
    TypeError: get_llama_model() got an unexpected keyword argument 'input_dims'