onnx_diagnostic.torch_models.code_sample¶
- onnx_diagnostic.torch_models.code_sample.code_sample(model_id: str, task: str | None = None, do_run: bool = False, exporter: str | None = None, do_same: bool = False, verbose: int = 0, dtype: str | dtype | None = None, device: str | device | None = None, same_as_pretrained: bool = False, use_pretrained: bool = False, optimization: str | None = None, quiet: bool = False, patch: bool | str | Dict[str, bool] = False, rewrite: bool = False, stop_if_static: int = 1, dump_folder: str | None = None, drop_inputs: List[str] | None = None, input_options: Dict[str, Any] | None = None, model_options: Dict[str, Any] | None = None, subfolder: str | None = None, opset: int | None = None, runtime: str = 'onnxruntime', output_names: List[str] | None = None) → str[source]¶
This generates the code to export a model with the proper settings.
- Parameters:
model_id – model id to validate
task – task used to generate the necessary inputs; can be left empty to use the default task for this model if it can be determined
do_run – checks that the model works with the defined inputs
exporter – exports the model using this exporter, available list: export-strict, export-nostrict, … see below
do_same – checks the discrepancies of the exported model
verbose – verbosity level
dtype – uses this dtype to check the model
device – do the verification on this device
same_as_pretrained – use a model equivalent to the trained one; this is not always possible
use_pretrained – use the trained model, not the untrained one
optimization – optimization to apply to the exported model, depends on the exporter
quiet – if True, catches exceptions if any issue arises
patch – applies patches (patch_transformers=True, patch_diffusers=True) if True before exporting, see onnx_diagnostic.torch_export_patches.torch_export_patches(); a string can be used to specify only one of them
rewrite – applies known rewriting (patch_transformers=True) before exporting, see onnx_diagnostic.torch_export_patches.torch_export_patches()
stop_if_static – stops if a dynamic dimension becomes static, see onnx_diagnostic.torch_export_patches.torch_export_patches()
dump_folder – dumps everything in a subfolder of this one
drop_inputs – drops this list of inputs (given their names)
input_options – additional options to define the dummy inputs used to export
model_options – additional options when creating the model such as num_hidden_layers or attn_implementation
subfolder – version or subfolder to use when retrieving a model id
opset – onnx opset to use for the conversion
runtime – onnx runtime used to check for discrepancies, possible values: onnxruntime, torch, orteval, orteval10, ref; only used if do_run is True (see the sketch after the example below)
output_names – output names the onnx exporter should use
- Returns:
the generated code as a string
<<<
from onnx_diagnostic.torch_models.code_sample import code_sample

print(
    code_sample(
        "arnir0/Tiny-LLM",
        exporter="onnx-dynamo",
        optimization="ir",
        patch=True,
    )
)
>>>
from typing import Any
import torch
import onnxscript
from onnx_diagnostic.torch_export_patches import torch_export_patches
from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache


def get_model_with_inputs(
    model_id: str,
    subfolder: str | None = None,
    dtype: str | torch.dtype | None = None,
    device: str | torch.device | None = None,
    same_as_pretrained: bool = False,
    use_pretrained: bool = False,
    input_options: dict[str, Any] | None = None,
    model_options: dict[str, Any] | None = None,
) -> dict[str, Any]:
    if use_pretrained:
        import transformers

        assert (
            same_as_pretrained
        ), "same_as_pretrained must be True if use_pretrained is True"
        # tokenizer = AutoTokenizer.from_pretrained(model_path)
        model = transformers.AutoModel.from_pretrained(
            model_id,
            trust_remote_code=True,
            subfolder=subfolder,
            dtype=dtype,
            device=device,
        )
        data = {"model": model}
        assert (
            not input_options
        ), "Not implemented yet with input_options{input_options}"
        assert (
            not model_options
        ), "Not implemented yet with input_options{model_options}"
    else:
        from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs

        data = get_untrained_model_with_inputs(
            model_id,
            use_pretrained=use_pretrained,
            same_as_pretrained=same_as_pretrained,
            inputs_kwargs=input_options,
            model_kwargs=model_options,
            subfolder=subfolder,
            add_second_input=False,
        )
    if dtype:
        data["model"] = data["model"].to(
            getattr(torch, dtype) if isinstance(dtype, str) else dtype
        )
    if device:
        data["model"] = data["model"].to(device)
    return data["model"]


model = get_model_with_inputs("arnir0/Tiny-LLM")

inputs = dict(
    input_ids=torch.randint(29562, size=(2, 3), dtype=torch.int64),
    attention_mask=torch.randint(1, size=(2, 33), dtype=torch.int64),
    position_ids=torch.randint(32, size=(2, 3), dtype=torch.int64),
    past_key_values=make_dynamic_cache(
        [
            (
                torch.rand((2, 1, 30, 96), dtype=torch.float32),
                torch.rand((2, 1, 30, 96), dtype=torch.float32),
            )
        ]
    ),
)

with torch_export_patches(
    patch_transformers=True, patch_diffusers=True, patch=True, stop_if_static=1
):
    epo = torch.onnx.export(
        model,
        args=(),
        kwargs=inputs,
        dynamic_shapes={
            "input_ids": {0: "batch", 1: "seq_length"},
            "attention_mask": {0: "batch", 1: "cache+seq"},
            "position_ids": {0: "batch", 1: "seq_length"},
            "past_key_values": [
                [{0: "batch", 2: "cache_length"}],
                [{0: "batch", 2: "cache_length"}],
            ],
        },
    )

onnxscript.optimizer.optimize_ir(epo.model)
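A minimal sketch of a variant call is shown below, reusing the model id from the example above. Only parameters documented in the list above are used; the dtype, device, dump folder and output file name are illustrative assumptions, not requirements. Since code_sample returns the generated script as a string, it can be written to a file and executed on its own.

from onnx_diagnostic.torch_models.code_sample import code_sample

# Variant call: do_run=True asks for a check that the model works with the
# generated dummy inputs, and runtime selects the runtime used to check
# discrepancies (here onnxruntime). The dtype, device and dump_folder
# values below are illustrative only.
code = code_sample(
    "arnir0/Tiny-LLM",
    exporter="onnx-dynamo",
    optimization="ir",
    patch=True,
    do_run=True,
    runtime="onnxruntime",
    dtype="float16",
    device="cuda",
    dump_folder="dump_code_sample",
)

# The returned string is a standalone script; save it to run it later.
with open("export_tiny_llm.py", "w") as f:
    f.write(code)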