onnx_diagnostic.torch_models.code_sample¶
- onnx_diagnostic.torch_models.code_sample.code_sample(model_id: str, task: str | None = None, do_run: bool = False, exporter: str | None = None, do_same: bool = False, verbose: int = 0, dtype: str | dtype | None = None, device: str | device | None = None, same_as_pretrained: bool = False, use_pretrained: bool = False, optimization: str | None = None, quiet: bool = False, patch: bool | str | Dict[str, bool] = False, rewrite: bool = False, stop_if_static: int = 1, dump_folder: str | None = None, drop_inputs: List[str] | None = None, input_options: Dict[str, Any] | None = None, model_options: Dict[str, Any] | None = None, subfolder: str | None = None, opset: int | None = None, runtime: str = 'onnxruntime', output_names: List[str] | None = None) → str[source][source]¶
- This generates code to export a model with the proper settings. - Parameters:
- model_id – model id to validate 
- task – task used to generate the necessary inputs, can be left empty to use the default task for this model if it can be determined 
- do_run – checks the model works with the defined inputs 
- exporter – exports the model using this exporter, available list: export-strict, export-nostrict, … see below
- do_same – checks the discrepancies of the exported model 
- verbose – verbosity level 
- dtype – uses this dtype to check the model 
- device – do the verification on this device 
- same_as_pretrained – use a model equivalent to the trained one; this is not always possible 
- use_pretrained – use the trained model, not the untrained one 
- optimization – optimization to apply to the exported model, depends on the exporter 
- quiet – if quiet, catches exception if any issue 
- patch – applies patches (patch_transformers=True, patch_diffusers=True) if True before exporting, see onnx_diagnostic.torch_export_patches.torch_export_patches(); a string can be used to specify only one of them
- rewrite – applies known rewriting (patch_transformers=True) before exporting, see onnx_diagnostic.torch_export_patches.torch_export_patches()
- stop_if_static – stops if a dynamic dimension becomes static, see onnx_diagnostic.torch_export_patches.torch_export_patches()
- dump_folder – dumps everything in a subfolder of this one 
- drop_inputs – drops this list of inputs (given their names) 
- input_options – additional options to define the dummy inputs used to export 
- model_options – additional options when creating the model such as num_hidden_layers or attn_implementation
- subfolder – version or subfolder to use when retrieving a model id 
- opset – onnx opset to use for the conversion 
- runtime – onnx runtime to use to check for discrepancies, possible values: onnxruntime, torch, orteval, orteval10, ref; only used if do_run is true
- output_names – output names the onnx exporter should use 
 
- Returns:
- a code 
 - <<< - from onnx_diagnostic.torch_models.code_sample import code_sample print( code_sample( "arnir0/Tiny-LLM", exporter="onnx-dynamo", optimization="ir", patch=True, ) ) - >>> - from typing import Any import torch import onnxscript from onnx_diagnostic.torch_export_patches import torch_export_patches from onnx_diagnostic.helpers.cache_helper import make_dynamic_cache def get_model_with_inputs( model_id: str, subfolder: str | None = None, dtype: str | torch.dtype | None = None, device: str | torch.device | None = None, same_as_pretrained: bool = False, use_pretrained: bool = False, input_options: dict[str, Any] | None = None, model_options: dict[str, Any] | None = None, ) -> dict[str, Any]: if use_pretrained: import transformers assert ( same_as_pretrained ), "same_as_pretrained must be True if use_pretrained is True" # tokenizer = AutoTokenizer.from_pretrained(model_path) model = transformers.AutoModel.from_pretrained( model_id, trust_remote_code=True, subfolder=subfolder, dtype=dtype, device=device, ) data = {"model": model} assert ( not input_options ), "Not implemented yet with input_options{input_options}" assert ( not model_options ), "Not implemented yet with input_options{model_options}" else: from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs data = get_untrained_model_with_inputs( model_id, use_pretrained=use_pretrained, same_as_pretrained=same_as_pretrained, inputs_kwargs=input_options, model_kwargs=model_options, subfolder=subfolder, add_second_input=False, ) if dtype: data["model"] = data["model"].to( getattr(torch, dtype) if isinstance(dtype, str) else dtype ) if device: data["model"] = data["model"].to(device) return data["model"] model = get_model_with_inputs("arnir0/Tiny-LLM") inputs = dict( input_ids=torch.randint(29562, size=(2, 3), dtype=torch.int64), attention_mask=torch.randint(1, size=(2, 33), dtype=torch.int64), position_ids=torch.randint(32, size=(2, 3), dtype=torch.int64), past_key_values=make_dynamic_cache( [ 
( torch.rand((2, 1, 30, 96), dtype=torch.float32), torch.rand((2, 1, 30, 96), dtype=torch.float32), ) ] ), ) with torch_export_patches( patch_transformers=True, patch_diffusers=True, patch=True, stop_if_static=1 ): epo = torch.onnx.export( model, args=(), kwargs=inputs, dynamic_shapes={ "input_ids": {0: "batch", 1: "seq_length"}, "attention_mask": {0: "batch", 1: "cache+seq"}, "position_ids": {0: "batch", 1: "seq_length"}, "past_key_values": [ [{0: "batch", 2: "cache_length"}], [{0: "batch", 2: "cache_length"}], ], }, ) onnxscript.optimizer.optimize_ir(epo.model)