Export Gemma3 tiny random with InputObserver
This reuses the recipe introduced in the example Export a LLM with InputObserver (with Tiny-LLM), applied to the model tiny-random/gemma-3.
import pandas
import torch
from onnx_diagnostic import doc
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.export.api import to_onnx
from onnx_diagnostic.torch_export_patches import (
    register_additional_serialization_functions,
    torch_export_patches,
)
from onnx_diagnostic.investigate.input_observer import InputObserver
from transformers import pipeline

model_id = "tiny-random/gemma-3"
pipe = pipeline(
    "image-text-to-text",
    model=model_id,
    device="cpu",
    trust_remote_code=True,
    max_new_tokens=3,
    dtype=torch.float16,
)

messages = [
    {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
    {
        "role": "user",
        "content": [
            {
                "type": "image",
                "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG",
            },
            {"type": "text", "text": "What animal is on the candy?"},
        ],
    },
]
Loading weights: 100%|██████████| 67/67 [00:00<00:00, 1055.79it/s, Materializing param=model.vision_tower.vision_model.post_layernorm.weight]
The model to observe.
print("model type:", type(pipe.model))
model type: <class 'transformers.models.gemma3.modeling_gemma3.Gemma3ForConditionalGeneration'>
Captures inputs and outputs for the model. One observation is stored per forward call: the prefill step and every subsequent decoding step.
observer = InputObserver(
    missing=dict(pixel_values=torch.empty((0, 3, 896, 896), dtype=torch.float16))
)

with (
    register_additional_serialization_functions(patch_transformers=True),
    observer(pipe.model),
):
    pipe(text=messages, max_new_tokens=4)
print(f"{observer.num_obs()} observations stored for encoder.")
3 observations stored for encoder.
Exports the model.
kwargs = observer.infer_arguments()
dynamic_shapes = observer.infer_dynamic_shapes(set_batch_dimension_for=True)
print(f"encoder kwargs={string_type(kwargs, with_shape=True)}")
print(f"encoder dynamic_shapes={dynamic_shapes}")
for candidate in observer.info.inputs:
    print(
        " ",
        candidate,
        candidate.str_obs(),
        string_type(candidate.aligned_flat_list, with_shape=True),
    )

filename = "plot_export_gemma3_tiny_input_observer.onnx"
with torch_export_patches(patch_transformers=True, patch_torch=True, stop_if_static=2):
    to_onnx(
        pipe.model,
        args=(),
        filename=filename,
        kwargs=kwargs,
        dynamic_shapes=dynamic_shapes,
        exporter="custom",
    )
encoder kwargs=dict(input_ids:T7s1x282,pixel_values:T10s1x3x896x896,attention_mask:T7s1x282,position_ids:T7s1x282,past_key_values:DynamicCache(DynamicSlidingWindowLayer(T10s1x1x0x32, T10s1x1x0x32), DynamicLayer(T10s1x1x0x32, T10s1x1x0x32)),token_type_ids:T7s1x282,cache_position:T7s282,logits_to_keep:int)
encoder dynamic_shapes={'input_ids': {0: DimHint(DYNAMIC), 1: DimHint(DYNAMIC)}, 'pixel_values': {0: DimHint(DYNAMIC)}, 'attention_mask': {0: DimHint(DYNAMIC), 1: DimHint(DYNAMIC)}, 'position_ids': {0: DimHint(DYNAMIC), 1: DimHint(DYNAMIC)}, 'past_key_values': [{0: DimHint(DYNAMIC), 2: DimHint(DYNAMIC)}, {0: DimHint(DYNAMIC), 2: DimHint(DYNAMIC)}, {0: DimHint(DYNAMIC), 2: DimHint(DYNAMIC)}, {0: DimHint(DYNAMIC), 2: DimHint(DYNAMIC)}], 'token_type_ids': {0: DimHint(DYNAMIC), 1: DimHint(DYNAMIC)}, 'cache_position': {0: DimHint(DYNAMIC)}, 'logits_to_keep': None}
InputCandidate(0 args, 7 kwargs, 6 tensors, 10 aligned tensors) InputCandidate(args=(), kwargs=dict(input_ids:T7s1x282,pixel_values:T10s1x3x896x896,attention_mask:T7s1x282,position_ids:T7s1x282,past_key_values:DynamicCache(key_cache=#0[], value_cache=#0[]),token_type_ids:T7s1x282,cache_position:T7s282), cst_kwargs={'logits_to_keep': 1}) #10[T7s1x282,T10s1x3x896x896,T7s1x282,T7s1x282,None,None,None,None,T7s1x282,T7s282]
InputCandidate(0 args, 7 kwargs, 10 tensors, 10 aligned tensors) InputCandidate(args=(), kwargs=dict(input_ids:T7s1x1,pixel_values:T10s0x3x896x896,attention_mask:T7s1x283,position_ids:T7s1x1,past_key_values:DynamicCache(DynamicSlidingWindowLayer(T10s1x1x282x32, T10s1x1x282x32), DynamicLayer(T10s1x1x282x32, T10s1x1x282x32)),token_type_ids:T7s1x1,cache_position:T7s1), cst_kwargs={'logits_to_keep': 1}) #10[T7s1x1,T10s0x3x896x896,T7s1x283,T7s1x1,T10s1x1x282x32,T10s1x1x282x32,T10s1x1x282x32,T10s1x1x282x32,T7s1x1,T7s1]
InputCandidate(0 args, 7 kwargs, 10 tensors, 10 aligned tensors) InputCandidate(args=(), kwargs=dict(input_ids:T7s1x1,pixel_values:T10s0x3x896x896,attention_mask:T7s1x284,position_ids:T7s1x1,past_key_values:DynamicCache(DynamicSlidingWindowLayer(T10s1x1x283x32, T10s1x1x283x32), DynamicLayer(T10s1x1x283x32, T10s1x1x283x32)),token_type_ids:T7s1x1,cache_position:T7s1), cst_kwargs={'logits_to_keep': 1}) #10[T7s1x1,T10s0x3x896x896,T7s1x284,T7s1x1,T10s1x1x283x32,T10s1x1x283x32,T10s1x1x283x32,T10s1x1x283x32,T7s1x1,T7s1]
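The exported file can be inspected independently to confirm the dynamic axes survived the export. A minimal sketch, assuming onnxruntime is installed (this session is not part of the original recipe):

import onnxruntime

# Load the model exported above and print its input signature.
sess = onnxruntime.InferenceSession(
    "plot_export_gemma3_tiny_input_observer.onnx",
    providers=["CPUExecutionProvider"],
)
for inp in sess.get_inputs():
    # Dynamic dimensions show up as symbolic names instead of fixed integers.
    print(inp.name, inp.type, inp.shape)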
Let’s measure the discrepancies.
data = observer.check_discrepancies(filename, progress_bar=True, atol=1e-2, include_io=True)
df = pandas.DataFrame(data)
df.to_excel("plot_export_gemma3_tiny_input_observer.xlsx")
print(df)
100%|██████████| 3/3 [00:00<00:00, 5.04it/s]
abs rel sum n dnan dev >0.1 >0.01 SUCCESS index duration_torch ort_duration n_inputs n_none n_empty inputs outputs_torch outputs_ort
0 0.007812 3.222308 362.949877 306496.0 0 0 0 0 False 9 0.507734 0.531120 10 4 4 dict(input_ids:T7s1x282,pixel_values:T10s1x3x8... #6[T10s1x1x262208,T10s1x1x282x32,T10s1x1x282x3... #6[T10s1x1x262208,T10s1x1x282x32,T10s1x1x282x3...
1 0.007812 3.164428 321.540422 298432.0 0 0 0 0 False 9 0.005239 0.013916 10 0 1 dict(input_ids:T7s1x1,pixel_values:T10s0x3x896... #5[T10s1x1x262208,T10s1x1x283x32,T10s1x1x283x3... #5[T10s1x1x262208,T10s1x1x283x32,T10s1x1x283x3...
2 0.007812 3.135021 382.645088 298560.0 0 0 0 0 False 9 0.005925 0.003836 10 0 1 dict(input_ids:T7s1x1,pixel_values:T10s0x3x896... #5[T10s1x1x262208,T10s1x1x284x32,T10s1x1x284x3... #5[T10s1x1x262208,T10s1x1x284x32,T10s1x1x284x3...
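The DataFrame can also be filtered directly, for example to keep only the rows flagged as failing. A small pandas sketch, using the column names shown in the output above:

# Rows whose discrepancy check did not succeed, with a few relevant columns.
failing = df[~df["SUCCESS"].astype(bool)]
print(failing[["abs", "rel", "duration_torch", "ort_duration"]])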
Let’s show the errors.
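The original example presumably renders a figure at this point; as a minimal sketch, assuming matplotlib is available, the maximum absolute error stored in the DataFrame can be plotted per observation:

import matplotlib.pyplot as plt

# Bar chart of the maximum absolute error measured for each observed call.
ax = df["abs"].plot.bar(title="Maximum absolute discrepancy per observation")
ax.set_xlabel("observation index")
ax.set_ylabel("absolute error")
plt.tight_layout()
plt.savefig("plot_export_gemma3_tiny_input_observer.png")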

Total running time of the script: (0 minutes 29.319 seconds)
Related examples
Export a LLM through method generate (with Tiny-LLM)