Note
Go to the end to download the full example code.
From an LLM to processing a prompt
The method generate produces the model's answer for a given prompt.
Let’s implement our own version to better understand how it works,
and then apply it to an ONNX model.
Example with Phi 1.5
microsoft/Phi-1.5 is a small LLM. The example below loads it, tokenizes a prompt, and generates an answer with it.
import os
import time
import sys
import pandas
from tqdm import tqdm
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from onnx_diagnostic.ext_test_case import unit_test_going
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.helpers.torch_helper import to_any, get_weight_type
from onnx_diagnostic.helpers.rt_helper import onnx_generate
from onnx_diagnostic.torch_export_patches import torch_export_patches
from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs
from onnx_diagnostic.torch_models.hghub.hub_api import get_pretrained_config, task_from_id
from onnx_diagnostic.tasks import random_input_kwargs
from onnx_diagnostic.export.api import to_onnx
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Timing records; each entry is a dict with a ``name`` and a ``duration``.
data = []

print("-- load the model...")
if unit_test_going():
    # unit_test_going() returns True if UNITTEST_GOING is 1
    # The example switches to a faster scenario.
    model_id = "arnir0/Tiny-LLM"
    data_export = get_untrained_model_with_inputs(model_id)
    model = data_export["model"]
    export_inputs = data_export["inputs"]
    export_shapes = data_export["dynamic_shapes"]
    tokenizer = AutoTokenizer.from_pretrained(model_id)
else:
    model_id = "microsoft/phi-1_5"
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Build the export inputs and dynamic shapes from the pretrained
    # configuration and the task associated with the model id.
    config = get_pretrained_config(model_id)
    task = task_from_id(model_id)
    kwargs, fct = random_input_kwargs(config, task)
    res = fct(model, config, add_second_input=False, **kwargs)
    export_inputs = res["inputs"]
    export_shapes = res["dynamic_shapes"]

# Both branches need the model on the same device as the tokenized prompt.
model = model.to(device)
print("-- done.")

print("-- tokenize the prompt...")
# NOTE(review): the prompt text is reproduced exactly as it appears here;
# any indentation inside the string may have been lost -- confirm upstream.
inputs = tokenizer(
    '''def print_prime(n):
"""
Print all primes between 1 and n
"""''',
    return_tensors="pt",
    return_attention_mask=False,
).to(device)
print("-- done.")

print("-- compute the answer...")
# Time the reference ``generate`` call and record it in ``data``.
begin = time.perf_counter()
outputs = model.generate(**inputs, max_new_tokens=100)
duration = time.perf_counter() - begin
print(f"-- done in {duration}")
data.append(dict(name="generate", duration=duration))
print("output shape:", string_type(outputs, with_shape=True, with_min_max=True))

print("-- decode the answer...")
# Only the first decoded sequence is displayed.
text = tokenizer.batch_decode(outputs)[0]
print("-- done.")
print(text)
-- load the model...
Loading weights: 0%| | 0/341 [00:00<?, ?it/s]
Loading weights: 0%| | 1/341 [00:00<00:00, 23172.95it/s, Materializing param=lm_head.bias]
Loading weights: 0%| | 1/341 [00:00<00:00, 6820.01it/s, Materializing param=lm_head.bias]
Loading weights: 1%| | 2/341 [00:00<00:00, 4619.28it/s, Materializing param=lm_head.weight]
Loading weights: 1%| | 2/341 [00:00<00:00, 3724.96it/s, Materializing param=lm_head.weight]
Loading weights: 1%| | 3/341 [00:00<00:00, 3200.94it/s, Materializing param=model.embed_tokens.weight]
Loading weights: 1%| | 3/341 [00:00<00:00, 2912.71it/s, Materializing param=model.embed_tokens.weight]
Loading weights: 1%| | 4/341 [00:00<00:00, 2793.41it/s, Materializing param=model.final_layernorm.bias]
Loading weights: 1%| | 4/341 [00:00<00:00, 2628.42it/s, Materializing param=model.final_layernorm.bias]
Loading weights: 1%|▏ | 5/341 [00:00<00:00, 2719.34it/s, Materializing param=model.final_layernorm.weight]
Loading weights: 1%|▏ | 5/341 [00:00<00:00, 2587.16it/s, Materializing param=model.final_layernorm.weight]
Loading weights: 2%|▏ | 6/341 [00:00<00:00, 1225.03it/s, Materializing param=model.layers.0.input_layernorm.bias]
Loading weights: 2%|▏ | 6/341 [00:00<00:00, 1175.42it/s, Materializing param=model.layers.0.input_layernorm.bias]
Loading weights: 2%|▏ | 7/341 [00:00<00:00, 1295.62it/s, Materializing param=model.layers.0.input_layernorm.weight]
Loading weights: 2%|▏ | 7/341 [00:00<00:00, 1271.55it/s, Materializing param=model.layers.0.input_layernorm.weight]
Loading weights: 2%|▏ | 8/341 [00:00<00:00, 785.89it/s, Materializing param=model.layers.0.mlp.fc1.bias]
Loading weights: 2%|▏ | 8/341 [00:00<00:00, 769.24it/s, Materializing param=model.layers.0.mlp.fc1.bias]
Loading weights: 3%|▎ | 9/341 [00:00<00:00, 825.99it/s, Materializing param=model.layers.0.mlp.fc1.weight]
Loading weights: 3%|▎ | 9/341 [00:00<00:00, 816.61it/s, Materializing param=model.layers.0.mlp.fc1.weight]
Loading weights: 3%|▎ | 10/341 [00:00<00:00, 881.06it/s, Materializing param=model.layers.0.mlp.fc2.bias]
Loading weights: 3%|▎ | 10/341 [00:00<00:00, 874.00it/s, Materializing param=model.layers.0.mlp.fc2.bias]
Loading weights: 3%|▎ | 11/341 [00:00<00:00, 934.07it/s, Materializing param=model.layers.0.mlp.fc2.weight]
Loading weights: 3%|▎ | 11/341 [00:00<00:00, 927.20it/s, Materializing param=model.layers.0.mlp.fc2.weight]
Loading weights: 4%|▎ | 12/341 [00:00<00:00, 966.06it/s, Materializing param=model.layers.0.self_attn.dense.bias]
Loading weights: 4%|▎ | 12/341 [00:00<00:00, 944.98it/s, Materializing param=model.layers.0.self_attn.dense.bias]
Loading weights: 4%|▍ | 13/341 [00:00<00:00, 959.25it/s, Materializing param=model.layers.0.self_attn.dense.weight]
Loading weights: 4%|▍ | 13/341 [00:00<00:00, 939.44it/s, Materializing param=model.layers.0.self_attn.dense.weight]
Loading weights: 4%|▍ | 14/341 [00:00<00:00, 920.47it/s, Materializing param=model.layers.0.self_attn.k_proj.bias]
Loading weights: 4%|▍ | 14/341 [00:00<00:00, 906.62it/s, Materializing param=model.layers.0.self_attn.k_proj.bias]
Loading weights: 4%|▍ | 15/341 [00:00<00:00, 922.05it/s, Materializing param=model.layers.0.self_attn.k_proj.weight]
Loading weights: 4%|▍ | 15/341 [00:00<00:00, 914.52it/s, Materializing param=model.layers.0.self_attn.k_proj.weight]
Loading weights: 5%|▍ | 16/341 [00:00<00:00, 953.96it/s, Materializing param=model.layers.0.self_attn.q_proj.bias]
Loading weights: 5%|▍ | 16/341 [00:00<00:00, 948.63it/s, Materializing param=model.layers.0.self_attn.q_proj.bias]
Loading weights: 5%|▍ | 17/341 [00:00<00:00, 988.74it/s, Materializing param=model.layers.0.self_attn.q_proj.weight]
Loading weights: 5%|▍ | 17/341 [00:00<00:00, 984.03it/s, Materializing param=model.layers.0.self_attn.q_proj.weight]
Loading weights: 5%|▌ | 18/341 [00:00<00:00, 1022.60it/s, Materializing param=model.layers.0.self_attn.v_proj.bias]
Loading weights: 5%|▌ | 18/341 [00:00<00:00, 1017.65it/s, Materializing param=model.layers.0.self_attn.v_proj.bias]
Loading weights: 6%|▌ | 19/341 [00:00<00:00, 1053.93it/s, Materializing param=model.layers.0.self_attn.v_proj.weight]
Loading weights: 6%|▌ | 19/341 [00:00<00:00, 1048.80it/s, Materializing param=model.layers.0.self_attn.v_proj.weight]
Loading weights: 6%|▌ | 20/341 [00:00<00:00, 958.46it/s, Materializing param=model.layers.1.input_layernorm.bias]
Loading weights: 6%|▌ | 20/341 [00:00<00:00, 949.68it/s, Materializing param=model.layers.1.input_layernorm.bias]
Loading weights: 6%|▌ | 21/341 [00:00<00:00, 914.10it/s, Materializing param=model.layers.1.input_layernorm.weight]
Loading weights: 6%|▌ | 21/341 [00:00<00:00, 906.76it/s, Materializing param=model.layers.1.input_layernorm.weight]
Loading weights: 6%|▋ | 22/341 [00:00<00:00, 939.24it/s, Materializing param=model.layers.1.mlp.fc1.bias]
Loading weights: 6%|▋ | 22/341 [00:00<00:00, 935.50it/s, Materializing param=model.layers.1.mlp.fc1.bias]
Loading weights: 7%|▋ | 23/341 [00:00<00:00, 851.68it/s, Materializing param=model.layers.1.mlp.fc1.weight]
Loading weights: 7%|▋ | 23/341 [00:00<00:00, 846.49it/s, Materializing param=model.layers.1.mlp.fc1.weight]
Loading weights: 7%|▋ | 24/341 [00:00<00:00, 867.36it/s, Materializing param=model.layers.1.mlp.fc2.bias]
Loading weights: 7%|▋ | 24/341 [00:00<00:00, 863.05it/s, Materializing param=model.layers.1.mlp.fc2.bias]
Loading weights: 7%|▋ | 25/341 [00:00<00:00, 855.95it/s, Materializing param=model.layers.1.mlp.fc2.weight]
Loading weights: 7%|▋ | 25/341 [00:00<00:00, 852.79it/s, Materializing param=model.layers.1.mlp.fc2.weight]
Loading weights: 8%|▊ | 26/341 [00:00<00:00, 875.22it/s, Materializing param=model.layers.1.self_attn.dense.bias]
Loading weights: 8%|▊ | 26/341 [00:00<00:00, 871.40it/s, Materializing param=model.layers.1.self_attn.dense.bias]
Loading weights: 8%|▊ | 27/341 [00:00<00:00, 883.39it/s, Materializing param=model.layers.1.self_attn.dense.weight]
Loading weights: 8%|▊ | 27/341 [00:00<00:00, 880.55it/s, Materializing param=model.layers.1.self_attn.dense.weight]
Loading weights: 8%|▊ | 28/341 [00:00<00:00, 858.24it/s, Materializing param=model.layers.1.self_attn.k_proj.bias]
Loading weights: 8%|▊ | 28/341 [00:00<00:00, 854.95it/s, Materializing param=model.layers.1.self_attn.k_proj.bias]
Loading weights: 9%|▊ | 29/341 [00:00<00:00, 837.73it/s, Materializing param=model.layers.1.self_attn.k_proj.weight]
Loading weights: 9%|▊ | 29/341 [00:00<00:00, 831.75it/s, Materializing param=model.layers.1.self_attn.k_proj.weight]
Loading weights: 9%|▉ | 30/341 [00:00<00:00, 841.06it/s, Materializing param=model.layers.1.self_attn.q_proj.bias]
Loading weights: 9%|▉ | 30/341 [00:00<00:00, 835.96it/s, Materializing param=model.layers.1.self_attn.q_proj.bias]
Loading weights: 9%|▉ | 31/341 [00:00<00:00, 833.13it/s, Materializing param=model.layers.1.self_attn.q_proj.weight]
Loading weights: 9%|▉ | 31/341 [00:00<00:00, 827.35it/s, Materializing param=model.layers.1.self_attn.q_proj.weight]
Loading weights: 9%|▉ | 32/341 [00:00<00:00, 821.82it/s, Materializing param=model.layers.1.self_attn.v_proj.bias]
Loading weights: 9%|▉ | 32/341 [00:00<00:00, 818.38it/s, Materializing param=model.layers.1.self_attn.v_proj.bias]
Loading weights: 10%|▉ | 33/341 [00:00<00:00, 827.46it/s, Materializing param=model.layers.1.self_attn.v_proj.weight]
Loading weights: 10%|▉ | 33/341 [00:00<00:00, 823.15it/s, Materializing param=model.layers.1.self_attn.v_proj.weight]
Loading weights: 10%|▉ | 34/341 [00:00<00:00, 734.18it/s, Materializing param=model.layers.2.input_layernorm.bias]
Loading weights: 10%|▉ | 34/341 [00:00<00:00, 729.26it/s, Materializing param=model.layers.2.input_layernorm.bias]
Loading weights: 10%|█ | 35/341 [00:00<00:00, 719.92it/s, Materializing param=model.layers.2.input_layernorm.weight]
Loading weights: 10%|█ | 35/341 [00:00<00:00, 715.11it/s, Materializing param=model.layers.2.input_layernorm.weight]
Loading weights: 11%|█ | 36/341 [00:00<00:00, 663.17it/s, Materializing param=model.layers.2.mlp.fc1.bias]
Loading weights: 11%|█ | 36/341 [00:00<00:00, 659.07it/s, Materializing param=model.layers.2.mlp.fc1.bias]
Loading weights: 11%|█ | 37/341 [00:00<00:00, 636.22it/s, Materializing param=model.layers.2.mlp.fc1.weight]
Loading weights: 11%|█ | 37/341 [00:00<00:00, 634.47it/s, Materializing param=model.layers.2.mlp.fc1.weight]
Loading weights: 11%|█ | 38/341 [00:00<00:00, 647.84it/s, Materializing param=model.layers.2.mlp.fc2.bias]
Loading weights: 11%|█ | 38/341 [00:00<00:00, 646.74it/s, Materializing param=model.layers.2.mlp.fc2.bias]
Loading weights: 11%|█▏ | 39/341 [00:00<00:00, 602.76it/s, Materializing param=model.layers.2.mlp.fc2.weight]
Loading weights: 11%|█▏ | 39/341 [00:00<00:00, 599.78it/s, Materializing param=model.layers.2.mlp.fc2.weight]
Loading weights: 12%|█▏ | 40/341 [00:00<00:00, 609.43it/s, Materializing param=model.layers.2.self_attn.dense.bias]
Loading weights: 12%|█▏ | 40/341 [00:00<00:00, 607.91it/s, Materializing param=model.layers.2.self_attn.dense.bias]
Loading weights: 12%|█▏ | 41/341 [00:00<00:00, 610.21it/s, Materializing param=model.layers.2.self_attn.dense.weight]
Loading weights: 12%|█▏ | 41/341 [00:00<00:00, 608.67it/s, Materializing param=model.layers.2.self_attn.dense.weight]
Loading weights: 12%|█▏ | 42/341 [00:00<00:00, 587.33it/s, Materializing param=model.layers.2.self_attn.k_proj.bias]
Loading weights: 12%|█▏ | 42/341 [00:00<00:00, 585.57it/s, Materializing param=model.layers.2.self_attn.k_proj.bias]
Loading weights: 13%|█▎ | 43/341 [00:00<00:00, 559.95it/s, Materializing param=model.layers.2.self_attn.k_proj.weight]
Loading weights: 13%|█▎ | 43/341 [00:00<00:00, 557.55it/s, Materializing param=model.layers.2.self_attn.k_proj.weight]
Loading weights: 13%|█▎ | 44/341 [00:00<00:00, 532.06it/s, Materializing param=model.layers.2.self_attn.q_proj.bias]
Loading weights: 13%|█▎ | 44/341 [00:00<00:00, 529.95it/s, Materializing param=model.layers.2.self_attn.q_proj.bias]
Loading weights: 13%|█▎ | 45/341 [00:00<00:00, 529.71it/s, Materializing param=model.layers.2.self_attn.q_proj.weight]
Loading weights: 13%|█▎ | 45/341 [00:00<00:00, 527.79it/s, Materializing param=model.layers.2.self_attn.q_proj.weight]
Loading weights: 13%|█▎ | 46/341 [00:00<00:00, 523.09it/s, Materializing param=model.layers.2.self_attn.v_proj.bias]
Loading weights: 13%|█▎ | 46/341 [00:00<00:00, 520.68it/s, Materializing param=model.layers.2.self_attn.v_proj.bias]
Loading weights: 14%|█▍ | 47/341 [00:00<00:00, 520.51it/s, Materializing param=model.layers.2.self_attn.v_proj.weight]
Loading weights: 14%|█▍ | 47/341 [00:00<00:00, 519.49it/s, Materializing param=model.layers.2.self_attn.v_proj.weight]
Loading weights: 14%|█▍ | 48/341 [00:00<00:00, 528.36it/s, Materializing param=model.layers.3.input_layernorm.bias]
Loading weights: 14%|█▍ | 48/341 [00:00<00:00, 527.78it/s, Materializing param=model.layers.3.input_layernorm.bias]
Loading weights: 14%|█▍ | 49/341 [00:00<00:00, 536.26it/s, Materializing param=model.layers.3.input_layernorm.weight]
Loading weights: 14%|█▍ | 49/341 [00:00<00:00, 535.73it/s, Materializing param=model.layers.3.input_layernorm.weight]
Loading weights: 15%|█▍ | 50/341 [00:00<00:00, 544.94it/s, Materializing param=model.layers.3.mlp.fc1.bias]
Loading weights: 15%|█▍ | 50/341 [00:00<00:00, 544.43it/s, Materializing param=model.layers.3.mlp.fc1.bias]
Loading weights: 15%|█▍ | 51/341 [00:00<00:00, 547.72it/s, Materializing param=model.layers.3.mlp.fc1.weight]
Loading weights: 15%|█▍ | 51/341 [00:00<00:00, 546.09it/s, Materializing param=model.layers.3.mlp.fc1.weight]
Loading weights: 15%|█▌ | 52/341 [00:00<00:00, 541.83it/s, Materializing param=model.layers.3.mlp.fc2.bias]
Loading weights: 15%|█▌ | 52/341 [00:00<00:00, 539.83it/s, Materializing param=model.layers.3.mlp.fc2.bias]
Loading weights: 16%|█▌ | 53/341 [00:00<00:00, 542.15it/s, Materializing param=model.layers.3.mlp.fc2.weight]
Loading weights: 16%|█▌ | 53/341 [00:00<00:00, 541.32it/s, Materializing param=model.layers.3.mlp.fc2.weight]
Loading weights: 16%|█▌ | 54/341 [00:00<00:00, 549.76it/s, Materializing param=model.layers.3.self_attn.dense.bias]
Loading weights: 16%|█▌ | 54/341 [00:00<00:00, 549.22it/s, Materializing param=model.layers.3.self_attn.dense.bias]
Loading weights: 16%|█▌ | 55/341 [00:00<00:00, 557.28it/s, Materializing param=model.layers.3.self_attn.dense.weight]
Loading weights: 16%|█▌ | 55/341 [00:00<00:00, 556.77it/s, Materializing param=model.layers.3.self_attn.dense.weight]
Loading weights: 16%|█▋ | 56/341 [00:00<00:00, 564.73it/s, Materializing param=model.layers.3.self_attn.k_proj.bias]
Loading weights: 16%|█▋ | 56/341 [00:00<00:00, 564.24it/s, Materializing param=model.layers.3.self_attn.k_proj.bias]
Loading weights: 17%|█▋ | 57/341 [00:00<00:00, 572.12it/s, Materializing param=model.layers.3.self_attn.k_proj.weight]
Loading weights: 17%|█▋ | 57/341 [00:00<00:00, 571.63it/s, Materializing param=model.layers.3.self_attn.k_proj.weight]
Loading weights: 17%|█▋ | 58/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.3.self_attn.k_proj.weight]
Loading weights: 17%|█▋ | 58/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.3.self_attn.q_proj.bias]
Loading weights: 17%|█▋ | 58/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.3.self_attn.q_proj.bias]
Loading weights: 17%|█▋ | 59/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.3.self_attn.q_proj.weight]
Loading weights: 17%|█▋ | 59/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.3.self_attn.q_proj.weight]
Loading weights: 18%|█▊ | 60/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.3.self_attn.v_proj.bias]
Loading weights: 18%|█▊ | 60/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.3.self_attn.v_proj.bias]
Loading weights: 18%|█▊ | 61/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.3.self_attn.v_proj.weight]
Loading weights: 18%|█▊ | 61/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.3.self_attn.v_proj.weight]
Loading weights: 18%|█▊ | 62/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.input_layernorm.bias]
Loading weights: 18%|█▊ | 62/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.input_layernorm.bias]
Loading weights: 18%|█▊ | 63/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.input_layernorm.weight]
Loading weights: 18%|█▊ | 63/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.input_layernorm.weight]
Loading weights: 19%|█▉ | 64/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.mlp.fc1.bias]
Loading weights: 19%|█▉ | 64/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.mlp.fc1.bias]
Loading weights: 19%|█▉ | 65/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.mlp.fc1.weight]
Loading weights: 19%|█▉ | 65/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.mlp.fc1.weight]
Loading weights: 19%|█▉ | 66/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.mlp.fc2.bias]
Loading weights: 19%|█▉ | 66/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.mlp.fc2.bias]
Loading weights: 20%|█▉ | 67/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.mlp.fc2.weight]
Loading weights: 20%|█▉ | 67/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.mlp.fc2.weight]
Loading weights: 20%|█▉ | 68/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.dense.bias]
Loading weights: 20%|█▉ | 68/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.dense.bias]
Loading weights: 20%|██ | 69/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.dense.weight]
Loading weights: 20%|██ | 69/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.dense.weight]
Loading weights: 21%|██ | 70/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.k_proj.bias]
Loading weights: 21%|██ | 70/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.k_proj.bias]
Loading weights: 21%|██ | 71/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.k_proj.weight]
Loading weights: 21%|██ | 71/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.k_proj.weight]
Loading weights: 21%|██ | 72/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.q_proj.bias]
Loading weights: 21%|██ | 72/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.q_proj.bias]
Loading weights: 21%|██▏ | 73/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.q_proj.weight]
Loading weights: 21%|██▏ | 73/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.q_proj.weight]
Loading weights: 22%|██▏ | 74/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.v_proj.bias]
Loading weights: 22%|██▏ | 74/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.v_proj.bias]
Loading weights: 22%|██▏ | 75/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.v_proj.weight]
Loading weights: 22%|██▏ | 75/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.4.self_attn.v_proj.weight]
Loading weights: 22%|██▏ | 76/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.input_layernorm.bias]
Loading weights: 22%|██▏ | 76/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.input_layernorm.bias]
Loading weights: 23%|██▎ | 77/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.input_layernorm.weight]
Loading weights: 23%|██▎ | 77/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.input_layernorm.weight]
Loading weights: 23%|██▎ | 78/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.mlp.fc1.bias]
Loading weights: 23%|██▎ | 78/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.mlp.fc1.bias]
Loading weights: 23%|██▎ | 79/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.mlp.fc1.weight]
Loading weights: 23%|██▎ | 79/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.mlp.fc1.weight]
Loading weights: 23%|██▎ | 80/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.mlp.fc2.bias]
Loading weights: 23%|██▎ | 80/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.mlp.fc2.bias]
Loading weights: 24%|██▍ | 81/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.mlp.fc2.weight]
Loading weights: 24%|██▍ | 81/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.mlp.fc2.weight]
Loading weights: 24%|██▍ | 82/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.dense.bias]
Loading weights: 24%|██▍ | 82/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.dense.bias]
Loading weights: 24%|██▍ | 83/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.dense.weight]
Loading weights: 24%|██▍ | 83/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.dense.weight]
Loading weights: 25%|██▍ | 84/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.k_proj.bias]
Loading weights: 25%|██▍ | 84/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.k_proj.bias]
Loading weights: 25%|██▍ | 85/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.k_proj.weight]
Loading weights: 25%|██▍ | 85/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.k_proj.weight]
Loading weights: 25%|██▌ | 86/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.q_proj.bias]
Loading weights: 25%|██▌ | 86/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.q_proj.bias]
Loading weights: 26%|██▌ | 87/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.q_proj.weight]
Loading weights: 26%|██▌ | 87/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.q_proj.weight]
Loading weights: 26%|██▌ | 88/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.v_proj.bias]
Loading weights: 26%|██▌ | 88/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.v_proj.bias]
Loading weights: 26%|██▌ | 89/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.v_proj.weight]
Loading weights: 26%|██▌ | 89/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.5.self_attn.v_proj.weight]
Loading weights: 26%|██▋ | 90/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.input_layernorm.bias]
Loading weights: 26%|██▋ | 90/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.input_layernorm.bias]
Loading weights: 27%|██▋ | 91/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.input_layernorm.weight]
Loading weights: 27%|██▋ | 91/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.input_layernorm.weight]
Loading weights: 27%|██▋ | 92/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.mlp.fc1.bias]
Loading weights: 27%|██▋ | 92/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.mlp.fc1.bias]
Loading weights: 27%|██▋ | 93/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.mlp.fc1.weight]
Loading weights: 27%|██▋ | 93/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.mlp.fc1.weight]
Loading weights: 28%|██▊ | 94/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.mlp.fc2.bias]
Loading weights: 28%|██▊ | 94/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.mlp.fc2.bias]
Loading weights: 28%|██▊ | 95/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.mlp.fc2.weight]
Loading weights: 28%|██▊ | 95/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.mlp.fc2.weight]
Loading weights: 28%|██▊ | 96/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.dense.bias]
Loading weights: 28%|██▊ | 96/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.dense.bias]
Loading weights: 28%|██▊ | 97/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.dense.weight]
Loading weights: 28%|██▊ | 97/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.dense.weight]
Loading weights: 29%|██▊ | 98/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.k_proj.bias]
Loading weights: 29%|██▊ | 98/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.k_proj.bias]
Loading weights: 29%|██▉ | 99/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.k_proj.weight]
Loading weights: 29%|██▉ | 99/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.k_proj.weight]
Loading weights: 29%|██▉ | 100/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.q_proj.bias]
Loading weights: 29%|██▉ | 100/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.q_proj.bias]
Loading weights: 30%|██▉ | 101/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.q_proj.weight]
Loading weights: 30%|██▉ | 101/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.q_proj.weight]
Loading weights: 30%|██▉ | 102/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.v_proj.bias]
Loading weights: 30%|██▉ | 102/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.v_proj.bias]
Loading weights: 30%|███ | 103/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.v_proj.weight]
Loading weights: 30%|███ | 103/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.6.self_attn.v_proj.weight]
Loading weights: 30%|███ | 104/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.input_layernorm.bias]
Loading weights: 30%|███ | 104/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.input_layernorm.bias]
Loading weights: 31%|███ | 105/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.input_layernorm.weight]
Loading weights: 31%|███ | 105/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.input_layernorm.weight]
Loading weights: 31%|███ | 106/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.mlp.fc1.bias]
Loading weights: 31%|███ | 106/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.mlp.fc1.bias]
Loading weights: 31%|███▏ | 107/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.mlp.fc1.weight]
Loading weights: 31%|███▏ | 107/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.mlp.fc1.weight]
Loading weights: 32%|███▏ | 108/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.mlp.fc2.bias]
Loading weights: 32%|███▏ | 108/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.mlp.fc2.bias]
Loading weights: 32%|███▏ | 109/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.mlp.fc2.weight]
Loading weights: 32%|███▏ | 109/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.mlp.fc2.weight]
Loading weights: 32%|███▏ | 110/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.dense.bias]
Loading weights: 32%|███▏ | 110/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.dense.bias]
Loading weights: 33%|███▎ | 111/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.dense.weight]
Loading weights: 33%|███▎ | 111/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.dense.weight]
Loading weights: 33%|███▎ | 112/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.k_proj.bias]
Loading weights: 33%|███▎ | 112/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.k_proj.bias]
Loading weights: 33%|███▎ | 113/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.k_proj.weight]
Loading weights: 33%|███▎ | 113/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.k_proj.weight]
Loading weights: 33%|███▎ | 114/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.q_proj.bias]
Loading weights: 33%|███▎ | 114/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.q_proj.bias]
Loading weights: 34%|███▎ | 115/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.q_proj.weight]
Loading weights: 34%|███▎ | 115/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.q_proj.weight]
Loading weights: 34%|███▍ | 116/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.v_proj.bias]
Loading weights: 34%|███▍ | 116/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.v_proj.bias]
Loading weights: 34%|███▍ | 117/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.v_proj.weight]
Loading weights: 34%|███▍ | 117/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.7.self_attn.v_proj.weight]
Loading weights: 35%|███▍ | 118/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.8.input_layernorm.bias]
Loading weights: 35%|███▍ | 118/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.8.input_layernorm.bias]
Loading weights: 35%|███▍ | 119/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.8.input_layernorm.weight]
Loading weights: 35%|███▍ | 119/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.8.input_layernorm.weight]
Loading weights: 35%|███▌ | 120/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.8.mlp.fc1.bias]
Loading weights: 35%|███▌ | 120/341 [00:00<00:00, 579.61it/s, Materializing param=model.layers.8.mlp.fc1.bias]
Loading weights: 35%|███▌ | 121/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.mlp.fc1.bias]
Loading weights: 35%|███▌ | 121/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.mlp.fc1.weight]
Loading weights: 35%|███▌ | 121/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.mlp.fc1.weight]
Loading weights: 36%|███▌ | 122/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.mlp.fc2.bias]
Loading weights: 36%|███▌ | 122/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.mlp.fc2.bias]
Loading weights: 36%|███▌ | 123/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.mlp.fc2.weight]
Loading weights: 36%|███▌ | 123/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.mlp.fc2.weight]
Loading weights: 36%|███▋ | 124/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.dense.bias]
Loading weights: 36%|███▋ | 124/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.dense.bias]
Loading weights: 37%|███▋ | 125/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.dense.weight]
Loading weights: 37%|███▋ | 125/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.dense.weight]
Loading weights: 37%|███▋ | 126/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.k_proj.bias]
Loading weights: 37%|███▋ | 126/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.k_proj.bias]
Loading weights: 37%|███▋ | 127/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.k_proj.weight]
Loading weights: 37%|███▋ | 127/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.k_proj.weight]
Loading weights: 38%|███▊ | 128/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.q_proj.bias]
Loading weights: 38%|███▊ | 128/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.q_proj.bias]
Loading weights: 38%|███▊ | 129/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.q_proj.weight]
Loading weights: 38%|███▊ | 129/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.q_proj.weight]
Loading weights: 38%|███▊ | 130/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.v_proj.bias]
Loading weights: 38%|███▊ | 130/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.v_proj.bias]
Loading weights: 38%|███▊ | 131/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.v_proj.weight]
Loading weights: 38%|███▊ | 131/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.8.self_attn.v_proj.weight]
Loading weights: 39%|███▊ | 132/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.input_layernorm.bias]
Loading weights: 39%|███▊ | 132/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.input_layernorm.bias]
Loading weights: 39%|███▉ | 133/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.input_layernorm.weight]
Loading weights: 39%|███▉ | 133/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.input_layernorm.weight]
Loading weights: 39%|███▉ | 134/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.mlp.fc1.bias]
Loading weights: 39%|███▉ | 134/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.mlp.fc1.bias]
Loading weights: 40%|███▉ | 135/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.mlp.fc1.weight]
Loading weights: 40%|███▉ | 135/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.mlp.fc1.weight]
Loading weights: 40%|███▉ | 136/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.mlp.fc2.bias]
Loading weights: 40%|███▉ | 136/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.mlp.fc2.bias]
Loading weights: 40%|████ | 137/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.mlp.fc2.weight]
Loading weights: 40%|████ | 137/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.mlp.fc2.weight]
Loading weights: 40%|████ | 138/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.dense.bias]
Loading weights: 40%|████ | 138/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.dense.bias]
Loading weights: 41%|████ | 139/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.dense.weight]
Loading weights: 41%|████ | 139/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.dense.weight]
Loading weights: 41%|████ | 140/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.k_proj.bias]
Loading weights: 41%|████ | 140/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.k_proj.bias]
Loading weights: 41%|████▏ | 141/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.k_proj.weight]
Loading weights: 41%|████▏ | 141/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.k_proj.weight]
Loading weights: 42%|████▏ | 142/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.q_proj.bias]
Loading weights: 42%|████▏ | 142/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.q_proj.bias]
Loading weights: 42%|████▏ | 143/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.q_proj.weight]
Loading weights: 42%|████▏ | 143/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.q_proj.weight]
Loading weights: 42%|████▏ | 144/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.v_proj.bias]
Loading weights: 42%|████▏ | 144/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.v_proj.bias]
Loading weights: 43%|████▎ | 145/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.v_proj.weight]
Loading weights: 43%|████▎ | 145/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.9.self_attn.v_proj.weight]
Loading weights: 43%|████▎ | 146/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.input_layernorm.bias]
Loading weights: 43%|████▎ | 146/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.input_layernorm.bias]
Loading weights: 43%|████▎ | 147/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.input_layernorm.weight]
Loading weights: 43%|████▎ | 147/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.input_layernorm.weight]
Loading weights: 43%|████▎ | 148/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.mlp.fc1.bias]
Loading weights: 43%|████▎ | 148/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.mlp.fc1.bias]
Loading weights: 44%|████▎ | 149/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.mlp.fc1.weight]
Loading weights: 44%|████▎ | 149/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.mlp.fc1.weight]
Loading weights: 44%|████▍ | 150/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.mlp.fc2.bias]
Loading weights: 44%|████▍ | 150/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.mlp.fc2.bias]
Loading weights: 44%|████▍ | 151/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.mlp.fc2.weight]
Loading weights: 44%|████▍ | 151/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.mlp.fc2.weight]
Loading weights: 45%|████▍ | 152/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.dense.bias]
Loading weights: 45%|████▍ | 152/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.dense.bias]
Loading weights: 45%|████▍ | 153/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.dense.weight]
Loading weights: 45%|████▍ | 153/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.dense.weight]
Loading weights: 45%|████▌ | 154/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.k_proj.bias]
Loading weights: 45%|████▌ | 154/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.k_proj.bias]
Loading weights: 45%|████▌ | 155/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.k_proj.weight]
Loading weights: 45%|████▌ | 155/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.k_proj.weight]
Loading weights: 46%|████▌ | 156/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.q_proj.bias]
Loading weights: 46%|████▌ | 156/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.q_proj.bias]
Loading weights: 46%|████▌ | 157/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.q_proj.weight]
Loading weights: 46%|████▌ | 157/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.q_proj.weight]
Loading weights: 46%|████▋ | 158/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.v_proj.bias]
Loading weights: 46%|████▋ | 158/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.v_proj.bias]
Loading weights: 47%|████▋ | 159/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.v_proj.weight]
Loading weights: 47%|████▋ | 159/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.10.self_attn.v_proj.weight]
Loading weights: 47%|████▋ | 160/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.input_layernorm.bias]
Loading weights: 47%|████▋ | 160/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.input_layernorm.bias]
Loading weights: 47%|████▋ | 161/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.input_layernorm.weight]
Loading weights: 47%|████▋ | 161/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.input_layernorm.weight]
Loading weights: 48%|████▊ | 162/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.mlp.fc1.bias]
Loading weights: 48%|████▊ | 162/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.mlp.fc1.bias]
Loading weights: 48%|████▊ | 163/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.mlp.fc1.weight]
Loading weights: 48%|████▊ | 163/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.mlp.fc1.weight]
Loading weights: 48%|████▊ | 164/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.mlp.fc2.bias]
Loading weights: 48%|████▊ | 164/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.mlp.fc2.bias]
Loading weights: 48%|████▊ | 165/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.mlp.fc2.weight]
Loading weights: 48%|████▊ | 165/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.mlp.fc2.weight]
Loading weights: 49%|████▊ | 166/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.dense.bias]
Loading weights: 49%|████▊ | 166/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.dense.bias]
Loading weights: 49%|████▉ | 167/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.dense.weight]
Loading weights: 49%|████▉ | 167/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.dense.weight]
Loading weights: 49%|████▉ | 168/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.k_proj.bias]
Loading weights: 49%|████▉ | 168/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.k_proj.bias]
Loading weights: 50%|████▉ | 169/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.k_proj.weight]
Loading weights: 50%|████▉ | 169/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.k_proj.weight]
Loading weights: 50%|████▉ | 170/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.q_proj.bias]
Loading weights: 50%|████▉ | 170/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.q_proj.bias]
Loading weights: 50%|█████ | 171/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.q_proj.weight]
Loading weights: 50%|█████ | 171/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.q_proj.weight]
Loading weights: 50%|█████ | 172/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.v_proj.bias]
Loading weights: 50%|█████ | 172/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.v_proj.bias]
Loading weights: 51%|█████ | 173/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.v_proj.weight]
Loading weights: 51%|█████ | 173/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.11.self_attn.v_proj.weight]
Loading weights: 51%|█████ | 174/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.input_layernorm.bias]
Loading weights: 51%|█████ | 174/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.input_layernorm.bias]
Loading weights: 51%|█████▏ | 175/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.input_layernorm.weight]
Loading weights: 51%|█████▏ | 175/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.input_layernorm.weight]
Loading weights: 52%|█████▏ | 176/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.mlp.fc1.bias]
Loading weights: 52%|█████▏ | 176/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.mlp.fc1.bias]
Loading weights: 52%|█████▏ | 177/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.mlp.fc1.weight]
Loading weights: 52%|█████▏ | 177/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.mlp.fc1.weight]
Loading weights: 52%|█████▏ | 178/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.mlp.fc2.bias]
Loading weights: 52%|█████▏ | 178/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.mlp.fc2.bias]
Loading weights: 52%|█████▏ | 179/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.mlp.fc2.weight]
Loading weights: 52%|█████▏ | 179/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.mlp.fc2.weight]
Loading weights: 53%|█████▎ | 180/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.dense.bias]
Loading weights: 53%|█████▎ | 180/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.dense.bias]
Loading weights: 53%|█████▎ | 181/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.dense.weight]
Loading weights: 53%|█████▎ | 181/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.dense.weight]
Loading weights: 53%|█████▎ | 182/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.k_proj.bias]
Loading weights: 53%|█████▎ | 182/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.k_proj.bias]
Loading weights: 54%|█████▎ | 183/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.k_proj.weight]
Loading weights: 54%|█████▎ | 183/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.k_proj.weight]
Loading weights: 54%|█████▍ | 184/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.q_proj.bias]
Loading weights: 54%|█████▍ | 184/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.q_proj.bias]
Loading weights: 54%|█████▍ | 185/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.q_proj.weight]
Loading weights: 54%|█████▍ | 185/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.q_proj.weight]
Loading weights: 55%|█████▍ | 186/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.v_proj.bias]
Loading weights: 55%|█████▍ | 186/341 [00:00<00:00, 588.87it/s, Materializing param=model.layers.12.self_attn.v_proj.bias]
Loading weights: 55%|█████▍ | 187/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.12.self_attn.v_proj.bias]
Loading weights: 55%|█████▍ | 187/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.12.self_attn.v_proj.weight]
Loading weights: 55%|█████▍ | 187/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.12.self_attn.v_proj.weight]
Loading weights: 55%|█████▌ | 188/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.input_layernorm.bias]
Loading weights: 55%|█████▌ | 188/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.input_layernorm.bias]
Loading weights: 55%|█████▌ | 189/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.input_layernorm.weight]
Loading weights: 55%|█████▌ | 189/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.input_layernorm.weight]
Loading weights: 56%|█████▌ | 190/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.mlp.fc1.bias]
Loading weights: 56%|█████▌ | 190/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.mlp.fc1.bias]
Loading weights: 56%|█████▌ | 191/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.mlp.fc1.weight]
Loading weights: 56%|█████▌ | 191/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.mlp.fc1.weight]
Loading weights: 56%|█████▋ | 192/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.mlp.fc2.bias]
Loading weights: 56%|█████▋ | 192/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.mlp.fc2.bias]
Loading weights: 57%|█████▋ | 193/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.mlp.fc2.weight]
Loading weights: 57%|█████▋ | 193/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.mlp.fc2.weight]
Loading weights: 57%|█████▋ | 194/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.dense.bias]
Loading weights: 57%|█████▋ | 194/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.dense.bias]
Loading weights: 57%|█████▋ | 195/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.dense.weight]
Loading weights: 57%|█████▋ | 195/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.dense.weight]
Loading weights: 57%|█████▋ | 196/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.k_proj.bias]
Loading weights: 57%|█████▋ | 196/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.k_proj.bias]
Loading weights: 58%|█████▊ | 197/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.k_proj.weight]
Loading weights: 58%|█████▊ | 197/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.k_proj.weight]
Loading weights: 58%|█████▊ | 198/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.q_proj.bias]
Loading weights: 58%|█████▊ | 198/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.q_proj.bias]
Loading weights: 58%|█████▊ | 199/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.q_proj.weight]
Loading weights: 58%|█████▊ | 199/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.q_proj.weight]
Loading weights: 59%|█████▊ | 200/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.v_proj.bias]
Loading weights: 59%|█████▊ | 200/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.v_proj.bias]
Loading weights: 59%|█████▉ | 201/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.v_proj.weight]
Loading weights: 59%|█████▉ | 201/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.13.self_attn.v_proj.weight]
Loading weights: 59%|█████▉ | 202/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.input_layernorm.bias]
Loading weights: 59%|█████▉ | 202/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.input_layernorm.bias]
Loading weights: 60%|█████▉ | 203/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.input_layernorm.weight]
Loading weights: 60%|█████▉ | 203/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.input_layernorm.weight]
Loading weights: 60%|█████▉ | 204/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.mlp.fc1.bias]
Loading weights: 60%|█████▉ | 204/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.mlp.fc1.bias]
Loading weights: 60%|██████ | 205/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.mlp.fc1.weight]
Loading weights: 60%|██████ | 205/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.mlp.fc1.weight]
Loading weights: 60%|██████ | 206/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.mlp.fc2.bias]
Loading weights: 60%|██████ | 206/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.mlp.fc2.bias]
Loading weights: 61%|██████ | 207/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.mlp.fc2.weight]
Loading weights: 61%|██████ | 207/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.mlp.fc2.weight]
Loading weights: 61%|██████ | 208/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.dense.bias]
Loading weights: 61%|██████ | 208/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.dense.bias]
Loading weights: 61%|██████▏ | 209/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.dense.weight]
Loading weights: 61%|██████▏ | 209/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.dense.weight]
Loading weights: 62%|██████▏ | 210/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.k_proj.bias]
Loading weights: 62%|██████▏ | 210/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.k_proj.bias]
Loading weights: 62%|██████▏ | 211/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.k_proj.weight]
Loading weights: 62%|██████▏ | 211/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.k_proj.weight]
Loading weights: 62%|██████▏ | 212/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.q_proj.bias]
Loading weights: 62%|██████▏ | 212/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.q_proj.bias]
Loading weights: 62%|██████▏ | 213/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.q_proj.weight]
Loading weights: 62%|██████▏ | 213/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.q_proj.weight]
Loading weights: 63%|██████▎ | 214/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.v_proj.bias]
Loading weights: 63%|██████▎ | 214/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.v_proj.bias]
Loading weights: 63%|██████▎ | 215/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.v_proj.weight]
Loading weights: 63%|██████▎ | 215/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.14.self_attn.v_proj.weight]
Loading weights: 63%|██████▎ | 216/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.input_layernorm.bias]
Loading weights: 63%|██████▎ | 216/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.input_layernorm.bias]
Loading weights: 64%|██████▎ | 217/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.input_layernorm.weight]
Loading weights: 64%|██████▎ | 217/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.input_layernorm.weight]
Loading weights: 64%|██████▍ | 218/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.mlp.fc1.bias]
Loading weights: 64%|██████▍ | 218/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.mlp.fc1.bias]
Loading weights: 64%|██████▍ | 219/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.mlp.fc1.weight]
Loading weights: 64%|██████▍ | 219/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.mlp.fc1.weight]
Loading weights: 65%|██████▍ | 220/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.mlp.fc2.bias]
Loading weights: 65%|██████▍ | 220/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.mlp.fc2.bias]
Loading weights: 65%|██████▍ | 221/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.mlp.fc2.weight]
Loading weights: 65%|██████▍ | 221/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.mlp.fc2.weight]
Loading weights: 65%|██████▌ | 222/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.dense.bias]
Loading weights: 65%|██████▌ | 222/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.dense.bias]
Loading weights: 65%|██████▌ | 223/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.dense.weight]
Loading weights: 65%|██████▌ | 223/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.dense.weight]
Loading weights: 66%|██████▌ | 224/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.k_proj.bias]
Loading weights: 66%|██████▌ | 224/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.k_proj.bias]
Loading weights: 66%|██████▌ | 225/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.k_proj.weight]
Loading weights: 66%|██████▌ | 225/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.k_proj.weight]
Loading weights: 66%|██████▋ | 226/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.q_proj.bias]
Loading weights: 66%|██████▋ | 226/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.q_proj.bias]
Loading weights: 67%|██████▋ | 227/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.q_proj.weight]
Loading weights: 67%|██████▋ | 227/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.q_proj.weight]
Loading weights: 67%|██████▋ | 228/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.v_proj.bias]
Loading weights: 67%|██████▋ | 228/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.v_proj.bias]
Loading weights: 67%|██████▋ | 229/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.v_proj.weight]
Loading weights: 67%|██████▋ | 229/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.15.self_attn.v_proj.weight]
Loading weights: 67%|██████▋ | 230/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.input_layernorm.bias]
Loading weights: 67%|██████▋ | 230/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.input_layernorm.bias]
Loading weights: 68%|██████▊ | 231/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.input_layernorm.weight]
Loading weights: 68%|██████▊ | 231/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.input_layernorm.weight]
Loading weights: 68%|██████▊ | 232/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.mlp.fc1.bias]
Loading weights: 68%|██████▊ | 232/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.mlp.fc1.bias]
Loading weights: 68%|██████▊ | 233/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.mlp.fc1.weight]
Loading weights: 68%|██████▊ | 233/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.mlp.fc1.weight]
Loading weights: 69%|██████▊ | 234/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.mlp.fc2.bias]
Loading weights: 69%|██████▊ | 234/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.mlp.fc2.bias]
Loading weights: 69%|██████▉ | 235/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.mlp.fc2.weight]
Loading weights: 69%|██████▉ | 235/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.mlp.fc2.weight]
Loading weights: 69%|██████▉ | 236/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.dense.bias]
Loading weights: 69%|██████▉ | 236/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.dense.bias]
Loading weights: 70%|██████▉ | 237/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.dense.weight]
Loading weights: 70%|██████▉ | 237/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.dense.weight]
Loading weights: 70%|██████▉ | 238/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.k_proj.bias]
Loading weights: 70%|██████▉ | 238/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.k_proj.bias]
Loading weights: 70%|███████ | 239/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.k_proj.weight]
Loading weights: 70%|███████ | 239/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.k_proj.weight]
Loading weights: 70%|███████ | 240/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.q_proj.bias]
Loading weights: 70%|███████ | 240/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.q_proj.bias]
Loading weights: 71%|███████ | 241/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.q_proj.weight]
Loading weights: 71%|███████ | 241/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.q_proj.weight]
Loading weights: 71%|███████ | 242/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.v_proj.bias]
Loading weights: 71%|███████ | 242/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.v_proj.bias]
Loading weights: 71%|███████▏ | 243/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.v_proj.weight]
Loading weights: 71%|███████▏ | 243/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.16.self_attn.v_proj.weight]
Loading weights: 72%|███████▏ | 244/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.17.input_layernorm.bias]
Loading weights: 72%|███████▏ | 244/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.17.input_layernorm.bias]
Loading weights: 72%|███████▏ | 245/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.17.input_layernorm.weight]
Loading weights: 72%|███████▏ | 245/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.17.input_layernorm.weight]
Loading weights: 72%|███████▏ | 246/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.17.mlp.fc1.bias]
Loading weights: 72%|███████▏ | 246/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.17.mlp.fc1.bias]
Loading weights: 72%|███████▏ | 247/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.17.mlp.fc1.weight]
Loading weights: 72%|███████▏ | 247/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.17.mlp.fc1.weight]
Loading weights: 73%|███████▎ | 248/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.17.mlp.fc2.bias]
Loading weights: 73%|███████▎ | 248/341 [00:00<00:00, 619.34it/s, Materializing param=model.layers.17.mlp.fc2.bias]
Loading weights: 73%|███████▎ | 249/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.mlp.fc2.bias]
Loading weights: 73%|███████▎ | 249/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.mlp.fc2.weight]
Loading weights: 73%|███████▎ | 249/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.mlp.fc2.weight]
Loading weights: 73%|███████▎ | 250/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.dense.bias]
Loading weights: 73%|███████▎ | 250/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.dense.bias]
Loading weights: 74%|███████▎ | 251/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.dense.weight]
Loading weights: 74%|███████▎ | 251/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.dense.weight]
Loading weights: 74%|███████▍ | 252/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.k_proj.bias]
Loading weights: 74%|███████▍ | 252/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.k_proj.bias]
Loading weights: 74%|███████▍ | 253/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.k_proj.weight]
Loading weights: 74%|███████▍ | 253/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.k_proj.weight]
Loading weights: 74%|███████▍ | 254/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.q_proj.bias]
Loading weights: 74%|███████▍ | 254/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.q_proj.bias]
Loading weights: 75%|███████▍ | 255/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.q_proj.weight]
Loading weights: 75%|███████▍ | 255/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.q_proj.weight]
Loading weights: 75%|███████▌ | 256/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.v_proj.bias]
Loading weights: 75%|███████▌ | 256/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.v_proj.bias]
Loading weights: 75%|███████▌ | 257/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.v_proj.weight]
Loading weights: 75%|███████▌ | 257/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.17.self_attn.v_proj.weight]
Loading weights: 76%|███████▌ | 258/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.input_layernorm.bias]
Loading weights: 76%|███████▌ | 258/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.input_layernorm.bias]
Loading weights: 76%|███████▌ | 259/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.input_layernorm.weight]
Loading weights: 76%|███████▌ | 259/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.input_layernorm.weight]
Loading weights: 76%|███████▌ | 260/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.mlp.fc1.bias]
Loading weights: 76%|███████▌ | 260/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.mlp.fc1.bias]
Loading weights: 77%|███████▋ | 261/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.mlp.fc1.weight]
Loading weights: 77%|███████▋ | 261/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.mlp.fc1.weight]
Loading weights: 77%|███████▋ | 262/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.mlp.fc2.bias]
Loading weights: 77%|███████▋ | 262/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.mlp.fc2.bias]
Loading weights: 77%|███████▋ | 263/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.mlp.fc2.weight]
Loading weights: 77%|███████▋ | 263/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.mlp.fc2.weight]
Loading weights: 77%|███████▋ | 264/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.dense.bias]
Loading weights: 77%|███████▋ | 264/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.dense.bias]
Loading weights: 78%|███████▊ | 265/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.dense.weight]
Loading weights: 78%|███████▊ | 265/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.dense.weight]
Loading weights: 78%|███████▊ | 266/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.k_proj.bias]
Loading weights: 78%|███████▊ | 266/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.k_proj.bias]
Loading weights: 78%|███████▊ | 267/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.k_proj.weight]
Loading weights: 78%|███████▊ | 267/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.k_proj.weight]
Loading weights: 79%|███████▊ | 268/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.q_proj.bias]
Loading weights: 79%|███████▊ | 268/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.q_proj.bias]
Loading weights: 79%|███████▉ | 269/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.q_proj.weight]
Loading weights: 79%|███████▉ | 269/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.q_proj.weight]
Loading weights: 79%|███████▉ | 270/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.v_proj.bias]
Loading weights: 79%|███████▉ | 270/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.v_proj.bias]
Loading weights: 79%|███████▉ | 271/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.v_proj.weight]
Loading weights: 79%|███████▉ | 271/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.18.self_attn.v_proj.weight]
Loading weights: 80%|███████▉ | 272/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.input_layernorm.bias]
Loading weights: 80%|███████▉ | 272/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.input_layernorm.bias]
Loading weights: 80%|████████ | 273/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.input_layernorm.weight]
Loading weights: 80%|████████ | 273/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.input_layernorm.weight]
Loading weights: 80%|████████ | 274/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.mlp.fc1.bias]
Loading weights: 80%|████████ | 274/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.mlp.fc1.bias]
Loading weights: 81%|████████ | 275/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.mlp.fc1.weight]
Loading weights: 81%|████████ | 275/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.mlp.fc1.weight]
Loading weights: 81%|████████ | 276/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.mlp.fc2.bias]
Loading weights: 81%|████████ | 276/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.mlp.fc2.bias]
Loading weights: 81%|████████ | 277/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.mlp.fc2.weight]
Loading weights: 81%|████████ | 277/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.mlp.fc2.weight]
Loading weights: 82%|████████▏ | 278/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.dense.bias]
Loading weights: 82%|████████▏ | 278/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.dense.bias]
Loading weights: 82%|████████▏ | 279/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.dense.weight]
Loading weights: 82%|████████▏ | 279/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.dense.weight]
Loading weights: 82%|████████▏ | 280/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.k_proj.bias]
Loading weights: 82%|████████▏ | 280/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.k_proj.bias]
Loading weights: 82%|████████▏ | 281/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.k_proj.weight]
Loading weights: 82%|████████▏ | 281/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.k_proj.weight]
Loading weights: 83%|████████▎ | 282/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.q_proj.bias]
Loading weights: 83%|████████▎ | 282/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.q_proj.bias]
Loading weights: 83%|████████▎ | 283/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.q_proj.weight]
Loading weights: 83%|████████▎ | 283/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.q_proj.weight]
Loading weights: 83%|████████▎ | 284/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.v_proj.bias]
Loading weights: 83%|████████▎ | 284/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.v_proj.bias]
Loading weights: 84%|████████▎ | 285/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.v_proj.weight]
Loading weights: 84%|████████▎ | 285/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.19.self_attn.v_proj.weight]
Loading weights: 84%|████████▍ | 286/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.input_layernorm.bias]
Loading weights: 84%|████████▍ | 286/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.input_layernorm.bias]
Loading weights: 84%|████████▍ | 287/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.input_layernorm.weight]
Loading weights: 84%|████████▍ | 287/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.input_layernorm.weight]
Loading weights: 84%|████████▍ | 288/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.mlp.fc1.bias]
Loading weights: 84%|████████▍ | 288/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.mlp.fc1.bias]
Loading weights: 85%|████████▍ | 289/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.mlp.fc1.weight]
Loading weights: 85%|████████▍ | 289/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.mlp.fc1.weight]
Loading weights: 85%|████████▌ | 290/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.mlp.fc2.bias]
Loading weights: 85%|████████▌ | 290/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.mlp.fc2.bias]
Loading weights: 85%|████████▌ | 291/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.mlp.fc2.weight]
Loading weights: 85%|████████▌ | 291/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.mlp.fc2.weight]
Loading weights: 86%|████████▌ | 292/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.dense.bias]
Loading weights: 86%|████████▌ | 292/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.dense.bias]
Loading weights: 86%|████████▌ | 293/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.dense.weight]
Loading weights: 86%|████████▌ | 293/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.dense.weight]
Loading weights: 86%|████████▌ | 294/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.k_proj.bias]
Loading weights: 86%|████████▌ | 294/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.k_proj.bias]
Loading weights: 87%|████████▋ | 295/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.k_proj.weight]
Loading weights: 87%|████████▋ | 295/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.k_proj.weight]
Loading weights: 87%|████████▋ | 296/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.q_proj.bias]
Loading weights: 87%|████████▋ | 296/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.q_proj.bias]
Loading weights: 87%|████████▋ | 297/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.q_proj.weight]
Loading weights: 87%|████████▋ | 297/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.q_proj.weight]
Loading weights: 87%|████████▋ | 298/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.v_proj.bias]
Loading weights: 87%|████████▋ | 298/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.v_proj.bias]
Loading weights: 88%|████████▊ | 299/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.v_proj.weight]
Loading weights: 88%|████████▊ | 299/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.20.self_attn.v_proj.weight]
Loading weights: 88%|████████▊ | 300/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.21.input_layernorm.bias]
Loading weights: 88%|████████▊ | 300/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.21.input_layernorm.bias]
Loading weights: 88%|████████▊ | 301/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.21.input_layernorm.weight]
Loading weights: 88%|████████▊ | 301/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.21.input_layernorm.weight]
Loading weights: 89%|████████▊ | 302/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.21.mlp.fc1.bias]
Loading weights: 89%|████████▊ | 302/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.21.mlp.fc1.bias]
Loading weights: 89%|████████▉ | 303/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.21.mlp.fc1.weight]
Loading weights: 89%|████████▉ | 303/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.21.mlp.fc1.weight]
Loading weights: 89%|████████▉ | 304/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.21.mlp.fc2.bias]
Loading weights: 89%|████████▉ | 304/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.21.mlp.fc2.bias]
Loading weights: 89%|████████▉ | 305/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.21.mlp.fc2.weight]
Loading weights: 89%|████████▉ | 305/341 [00:00<00:00, 557.23it/s, Materializing param=model.layers.21.mlp.fc2.weight]
Loading weights: 90%|████████▉ | 306/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.mlp.fc2.weight]
Loading weights: 90%|████████▉ | 306/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.dense.bias]
Loading weights: 90%|████████▉ | 306/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.dense.bias]
Loading weights: 90%|█████████ | 307/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.dense.weight]
Loading weights: 90%|█████████ | 307/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.dense.weight]
Loading weights: 90%|█████████ | 308/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.k_proj.bias]
Loading weights: 90%|█████████ | 308/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.k_proj.bias]
Loading weights: 91%|█████████ | 309/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.k_proj.weight]
Loading weights: 91%|█████████ | 309/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.k_proj.weight]
Loading weights: 91%|█████████ | 310/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.q_proj.bias]
Loading weights: 91%|█████████ | 310/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.q_proj.bias]
Loading weights: 91%|█████████ | 311/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.q_proj.weight]
Loading weights: 91%|█████████ | 311/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.q_proj.weight]
Loading weights: 91%|█████████▏| 312/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.v_proj.bias]
Loading weights: 91%|█████████▏| 312/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.v_proj.bias]
Loading weights: 92%|█████████▏| 313/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.v_proj.weight]
Loading weights: 92%|█████████▏| 313/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.21.self_attn.v_proj.weight]
Loading weights: 92%|█████████▏| 314/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.input_layernorm.bias]
Loading weights: 92%|█████████▏| 314/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.input_layernorm.bias]
Loading weights: 92%|█████████▏| 315/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.input_layernorm.weight]
Loading weights: 92%|█████████▏| 315/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.input_layernorm.weight]
Loading weights: 93%|█████████▎| 316/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.mlp.fc1.bias]
Loading weights: 93%|█████████▎| 316/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.mlp.fc1.bias]
Loading weights: 93%|█████████▎| 317/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.mlp.fc1.weight]
Loading weights: 93%|█████████▎| 317/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.mlp.fc1.weight]
Loading weights: 93%|█████████▎| 318/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.mlp.fc2.bias]
Loading weights: 93%|█████████▎| 318/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.mlp.fc2.bias]
Loading weights: 94%|█████████▎| 319/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.mlp.fc2.weight]
Loading weights: 94%|█████████▎| 319/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.mlp.fc2.weight]
Loading weights: 94%|█████████▍| 320/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.dense.bias]
Loading weights: 94%|█████████▍| 320/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.dense.bias]
Loading weights: 94%|█████████▍| 321/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.dense.weight]
Loading weights: 94%|█████████▍| 321/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.dense.weight]
Loading weights: 94%|█████████▍| 322/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.k_proj.bias]
Loading weights: 94%|█████████▍| 322/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.k_proj.bias]
Loading weights: 95%|█████████▍| 323/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.k_proj.weight]
Loading weights: 95%|█████████▍| 323/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.k_proj.weight]
Loading weights: 95%|█████████▌| 324/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.q_proj.bias]
Loading weights: 95%|█████████▌| 324/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.q_proj.bias]
Loading weights: 95%|█████████▌| 325/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.q_proj.weight]
Loading weights: 95%|█████████▌| 325/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.q_proj.weight]
Loading weights: 96%|█████████▌| 326/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.v_proj.bias]
Loading weights: 96%|█████████▌| 326/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.v_proj.bias]
Loading weights: 96%|█████████▌| 327/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.v_proj.weight]
Loading weights: 96%|█████████▌| 327/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.22.self_attn.v_proj.weight]
Loading weights: 96%|█████████▌| 328/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.input_layernorm.bias]
Loading weights: 96%|█████████▌| 328/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.input_layernorm.bias]
Loading weights: 96%|█████████▋| 329/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.input_layernorm.weight]
Loading weights: 96%|█████████▋| 329/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.input_layernorm.weight]
Loading weights: 97%|█████████▋| 330/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.mlp.fc1.bias]
Loading weights: 97%|█████████▋| 330/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.mlp.fc1.bias]
Loading weights: 97%|█████████▋| 331/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.mlp.fc1.weight]
Loading weights: 97%|█████████▋| 331/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.mlp.fc1.weight]
Loading weights: 97%|█████████▋| 332/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.mlp.fc2.bias]
Loading weights: 97%|█████████▋| 332/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.mlp.fc2.bias]
Loading weights: 98%|█████████▊| 333/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.mlp.fc2.weight]
Loading weights: 98%|█████████▊| 333/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.mlp.fc2.weight]
Loading weights: 98%|█████████▊| 334/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.dense.bias]
Loading weights: 98%|█████████▊| 334/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.dense.bias]
Loading weights: 98%|█████████▊| 335/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.dense.weight]
Loading weights: 98%|█████████▊| 335/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.dense.weight]
Loading weights: 99%|█████████▊| 336/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.k_proj.bias]
Loading weights: 99%|█████████▊| 336/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.k_proj.bias]
Loading weights: 99%|█████████▉| 337/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.k_proj.weight]
Loading weights: 99%|█████████▉| 337/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.k_proj.weight]
Loading weights: 99%|█████████▉| 338/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.q_proj.bias]
Loading weights: 99%|█████████▉| 338/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.q_proj.bias]
Loading weights: 99%|█████████▉| 339/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.q_proj.weight]
Loading weights: 99%|█████████▉| 339/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.q_proj.weight]
Loading weights: 100%|█████████▉| 340/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.v_proj.bias]
Loading weights: 100%|█████████▉| 340/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.v_proj.bias]
Loading weights: 100%|██████████| 341/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.v_proj.weight]
Loading weights: 100%|██████████| 341/341 [00:00<00:00, 557.86it/s, Materializing param=model.layers.23.self_attn.v_proj.weight]
Loading weights: 100%|██████████| 341/341 [00:00<00:00, 589.26it/s, Materializing param=model.layers.23.self_attn.v_proj.weight]
-- done.
-- tokenize the prompt...
-- done.
-- compute the answer...
-- done in 3.7324481080013356
output shape: T7s1x123[7,50285:A10138.878048780489]
-- decode the answer...
-- done.
def print_prime(n):
"""
Print all primes between 1 and n
"""
primes = []
for num in range(2, n+1):
is_prime = True
for i in range(2, int(math.sqrt(num))+1):
if num % i == 0:
is_prime = False
break
if is_prime:
primes.append(num)
print(primes)
print_prime(20)
``
eos_token_id?¶
This token means the end of the answer.
# Display the id the tokenizer reports as its end-of-sequence token.
print("eos_token_id=", tokenizer.eos_token_id)
eos_token_id= 50256
Custom method generate¶
Let’s implement a simple function replicating what method
generate does.
def simple_generate_with_cache(
    model, input_ids: torch.Tensor, eos_token_id: int, max_new_tokens: int = 100
):
    """
    Greedy decoding loop replicating the core of method ``generate``.

    The prompt is processed once (prefill), then tokens are produced one
    at a time (decode). Every decode step feeds only the newest token
    together with the key/value cache returned by the previous call.

    :param model: callable invoked as ``model(ids, use_cache=True, ...)``,
        its result must expose ``logits`` and ``past_key_values``
    :param input_ids: token ids of the prompt; the end-of-sequence test
        relies on ``Tensor.item()``, so a single sequence (batch size 1)
        is expected
    :param eos_token_id: decoding stops as soon as this token is predicted,
        the token itself is not appended to the result
    :param max_new_tokens: maximum number of tokens appended to the prompt
    :return: the prompt followed by the generated tokens
    """
    # Inference only: no_grad prevents every forward pass from recording
    # autograd history when the model parameters require gradients.
    with torch.no_grad():
        # First call: prefill
        outputs = model(input_ids, use_cache=True)

        # Next calls: decode
        for _ in tqdm(range(max_new_tokens)):
            next_token_logits = outputs.logits[:, -1, :]
            past_key_values = outputs.past_key_values

            # The most probable next token is chosen.
            next_token_id = torch.argmax(next_token_logits, dim=-1, keepdim=True)
            # But we could select it using a multinomial law
            # <<< probs = torch.softmax(next_token_logits / temperature, dim=-1)
            # <<< top_probs, top_indices = torch.topk(probs, top_k)
            # <<< next_token_id = top_indices.gather(-1, torch.multinomial(top_probs, 1))

            if next_token_id.item() == eos_token_id:
                break
            input_ids = torch.cat([input_ids, next_token_id], dim=-1)

            # Feed only the new token, but with the cache
            outputs = model(next_token_id, use_cache=True, past_key_values=past_key_values)
    return input_ids
# Run the hand-written decoding loop on inputs.input_ids and time it.
print("-- compute the answer with custom generate...")
begin = time.perf_counter()
outputs = simple_generate_with_cache(
    model,
    inputs.input_ids,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=100,
)
duration = time.perf_counter() - begin
print(f"-- done in {duration}")
# The measurement is stored under the name "custom" in the list of timings.
data.append({"name": "custom", "duration": duration})
print("-- done.")
print("output shape:", string_type(outputs, with_shape=True, with_min_max=True))

# Turn the token ids back into text; only the first decoded entry is shown.
print("-- decode the answer...")
text = tokenizer.batch_decode(outputs)[0]
print("-- done.")
print(text)
-- compute the answer with custom generate...
0%| | 0/100 [00:00<?, ?it/s]
3%|▎ | 3/100 [00:00<00:04, 23.91it/s]
6%|▌ | 6/100 [00:00<00:04, 21.31it/s]
9%|▉ | 9/100 [00:00<00:04, 22.62it/s]
13%|█▎ | 13/100 [00:00<00:03, 26.25it/s]
17%|█▋ | 17/100 [00:00<00:03, 27.44it/s]
20%|██ | 20/100 [00:00<00:02, 27.83it/s]
23%|██▎ | 23/100 [00:00<00:02, 27.16it/s]
26%|██▌ | 26/100 [00:00<00:02, 26.55it/s]
29%|██▉ | 29/100 [00:01<00:02, 26.51it/s]
32%|███▏ | 32/100 [00:01<00:02, 26.63it/s]
35%|███▌ | 35/100 [00:01<00:02, 27.14it/s]
38%|███▊ | 38/100 [00:01<00:02, 27.51it/s]
41%|████ | 41/100 [00:01<00:02, 27.74it/s]
44%|████▍ | 44/100 [00:01<00:02, 26.78it/s]
47%|████▋ | 47/100 [00:01<00:02, 24.61it/s]
50%|█████ | 50/100 [00:01<00:02, 24.23it/s]
53%|█████▎ | 53/100 [00:02<00:02, 23.04it/s]
56%|█████▌ | 56/100 [00:02<00:01, 23.82it/s]
59%|█████▉ | 59/100 [00:02<00:01, 24.48it/s]
62%|██████▏ | 62/100 [00:02<00:01, 25.21it/s]
65%|██████▌ | 65/100 [00:02<00:01, 25.60it/s]
68%|██████▊ | 68/100 [00:02<00:01, 24.59it/s]
71%|███████ | 71/100 [00:02<00:01, 22.96it/s]
74%|███████▍ | 74/100 [00:02<00:01, 23.63it/s]
77%|███████▋ | 77/100 [00:03<00:01, 22.25it/s]
94%|█████████▍| 94/100 [00:03<00:00, 54.61it/s]
100%|██████████| 100/100 [00:03<00:00, 41.89it/s]
100%|██████████| 100/100 [00:03<00:00, 28.96it/s]
-- done in 4.518989952999618
-- done.
output shape: T7s1x123[7,50285:A10138.878048780489]
-- decode the answer...
-- done.
def print_prime(n):
"""
Print all primes between 1 and n
"""
primes = []
for num in range(2, n+1):
is_prime = True
for i in range(2, int(math.sqrt(num))+1):
if num % i == 0:
is_prime = False
break
if is_prime:
primes.append(num)
print(primes)
print_prime(20)
``
Method generate for onnx models¶
We first need to export the model into ONNX.
ONNX Conversion¶
# position_ids is removed from the example inputs and, with it, from the
# dynamic shapes describing them.
if "position_ids" in export_inputs:
    export_inputs.pop("position_ids")
    export_shapes.pop("position_ids")

# The example cache goes through to_any with the dtype reported for the model
# (presumably a cast to the weight dtype -- confirm against to_any).
dtype = get_weight_type(model)
print("-- model dtype:", dtype)
export_inputs["past_key_values"] = to_any(export_inputs["past_key_values"], dtype)

# "dynamo" on the command line selects the onnx-dynamo exporter,
# otherwise the custom one is used.
exporter = "custom"
if "dynamo" in sys.argv:
    exporter = "onnx-dynamo"
model_name = f"model_{model_id.replace('/', '-')}.{exporter}.onnx"

if not os.path.exists(model_name):
    # This step is slow so an existing file is reused instead of exporting again.
    print("-- conversion to ONNX.")
    begin = time.perf_counter()
    with torch_export_patches(patch_transformers=True):
        to_onnx(
            model,
            (),
            kwargs=to_any(export_inputs, device),
            dynamic_shapes=export_shapes,
            filename=model_name,
            verbose=1,
            exporter=exporter,
        )
    duration = time.perf_counter() - begin
    print(f"-- done in {duration}")
-- model dtype: torch.float16
-- conversion to ONNX.
[to_onnx] build the graph module from <class 'transformers.models.phi.modeling_phi.PhiForCausalLM'>, type(args)=<class 'tuple'>
[to_onnx] dynamic_shapes={'input_ids': {0: 'batch', 1: 'seq_length'}, 'attention_mask': {0: 'batch', 1: 'cache+seq'}, 'past_key_values': [{0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}]}
[_make_builder_interpreter] export_options=ExportOptions(aten_as_function=('aten.index_copy.default', 'aten.index_put.default', 'aten.setitem', <built-in function setitem>))
[_make_builder_interpreter] input args=()
[_make_builder_interpreter] input kwargs=dict(input_ids:T7r2,attention_mask:T7r2,past_key_values:DynamicCache(key_cache=#24[T10r4,...], value_cache=#24[T10r4,...]))
[_make_builder_interpreter] dynamic_shapes={'input_ids': {0: 'batch', 1: 'seq_length'}, 'attention_mask': {0: 'batch', 1: 'cache+seq'}, 'past_key_values': [{0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}]}
[_make_builder_interpreter] same_signature=True, tracing_mode=symbolic
[ExportOptions.export] ExportOptions(aten_as_function=('aten.index_copy.default', 'aten.index_put.default', 'aten.setitem', <built-in function setitem>)) - torch._dynamo.export 'PhiForCausalLM'
[ExportOptions.export] aten_as_function=('aten.index_copy.default', 'aten.index_put.default', 'aten.setitem', <built-in function setitem>)
[ExportOptions.export] torch_export strict=False, verbose=1
[ExportOptions.export] dynamic_shapes={'input_ids': {0: 'batch', 1: 'seq_length'}, 'attention_mask': {0: 'batch', 1: 'cache+seq'}, 'past_key_values': [{0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}]}
[ExportOptions.export] args=()
[ExportOptions.export] kwargs=dict(input_ids:T7r2,attention_mask:T7r2,past_key_values:DynamicCache(key_cache=#24[T10r4,...], value_cache=#24[T10r4,...]))
[ExportOptions.export] export start with strict=False...
[ExportOptions.export] export with backed_size_oblivious=auto
[torch_export] backed_size_oblivious='auto'
[torch_export] inferred backed_size_oblivious=None
[torch_export] export starts with backed_size_oblivious=None
[ExportOptions.export] export done in 11.546899875000236
[ExportOptions.export] post_process_exported_program with decomposition_table=None
[ExportOptions.export] remove inplace nodes
[ExportOptions.export] slices: 3 slices nodes were removed
[CustomTracer.remove_inplace] starts with 1891 nodes (n_inplace_submobules=0)
[CustomTracer.remove_inplace] S1: 80 inplace nodes
[CustomTracer.remove_inplace] S2: 74 inplace nodes and 100 iterations
[CustomTracer.remove_inplace] end with 95 iterations and 1706 nodes (n_inplace=74)
[ExportOptions.export] inplaces: 80 inplaced nodes were removed
[ExportOptions.export] done remove inplace in 0.045854885000153445, modified=80
[ExportOptions.export] done with no decomposition in 0.046567682999011595
[to_onnx] graph module done in 11.606618785999672 s
[to_onnx] start creating the onnx nodes
[to_onnx] interpreter.function_options=FunctionOptions(export_as_function=True, name='*', domain='*', external_threshold=256, move_initializer_to_constant=True, return_initializer=True, merge_allowed=True, rename_allowed=True)
0%| | 0/1706 [00:00<?, ?it/s]
26%|██▌ | 443/1706 [00:00<00:00, 4422.22it/s]
52%|█████▏ | 886/1706 [00:00<00:00, 1750.22it/s]
67%|██████▋ | 1141/1706 [00:00<00:00, 1482.28it/s]
78%|███████▊ | 1331/1706 [00:00<00:00, 1350.91it/s]
87%|████████▋ | 1489/1706 [00:01<00:00, 1257.91it/s]
95%|█████████▌| 1627/1706 [00:01<00:00, 1217.29it/s]
100%|██████████| 1706/1706 [00:01<00:00, 1364.36it/s]
[to_onnx] 2308 onnx nodes done in 1.4222290480010997 s
[to_onnx] start conversion to onnx (before optimization) mask_outputs=[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]
[GraphBuilder-LOA.inline_functions] begin inlining graph
[GraphBuilder-LOA.inline_functions] skip_functions=set()
[GraphBuilder-LOA._inline_functions_iterations] inline function 'submod_3' domain 'local_functions' [n_replacements=1]
[GraphBuilder-LOA._inline_functions_iterations] done with 9 new nodes for 'submod_3', 'local_functions'
[GraphBuilder-LOA.inline_functions] done inlining graph 140708725480128 in 0.032655167999109835
[GraphBuilder-LOA._add_shape_information] dynamic shapes replacements={'seq_length': 'seq_length', 'cache_length': 'cache_length', 'batch': 'batch', 'batch^s3^batch^s41': 'batch', 's77': 'batch', 's67': 'batch', 's90': 'batch', 's89': 'batch', 's104': 'batch', 's47': 'batch', 's62': 'batch', 'batch^s49^batch^s26': 'batch', 's83': 'batch', 's26': 'batch', 'batch^s87^batch^s23': 'batch', 'batch^s82^batch^s62': 'batch', 's36': 'batch', 's59': 'batch', 's64': 'batch', 's102': 'batch', 's3': 'batch', 's75': 'batch', 's10': 'batch', 'batch^s52^batch^s93': 'batch', 's45': 'batch', 's106': 'batch', 's56': 'batch', 's57': 'batch', 's41': 'batch', 'batch^s104^batch^s106': 'batch', 'batch^s36^batch^s13': 'batch', 's93': 'batch', 's13': 'batch', 's97': 'batch', 's91': 'batch', 's71': 'batch', 'batch^s35^batch^s60': 'batch', 'batch^s92^batch^s83': 'batch', 'batch^s64^batch^s86': 'batch', 's82': 'batch', 's86': 'batch', 's98': 'batch', 'batch^s48^batch^s59': 'batch', 's34': 'batch', 's35': 'batch', 'batch^s34^batch^s77': 'batch', 's84': 'batch', 's8': 'batch', 's79': 'batch', 'batch^s98^batch^s79': 'batch', 's29': 'batch', 'batch^s29^batch^s8': 'batch', 'batch^s90^batch^s57': 'batch', 's60': 'batch', 's52': 'batch', 'batch^s97^batch^s10': 'batch', 's30': 'batch', 'batch^s84^batch^s91': 'batch', 's39': 'batch', 's69': 'batch', 'batch^s67^batch^s61': 'batch', 'batch^s39^batch^s71': 'batch', 's61': 'batch', 's23': 'batch', 's49': 'batch', 'batch^s69^batch^s56': 'batch', 'batch^s30^batch^s89': 'batch', 's87': 'batch', 's72': 'batch', 's1': 'batch', 's92': 'batch', 'batch^s100^batch^s102': 'batch', 's48': 'batch', 'batch^s45^batch^s47': 'batch', 'batch^s1^batch^s75': 'batch', 's43': 'batch', 's100': 'batch', 's70': 'seq_length', 's9': 'cache_length', 's11': 'cache_length', 's40': 'cache_length', 's24': 'cache_length', 's4': 'cache_length', 's44': 'cache_length', 's78': 'cache_length', 's51': 'cache_length', 's88': 'cache_length', 's27': 'cache_length', 's81': 'cache_length', 's31': 
'cache_length', 's18': 'cache_length', 's74': 'cache_length', 's94': 'cache_length', 's38': 'cache_length', 's96': 'cache_length', 's33': 'cache_length', 's63': 'cache_length', 's73': 'cache_length', 's80': 'cache_length', 's42': 'cache_length', 's21': 'cache_length', 's7': 'cache_length', 's15': 'cache_length', 's85': 'cache_length', 's65': 'cache_length', 's14': 'cache_length', 's32': 'cache_length', 's46': 'cache_length', 's105': 'cache_length', 's58': 'cache_length', 's99': 'cache_length', 's103': 'cache_length', 's66': 'cache_length', 's107': 'cache_length', 's76': 'cache_length', 's37': 'cache_length', 's2': 'cache_length', 's28': 'cache_length', 's101': 'cache_length', 's54': 'cache_length', 's95': 'cache_length', 's68': 'cache_length', 's22': 'cache_length', 's55': 'cache_length', 's50': 'cache_length', 's25': 'cache_length'}
[GraphBuilder-LOA.optimize] start with 2316 nodes
[GraphBuilder-LOA.optimize] #patterns=111
[GraphBuilder-LOA.optimize] start with subgraphs
[GraphBuilder-LOA.optimize] done with subgraphs
[GraphBuilderPatternOptimization-LOA.optimize] start with 1987 nodes, 461 initializers, 111 patterns, priorities=[0, 1, 2, 3], max_iter=7948
[GraphBuilderPatternOptimization-LOA.optimize] same children={'SameChildrenFromInputPattern', 'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] iteration 0: 1987 nodes, priority=0
[GraphBuilderPatternOptimization-LOA.optimize] applies 226 matches, 75*CastPattern, 2*IdentityPattern, 3*ShapeBasedReshapeIsSqueezePattern, 96*ShapeBasedEditDistanceReshapePattern, 18*ShapeBasedIdentityPattern, 5*SameChildrenPattern, 1*SqueezeAddPattern, 1*SqueezeUnsqueezePattern, 1*UnsqueezeUnsqueezePattern, 24*FunctionAttentionPattern - time=0.150 | max_time=IdentityPattern:0.048
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=204, n_removed=259, n_applied=276 applied patterns, 1595 nodes left with 23 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 1
[GraphBuilderPatternOptimization-LOA.optimize] iteration 1: 1595 nodes, priority=1
[GraphBuilderPatternOptimization-LOA.optimize] applies 202 matches, 2*ConcatTwiceUnaryPattern, 1*ConstantToInitializerPattern, 49*DropoutPattern, 25*LayerNormalizationPattern, 1*ShapeBasedExpandBroadcastPattern, 1*ShapeBasedExpandSwapPattern, 96*SlicesSplitPattern, 3*SqueezeUnsqueezePattern, 24*GeluOrtPattern - time=0.210 | max_time=IdentityPattern:0.018
[GraphBuilderPatternOptimization-LOA.optimize] iteration 2: 1127 nodes, priority=1
[GraphBuilderPatternOptimization-LOA.optimize] applies 101 matches, 2*ConcatTwiceUnaryPattern, 25*LayerNormalizationScalePattern, 2*ShapeBasedExpandSwapPattern, 48*FunctionHalfRotaryEmbeddingPattern, 24*FastGeluPattern - time=0.142 | max_time=IdentityPattern:0.010
[GraphBuilderPatternOptimization-LOA.optimize] iteration 3: 911 nodes, priority=1
[GraphBuilderPatternOptimization-LOA.optimize] applies 26 matches, 1*ShapeBasedExpandBroadcastPattern, 1*FunctionCausalMaskPattern, 24*SkipLayerNormalizationPattern - time=0.106 | max_time=IdentityPattern:0.016
[GraphBuilderPatternOptimization-LOA.optimize] iteration 4: 885 nodes, priority=1
[GraphBuilderPatternOptimization-LOA.optimize] applies 2 matches, 1*ShapeBasedConcatExpandPattern, 1*FunctionCausalMaskMulAddPattern - time=0.118 | max_time=IdentityPattern:0.015
[GraphBuilderPatternOptimization-LOA.optimize] iteration 5: 879 nodes, priority=1
[GraphBuilderPatternOptimization-LOA.optimize] applies 1 matches, [0]=MatchResult: FunctionCosSinCachePattern replaces ['Squeeze', 'Squeeze', 'Range', 'Unsqueeze', 'Cast', 'Reshape', 'Mul', 'Cos', 'Cast', 'Sin', 'Cast'] - time=0.086 | max_time=ShapeBasedEditDistanceReshapePattern:0.007
[GraphBuilderPatternOptimization-LOA.optimize] iteration 6: 869 nodes, priority=1
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 2
[GraphBuilderPatternOptimization-LOA.optimize] iteration 7: 869 nodes, priority=2
[GraphBuilderPatternOptimization-LOA.optimize] applies 1 matches, [0]=MatchResult: ContribRotaryEmbeddingPattern replaces ['Concat', 'Concat', 'Split', 'HalfRotaryEmbedding', 'Concat'] - time=0.111 | max_time=IdentityPattern:0.013
[GraphBuilderPatternOptimization-LOA.optimize] iteration 8: 874 nodes, priority=2
[GraphBuilderPatternOptimization-LOA.optimize] applies 3 matches, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern - time=0.106 | max_time=IdentityPattern:0.012
[GraphBuilderPatternOptimization-LOA.optimize] iteration 9: 878 nodes, priority=2
[GraphBuilderPatternOptimization-LOA.optimize] applies 6 matches, 2*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.092 | max_time=ShapeBasedEditDistanceReshapePattern:0.007
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=19, n_removed=26, n_applied=624 applied patterns, 876 nodes left with 3 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 10: 876 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 5 matches, 1*ShapeBasedEditDistanceReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.110 | max_time=IdentityPattern:0.013
[GraphBuilderPatternOptimization-LOA.optimize] iteration 11: 882 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 9 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ReshapeReshapePattern, 3*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.107 | max_time=IdentityPattern:0.011
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=645 applied patterns, 875 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 12: 875 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 8 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.117 | max_time=IdentityPattern:0.016
[GraphBuilderPatternOptimization-LOA.optimize] iteration 13: 878 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 14 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 3*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.112 | max_time=IdentityPattern:0.013
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=674 applied patterns, 866 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 14: 866 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.113 | max_time=IdentityPattern:0.016
[GraphBuilderPatternOptimization-LOA.optimize] iteration 15: 865 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.113 | max_time=IdentityPattern:0.011
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=708 applied patterns, 852 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 16: 852 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.121 | max_time=IdentityPattern:0.014
[GraphBuilderPatternOptimization-LOA.optimize] iteration 17: 851 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.125 | max_time=ShapeBasedEditDistanceReshapePattern:0.010
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=742 applied patterns, 838 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 18: 838 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.116 | max_time=IdentityPattern:0.010
[GraphBuilderPatternOptimization-LOA.optimize] iteration 19: 837 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.148 | max_time=ShapeBasedEditDistanceReshapePattern:0.012
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=776 applied patterns, 824 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 20: 824 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.108 | max_time=IdentityPattern:0.008
[GraphBuilderPatternOptimization-LOA.optimize] iteration 21: 823 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.109 | max_time=IdentityPattern:0.014
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=810 applied patterns, 810 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 22: 810 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.105 | max_time=IdentityPattern:0.008
[GraphBuilderPatternOptimization-LOA.optimize] iteration 23: 809 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.093 | max_time=IdentityPattern:0.009
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=844 applied patterns, 796 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 24: 796 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.096 | max_time=IdentityPattern:0.006
[GraphBuilderPatternOptimization-LOA.optimize] iteration 25: 795 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.099 | max_time=IdentityPattern:0.011
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=878 applied patterns, 782 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 26: 782 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.106 | max_time=IdentityPattern:0.012
[GraphBuilderPatternOptimization-LOA.optimize] iteration 27: 781 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.106 | max_time=IdentityPattern:0.013
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=912 applied patterns, 768 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 28: 768 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.105 | max_time=IdentityPattern:0.012
[GraphBuilderPatternOptimization-LOA.optimize] iteration 29: 767 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.093 | max_time=IdentityPattern:0.007
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=946 applied patterns, 754 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 30: 754 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.097 | max_time=IdentityPattern:0.016
[GraphBuilderPatternOptimization-LOA.optimize] iteration 31: 753 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.099 | max_time=IdentityPattern:0.007
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=980 applied patterns, 740 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 32: 740 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.098 | max_time=ShapeBasedEditDistanceReshapePattern:0.008
[GraphBuilderPatternOptimization-LOA.optimize] iteration 33: 739 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.086 | max_time=ShapeBasedEditDistanceReshapePattern:0.006
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=1014 applied patterns, 726 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 34: 726 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.092 | max_time=IdentityPattern:0.009
[GraphBuilderPatternOptimization-LOA.optimize] iteration 35: 725 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 14 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 3*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.090 | max_time=ShapeBasedEditDistanceReshapePattern:0.006
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=1047 applied patterns, 713 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 36: 713 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.093 | max_time=IdentityPattern:0.007
[GraphBuilderPatternOptimization-LOA.optimize] iteration 37: 712 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.090 | max_time=IdentityPattern:0.006
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=1081 applied patterns, 699 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 38: 699 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.086 | max_time=IdentityPattern:0.009
[GraphBuilderPatternOptimization-LOA.optimize] iteration 39: 698 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.082 | max_time=ShapeBasedEditDistanceReshapePattern:0.005
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=1115 applied patterns, 685 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 40: 685 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.086 | max_time=IdentityPattern:0.006
[GraphBuilderPatternOptimization-LOA.optimize] iteration 41: 684 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.083 | max_time=IdentityPattern:0.005
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=1149 applied patterns, 671 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 42: 671 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.099 | max_time=IdentityPattern:0.012
[GraphBuilderPatternOptimization-LOA.optimize] iteration 43: 670 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.073 | max_time=IdentityPattern:0.005
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=1183 applied patterns, 657 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 44: 657 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.082 | max_time=IdentityPattern:0.005
[GraphBuilderPatternOptimization-LOA.optimize] iteration 45: 656 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.080 | max_time=IdentityPattern:0.006
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=1217 applied patterns, 643 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 46: 643 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.086 | max_time=IdentityPattern:0.013
[GraphBuilderPatternOptimization-LOA.optimize] iteration 47: 642 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.074 | max_time=IdentityPattern:0.005
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=1251 applied patterns, 629 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 48: 629 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.083 | max_time=IdentityPattern:0.009
[GraphBuilderPatternOptimization-LOA.optimize] iteration 49: 628 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.072 | max_time=SameChildrenPattern:0.006
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=1285 applied patterns, 615 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 50: 615 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.099 | max_time=IdentityPattern:0.009
[GraphBuilderPatternOptimization-LOA.optimize] iteration 51: 614 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.071 | max_time=SameChildrenPattern:0.005
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=1319 applied patterns, 601 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 52: 601 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.081 | max_time=SameChildrenPattern:0.006
[GraphBuilderPatternOptimization-LOA.optimize] iteration 53: 600 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.086 | max_time=SameChildrenPattern:0.006
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=21, n_removed=29, n_applied=1353 applied patterns, 587 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 54: 587 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.080 | max_time=IdentityPattern:0.015
[GraphBuilderPatternOptimization-LOA.optimize] iteration 55: 584 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 14 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbedding3DPattern - time=0.070 | max_time=SameChildrenPattern:0.006
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=11, n_removed=15, n_applied=1383 applied patterns, 569 nodes left with 4 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 56: 569 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 8 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 1*MultiHeadAttention3DPattern - time=0.082 | max_time=IdentityPattern:0.012
[GraphBuilderPatternOptimization-LOA.optimize] iteration 57: 560 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 7 matches, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 1*SameChildrenPattern - time=0.097 | max_time=IdentityPattern:0.008
[GraphBuilderPatternOptimization-LOA.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-LOA.optimize] n_added=0, n_removed=0, n_applied=1398 applied patterns, 553 nodes left with 1 iterations
[GraphBuilderPatternOptimization-LOA.optimize] increase priority to 3
[GraphBuilderPatternOptimization-LOA.optimize] iteration 58: 553 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] applies 5 matches, 5*ShapedBasedReshapePattern - time=0.062 | max_time=SameChildrenPattern:0.005
[GraphBuilderPatternOptimization-LOA.optimize] iteration 59: 548 nodes, priority=3
[GraphBuilderPatternOptimization-LOA.optimize] stops current_priority_index=4, priorities=[0, 1, 2, 3]
[GraphBuilderPatternOptimization-LOA.optimize] done after 60 iterations with 548 nodes in 12.214
[OrderOptimization.optimize] ALGO-2
[OrderOptimization.random_order] -- starts with 473 nodes, 353 initializers
[OrderOptimization.shape_order] done after in 0.0028569269998115487s with changed=4 scale=16
[GraphBuilder-LOA.optimize] done with 473 nodes in 14.628
[GraphBuilder-LOA.to_onnx] make_model 499 inits 341 params
[GraphBuilder-LOA.time_evaluation_constants_] 0.0009636320010031341
[GraphBuilder-LOA._build_initializers] start with 499 initializers, large_model=True, external_threshold=1024
[GraphBuilder-LOA._build_initializers] switch low/high order
[GraphBuilder-LOA._build_initializers] done in 7.992999599082395e-06s with 353 initializers, 341 large initializers
[GraphBuilder-LOA._add_shape_information] dynamic shapes replacements={'seq_length': 'seq_length', 'cache_length': 'cache_length', 'batch': 'batch', 'batch^s3^batch^s41': 'batch', 's77': 'batch', 's67': 'batch', 's90': 'batch', 's89': 'batch', 's104': 'batch', 's47': 'batch', 's62': 'batch', 'batch^s49^batch^s26': 'batch', 's83': 'batch', 's26': 'batch', 'batch^s87^batch^s23': 'batch', 'batch^s82^batch^s62': 'batch', 's36': 'batch', 's59': 'batch', 's64': 'batch', 's102': 'batch', 's3': 'batch', 's75': 'batch', 's10': 'batch', 'batch^s52^batch^s93': 'batch', 's45': 'batch', 's106': 'batch', 's56': 'batch', 's57': 'batch', 's41': 'batch', 'batch^s104^batch^s106': 'batch', 'batch^s36^batch^s13': 'batch', 's93': 'batch', 's13': 'batch', 's97': 'batch', 's91': 'batch', 's71': 'batch', 'batch^s35^batch^s60': 'batch', 'batch^s92^batch^s83': 'batch', 'batch^s64^batch^s86': 'batch', 's82': 'batch', 's86': 'batch', 's98': 'batch', 'batch^s48^batch^s59': 'batch', 's34': 'batch', 's35': 'batch', 'batch^s34^batch^s77': 'batch', 's84': 'batch', 's8': 'batch', 's79': 'batch', 'batch^s98^batch^s79': 'batch', 's29': 'batch', 'batch^s29^batch^s8': 'batch', 'batch^s90^batch^s57': 'batch', 's60': 'batch', 's52': 'batch', 'batch^s97^batch^s10': 'batch', 's30': 'batch', 'batch^s84^batch^s91': 'batch', 's39': 'batch', 's69': 'batch', 'batch^s67^batch^s61': 'batch', 'batch^s39^batch^s71': 'batch', 's61': 'batch', 's23': 'batch', 's49': 'batch', 'batch^s69^batch^s56': 'batch', 'batch^s30^batch^s89': 'batch', 's87': 'batch', 's72': 'batch', 's1': 'batch', 's92': 'batch', 'batch^s100^batch^s102': 'batch', 's48': 'batch', 'batch^s45^batch^s47': 'batch', 'batch^s1^batch^s75': 'batch', 's43': 'batch', 's100': 'batch', 's70': 'seq_length', 's9': 'cache_length', 's11': 'cache_length', 's40': 'cache_length', 's24': 'cache_length', 's4': 'cache_length', 's44': 'cache_length', 's78': 'cache_length', 's51': 'cache_length', 's88': 'cache_length', 's27': 'cache_length', 's81': 'cache_length', 's31': 
'cache_length', 's18': 'cache_length', 's74': 'cache_length', 's94': 'cache_length', 's38': 'cache_length', 's96': 'cache_length', 's33': 'cache_length', 's63': 'cache_length', 's73': 'cache_length', 's80': 'cache_length', 's42': 'cache_length', 's21': 'cache_length', 's7': 'cache_length', 's15': 'cache_length', 's85': 'cache_length', 's65': 'cache_length', 's14': 'cache_length', 's32': 'cache_length', 's46': 'cache_length', 's105': 'cache_length', 's58': 'cache_length', 's99': 'cache_length', 's103': 'cache_length', 's66': 'cache_length', 's107': 'cache_length', 's76': 'cache_length', 's37': 'cache_length', 's2': 'cache_length', 's28': 'cache_length', 's101': 'cache_length', 's54': 'cache_length', 's95': 'cache_length', 's68': 'cache_length', 's22': 'cache_length', 's55': 'cache_length', 's50': 'cache_length', 's25': 'cache_length'}
[to_onnx] to_onnx done in 14.889057660999242s and 473 nodes, 353 initializers, 50 inputs, 49 outputs
-- done in 38.13969019199976
onnx_generate¶
Then we can call onnx_generate to generate two tokens.
This function is part of onnx_diagnostic but follows the implementation
seen earlier for a torch model.
Let’s first ask the function to return the session, to avoid creating it again on the second call.
# Warm-up call: only two new tokens are requested. return_session=True makes
# onnx_generate return a triple whose second element is the session, which is
# reused below so the timed call does not create it again.
# The end-of-sequence id comes from the tokenizer (as in the timed call below)
# rather than a hard-coded value, so both calls stop on the same token
# whichever model was loaded.
_res, session, _feeds = onnx_generate(
    model_name,
    inputs.input_ids,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=2,
    return_session=True,
)
# And now the full answer.
print("-- compute the answer with custom generate...")
begin = time.perf_counter()
# The session obtained above is passed in place of the model name.
outputs = onnx_generate(
    session, inputs.input_ids, eos_token_id=tokenizer.eos_token_id, max_new_tokens=100
)
duration = time.perf_counter() - begin
print(f"-- done in {duration}")
# Record the timing next to the other measurements collected in `data`.
data.append(dict(name="onnx", duration=duration))
print("-- done.")
print("output shape:", string_type(outputs, with_shape=True, with_min_max=True))
print("-- decode the answer...")
# batch_decode returns one string per sequence; only the first one is shown.
text = tokenizer.batch_decode(outputs)[0]
print("-- done.")
print(text)
-- compute the answer with custom generate...
-- done in 1.8495868000009068
-- done.
output shape: T7s1x123[7,50285:A10138.878048780489]
-- decode the answer...
-- done.
def print_prime(n):
"""
Print all primes between 1 and n
"""
primes = []
for num in range(2, n+1):
is_prime = True
for i in range(2, int(math.sqrt(num))+1):
if num % i == 0:
is_prime = False
break
if is_prime:
primes.append(num)
print(primes)
print_prime(20)
``
Plots¶
# Gather every recorded duration into a table indexed by the scenario name.
df = pandas.DataFrame(data)
df = df.set_index("name")
print(df)
duration
name
generate 3.732448
custom 4.518990
onnx 1.849587

Total running time of the script: (0 minutes 57.100 seconds)
Related examples
LayerNormalization implementation cannot be exchanged