From an LLM to processing a prompt

The method generate produces the model's answer for a given prompt. Let’s implement our own version to better understand how it works, and then apply it to an ONNX model.

Example with Phi 1.5

microsoft/Phi-1.5 is a small LLM. The example given below loads the model, tokenizes a prompt, and calls generate to compute the answer.

import os
import time
import sys
import pandas
from tqdm import tqdm
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from onnx_diagnostic.ext_test_case import unit_test_going
from onnx_diagnostic.helpers import string_type
from onnx_diagnostic.helpers.torch_helper import to_any, get_weight_type
from onnx_diagnostic.helpers.rt_helper import onnx_generate
from onnx_diagnostic.torch_export_patches import torch_export_patches
from onnx_diagnostic.torch_models.hghub import get_untrained_model_with_inputs
from onnx_diagnostic.torch_models.hghub.hub_api import get_pretrained_config, task_from_id
from onnx_diagnostic.tasks import random_input_kwargs
from onnx_diagnostic.export.api import to_onnx

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# Accumulates one dictionary per timed measurement.
data = []

print("-- load the model...")
if unit_test_going():
    # unit_test_going() returns True if UNITTEST_GOING is 1
    # The example switches to a faster scenario.
    model_id = "arnir0/Tiny-LLM"
    data_export = get_untrained_model_with_inputs(model_id)
    model = data_export["model"]
    export_inputs = data_export["inputs"]
    export_shapes = data_export["dynamic_shapes"]
    tokenizer = AutoTokenizer.from_pretrained(model_id)
else:
    model_id = "microsoft/phi-1_5"
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    # Derive inputs and dynamic shapes from the pretrained configuration
    # and the task associated with the model id.
    config = get_pretrained_config(model_id)
    task = task_from_id(model_id)
    kwargs, fct = random_input_kwargs(config, task)
    res = fct(model, config, add_second_input=False, **kwargs)
    export_inputs = res["inputs"]
    export_shapes = res["dynamic_shapes"]
# Both branches end with the model moved to the selected device.
model = model.to(device)
print("-- done.")

print("-- tokenize the prompt...")
# The prompt is the header and docstring of a Python function.
prompt = '''def print_prime(n):
   """
   Print all primes between 1 and n
   """'''
# Tokenize into PyTorch tensors without an attention mask,
# then move the result to the selected device.
encoded = tokenizer(prompt, return_tensors="pt", return_attention_mask=False)
inputs = encoded.to(device)
print("-- done.")

print("-- compute the answer...")
# Time a single call to ``generate`` capped at 100 new tokens.
begin = time.perf_counter()
outputs = model.generate(max_new_tokens=100, **inputs)
duration = time.perf_counter() - begin
print(f"-- done in {duration}")
# Keep the measurement alongside the others.
data.append({"name": "generate", "duration": duration})
print("output shape:", string_type(outputs, with_shape=True, with_min_max=True))
print("-- decode the answer...")
# Only the first decoded sequence is kept.
decoded = tokenizer.batch_decode(outputs)
text = decoded[0]
print("-- done.")
print(text)
-- load the model...

Loading weights:   0%|          | 0/341 [00:00<?, ?it/s]
Loading weights:   0%|          | 1/341 [00:00<00:00, 15363.75it/s, Materializing param=lm_head.bias]
Loading weights:   0%|          | 1/341 [00:00<00:00, 6141.00it/s, Materializing param=lm_head.bias]
Loading weights:   1%|          | 2/341 [00:00<00:00, 5200.62it/s, Materializing param=lm_head.weight]
Loading weights:   1%|          | 2/341 [00:00<00:00, 4106.02it/s, Materializing param=lm_head.weight]
Loading weights:   1%|          | 3/341 [00:00<00:00, 1247.81it/s, Materializing param=model.embed_tokens.weight]
Loading weights:   1%|          | 3/341 [00:00<00:00, 1183.94it/s, Materializing param=model.embed_tokens.weight]
Loading weights:   1%|          | 4/341 [00:00<00:00, 1364.22it/s, Materializing param=model.final_layernorm.bias]
Loading weights:   1%|          | 4/341 [00:00<00:00, 1326.37it/s, Materializing param=model.final_layernorm.bias]
Loading weights:   1%|▏         | 5/341 [00:00<00:00, 1510.59it/s, Materializing param=model.final_layernorm.weight]
Loading weights:   1%|▏         | 5/341 [00:00<00:00, 1471.17it/s, Materializing param=model.final_layernorm.weight]
Loading weights:   2%|▏         | 6/341 [00:00<00:00, 1540.42it/s, Materializing param=model.layers.0.input_layernorm.bias]
Loading weights:   2%|▏         | 6/341 [00:00<00:00, 1489.63it/s, Materializing param=model.layers.0.input_layernorm.bias]
Loading weights:   2%|▏         | 7/341 [00:00<00:00, 1496.29it/s, Materializing param=model.layers.0.input_layernorm.weight]
Loading weights:   2%|▏         | 7/341 [00:00<00:00, 1455.85it/s, Materializing param=model.layers.0.input_layernorm.weight]
Loading weights:   2%|▏         | 8/341 [00:00<00:00, 1367.67it/s, Materializing param=model.layers.0.mlp.fc1.bias]
Loading weights:   2%|▏         | 8/341 [00:00<00:00, 1341.91it/s, Materializing param=model.layers.0.mlp.fc1.bias]
Loading weights:   3%|▎         | 9/341 [00:00<00:00, 1357.82it/s, Materializing param=model.layers.0.mlp.fc1.weight]
Loading weights:   3%|▎         | 9/341 [00:00<00:00, 1339.42it/s, Materializing param=model.layers.0.mlp.fc1.weight]
Loading weights:   3%|▎         | 10/341 [00:00<00:00, 1389.62it/s, Materializing param=model.layers.0.mlp.fc2.bias]
Loading weights:   3%|▎         | 10/341 [00:00<00:00, 1371.90it/s, Materializing param=model.layers.0.mlp.fc2.bias]
Loading weights:   3%|▎         | 11/341 [00:00<00:00, 1384.05it/s, Materializing param=model.layers.0.mlp.fc2.weight]
Loading weights:   3%|▎         | 11/341 [00:00<00:00, 1367.56it/s, Materializing param=model.layers.0.mlp.fc2.weight]
Loading weights:   4%|▎         | 12/341 [00:00<00:00, 1382.13it/s, Materializing param=model.layers.0.self_attn.dense.bias]
Loading weights:   4%|▎         | 12/341 [00:00<00:00, 1366.56it/s, Materializing param=model.layers.0.self_attn.dense.bias]
Loading weights:   4%|▍         | 13/341 [00:00<00:00, 1299.72it/s, Materializing param=model.layers.0.self_attn.dense.weight]
Loading weights:   4%|▍         | 13/341 [00:00<00:00, 1288.51it/s, Materializing param=model.layers.0.self_attn.dense.weight]
Loading weights:   4%|▍         | 14/341 [00:00<00:00, 1274.34it/s, Materializing param=model.layers.0.self_attn.k_proj.bias]
Loading weights:   4%|▍         | 14/341 [00:00<00:00, 1264.49it/s, Materializing param=model.layers.0.self_attn.k_proj.bias]
Loading weights:   4%|▍         | 15/341 [00:00<00:00, 1246.85it/s, Materializing param=model.layers.0.self_attn.k_proj.weight]
Loading weights:   4%|▍         | 15/341 [00:00<00:00, 1238.11it/s, Materializing param=model.layers.0.self_attn.k_proj.weight]
Loading weights:   5%|▍         | 16/341 [00:00<00:00, 1289.36it/s, Materializing param=model.layers.0.self_attn.q_proj.bias]
Loading weights:   5%|▍         | 16/341 [00:00<00:00, 1281.58it/s, Materializing param=model.layers.0.self_attn.q_proj.bias]
Loading weights:   5%|▍         | 17/341 [00:00<00:00, 1312.77it/s, Materializing param=model.layers.0.self_attn.q_proj.weight]
Loading weights:   5%|▍         | 17/341 [00:00<00:00, 1301.49it/s, Materializing param=model.layers.0.self_attn.q_proj.weight]
Loading weights:   5%|▌         | 18/341 [00:00<00:00, 1332.51it/s, Materializing param=model.layers.0.self_attn.v_proj.bias]
Loading weights:   5%|▌         | 18/341 [00:00<00:00, 1322.24it/s, Materializing param=model.layers.0.self_attn.v_proj.bias]
Loading weights:   6%|▌         | 19/341 [00:00<00:00, 1351.30it/s, Materializing param=model.layers.0.self_attn.v_proj.weight]
Loading weights:   6%|▌         | 19/341 [00:00<00:00, 1343.10it/s, Materializing param=model.layers.0.self_attn.v_proj.weight]
Loading weights:   6%|▌         | 20/341 [00:00<00:00, 1370.13it/s, Materializing param=model.layers.1.input_layernorm.bias]
Loading weights:   6%|▌         | 20/341 [00:00<00:00, 1361.08it/s, Materializing param=model.layers.1.input_layernorm.bias]
Loading weights:   6%|▌         | 21/341 [00:00<00:00, 1409.29it/s, Materializing param=model.layers.1.input_layernorm.weight]
Loading weights:   6%|▌         | 21/341 [00:00<00:00, 1402.40it/s, Materializing param=model.layers.1.input_layernorm.weight]
Loading weights:   6%|▋         | 22/341 [00:00<00:00, 1409.01it/s, Materializing param=model.layers.1.mlp.fc1.bias]
Loading weights:   6%|▋         | 22/341 [00:00<00:00, 1401.35it/s, Materializing param=model.layers.1.mlp.fc1.bias]
Loading weights:   7%|▋         | 23/341 [00:00<00:00, 1415.68it/s, Materializing param=model.layers.1.mlp.fc1.weight]
Loading weights:   7%|▋         | 23/341 [00:00<00:00, 1407.92it/s, Materializing param=model.layers.1.mlp.fc1.weight]
Loading weights:   7%|▋         | 24/341 [00:00<00:00, 1430.36it/s, Materializing param=model.layers.1.mlp.fc2.bias]
Loading weights:   7%|▋         | 24/341 [00:00<00:00, 1422.12it/s, Materializing param=model.layers.1.mlp.fc2.bias]
Loading weights:   7%|▋         | 25/341 [00:00<00:00, 1405.88it/s, Materializing param=model.layers.1.mlp.fc2.weight]
Loading weights:   7%|▋         | 25/341 [00:00<00:00, 1399.46it/s, Materializing param=model.layers.1.mlp.fc2.weight]
Loading weights:   8%|▊         | 26/341 [00:00<00:00, 1415.95it/s, Materializing param=model.layers.1.self_attn.dense.bias]
Loading weights:   8%|▊         | 26/341 [00:00<00:00, 1409.07it/s, Materializing param=model.layers.1.self_attn.dense.bias]
Loading weights:   8%|▊         | 27/341 [00:00<00:00, 1427.14it/s, Materializing param=model.layers.1.self_attn.dense.weight]
Loading weights:   8%|▊         | 27/341 [00:00<00:00, 1420.83it/s, Materializing param=model.layers.1.self_attn.dense.weight]
Loading weights:   8%|▊         | 28/341 [00:00<00:00, 1423.28it/s, Materializing param=model.layers.1.self_attn.k_proj.bias]
Loading weights:   8%|▊         | 28/341 [00:00<00:00, 1417.06it/s, Materializing param=model.layers.1.self_attn.k_proj.bias]
Loading weights:   9%|▊         | 29/341 [00:00<00:00, 1441.22it/s, Materializing param=model.layers.1.self_attn.k_proj.weight]
Loading weights:   9%|▊         | 29/341 [00:00<00:00, 1432.65it/s, Materializing param=model.layers.1.self_attn.k_proj.weight]
Loading weights:   9%|▉         | 30/341 [00:00<00:00, 1410.21it/s, Materializing param=model.layers.1.self_attn.q_proj.bias]
Loading weights:   9%|▉         | 30/341 [00:00<00:00, 1402.76it/s, Materializing param=model.layers.1.self_attn.q_proj.bias]
Loading weights:   9%|▉         | 31/341 [00:00<00:00, 1407.52it/s, Materializing param=model.layers.1.self_attn.q_proj.weight]
Loading weights:   9%|▉         | 31/341 [00:00<00:00, 1399.92it/s, Materializing param=model.layers.1.self_attn.q_proj.weight]
Loading weights:   9%|▉         | 32/341 [00:00<00:00, 1424.11it/s, Materializing param=model.layers.1.self_attn.v_proj.bias]
Loading weights:   9%|▉         | 32/341 [00:00<00:00, 1416.43it/s, Materializing param=model.layers.1.self_attn.v_proj.bias]
Loading weights:  10%|▉         | 33/341 [00:00<00:00, 1406.45it/s, Materializing param=model.layers.1.self_attn.v_proj.weight]
Loading weights:  10%|▉         | 33/341 [00:00<00:00, 1400.52it/s, Materializing param=model.layers.1.self_attn.v_proj.weight]
Loading weights:  10%|▉         | 34/341 [00:00<00:00, 1397.80it/s, Materializing param=model.layers.2.input_layernorm.bias]
Loading weights:  10%|▉         | 34/341 [00:00<00:00, 1392.33it/s, Materializing param=model.layers.2.input_layernorm.bias]
Loading weights:  10%|█         | 35/341 [00:00<00:00, 1411.11it/s, Materializing param=model.layers.2.input_layernorm.weight]
Loading weights:  10%|█         | 35/341 [00:00<00:00, 1405.34it/s, Materializing param=model.layers.2.input_layernorm.weight]
Loading weights:  11%|█         | 36/341 [00:00<00:00, 1423.27it/s, Materializing param=model.layers.2.mlp.fc1.bias]
Loading weights:  11%|█         | 36/341 [00:00<00:00, 1418.07it/s, Materializing param=model.layers.2.mlp.fc1.bias]
Loading weights:  11%|█         | 37/341 [00:00<00:00, 1412.87it/s, Materializing param=model.layers.2.mlp.fc1.weight]
Loading weights:  11%|█         | 37/341 [00:00<00:00, 1407.60it/s, Materializing param=model.layers.2.mlp.fc1.weight]
Loading weights:  11%|█         | 38/341 [00:00<00:00, 1412.85it/s, Materializing param=model.layers.2.mlp.fc2.bias]
Loading weights:  11%|█         | 38/341 [00:00<00:00, 1408.04it/s, Materializing param=model.layers.2.mlp.fc2.bias]
Loading weights:  11%|█▏        | 39/341 [00:00<00:00, 1404.81it/s, Materializing param=model.layers.2.mlp.fc2.weight]
Loading weights:  11%|█▏        | 39/341 [00:00<00:00, 1400.70it/s, Materializing param=model.layers.2.mlp.fc2.weight]
Loading weights:  12%|█▏        | 40/341 [00:00<00:00, 1413.09it/s, Materializing param=model.layers.2.self_attn.dense.bias]
Loading weights:  12%|█▏        | 40/341 [00:00<00:00, 1408.31it/s, Materializing param=model.layers.2.self_attn.dense.bias]
Loading weights:  12%|█▏        | 41/341 [00:00<00:00, 1400.49it/s, Materializing param=model.layers.2.self_attn.dense.weight]
Loading weights:  12%|█▏        | 41/341 [00:00<00:00, 1395.89it/s, Materializing param=model.layers.2.self_attn.dense.weight]
Loading weights:  12%|█▏        | 42/341 [00:00<00:00, 1388.62it/s, Materializing param=model.layers.2.self_attn.k_proj.bias]
Loading weights:  12%|█▏        | 42/341 [00:00<00:00, 1383.54it/s, Materializing param=model.layers.2.self_attn.k_proj.bias]
Loading weights:  13%|█▎        | 43/341 [00:00<00:00, 1400.47it/s, Materializing param=model.layers.2.self_attn.k_proj.weight]
Loading weights:  13%|█▎        | 43/341 [00:00<00:00, 1395.66it/s, Materializing param=model.layers.2.self_attn.k_proj.weight]
Loading weights:  13%|█▎        | 44/341 [00:00<00:00, 1392.51it/s, Materializing param=model.layers.2.self_attn.q_proj.bias]
Loading weights:  13%|█▎        | 44/341 [00:00<00:00, 1387.72it/s, Materializing param=model.layers.2.self_attn.q_proj.bias]
Loading weights:  13%|█▎        | 45/341 [00:00<00:00, 1402.89it/s, Materializing param=model.layers.2.self_attn.q_proj.weight]
Loading weights:  13%|█▎        | 45/341 [00:00<00:00, 1397.85it/s, Materializing param=model.layers.2.self_attn.q_proj.weight]
Loading weights:  13%|█▎        | 46/341 [00:00<00:00, 1394.67it/s, Materializing param=model.layers.2.self_attn.v_proj.bias]
Loading weights:  13%|█▎        | 46/341 [00:00<00:00, 1390.90it/s, Materializing param=model.layers.2.self_attn.v_proj.bias]
Loading weights:  14%|█▍        | 47/341 [00:00<00:00, 1376.69it/s, Materializing param=model.layers.2.self_attn.v_proj.weight]
Loading weights:  14%|█▍        | 47/341 [00:00<00:00, 1372.95it/s, Materializing param=model.layers.2.self_attn.v_proj.weight]
Loading weights:  14%|█▍        | 48/341 [00:00<00:00, 1386.00it/s, Materializing param=model.layers.3.input_layernorm.bias]
Loading weights:  14%|█▍        | 48/341 [00:00<00:00, 1381.20it/s, Materializing param=model.layers.3.input_layernorm.bias]
Loading weights:  14%|█▍        | 49/341 [00:00<00:00, 1401.46it/s, Materializing param=model.layers.3.input_layernorm.weight]
Loading weights:  14%|█▍        | 49/341 [00:00<00:00, 1398.60it/s, Materializing param=model.layers.3.input_layernorm.weight]
Loading weights:  15%|█▍        | 50/341 [00:00<00:00, 1409.33it/s, Materializing param=model.layers.3.mlp.fc1.bias]
Loading weights:  15%|█▍        | 50/341 [00:00<00:00, 1406.02it/s, Materializing param=model.layers.3.mlp.fc1.bias]
Loading weights:  15%|█▍        | 51/341 [00:00<00:00, 1419.81it/s, Materializing param=model.layers.3.mlp.fc1.weight]
Loading weights:  15%|█▍        | 51/341 [00:00<00:00, 1416.68it/s, Materializing param=model.layers.3.mlp.fc1.weight]
Loading weights:  15%|█▌        | 52/341 [00:00<00:00, 1425.57it/s, Materializing param=model.layers.3.mlp.fc2.bias]
Loading weights:  15%|█▌        | 52/341 [00:00<00:00, 1422.39it/s, Materializing param=model.layers.3.mlp.fc2.bias]
Loading weights:  16%|█▌        | 53/341 [00:00<00:00, 1435.77it/s, Materializing param=model.layers.3.mlp.fc2.weight]
Loading weights:  16%|█▌        | 53/341 [00:00<00:00, 1432.14it/s, Materializing param=model.layers.3.mlp.fc2.weight]
Loading weights:  16%|█▌        | 54/341 [00:00<00:00, 1438.94it/s, Materializing param=model.layers.3.self_attn.dense.bias]
Loading weights:  16%|█▌        | 54/341 [00:00<00:00, 1435.71it/s, Materializing param=model.layers.3.self_attn.dense.bias]
Loading weights:  16%|█▌        | 55/341 [00:00<00:00, 1423.42it/s, Materializing param=model.layers.3.self_attn.dense.weight]
Loading weights:  16%|█▌        | 55/341 [00:00<00:00, 1419.99it/s, Materializing param=model.layers.3.self_attn.dense.weight]
Loading weights:  16%|█▋        | 56/341 [00:00<00:00, 1438.19it/s, Materializing param=model.layers.3.self_attn.k_proj.bias]
Loading weights:  16%|█▋        | 56/341 [00:00<00:00, 1435.50it/s, Materializing param=model.layers.3.self_attn.k_proj.bias]
Loading weights:  17%|█▋        | 57/341 [00:00<00:00, 1443.50it/s, Materializing param=model.layers.3.self_attn.k_proj.weight]
Loading weights:  17%|█▋        | 57/341 [00:00<00:00, 1440.28it/s, Materializing param=model.layers.3.self_attn.k_proj.weight]
Loading weights:  17%|█▋        | 58/341 [00:00<00:00, 1446.25it/s, Materializing param=model.layers.3.self_attn.q_proj.bias]
Loading weights:  17%|█▋        | 58/341 [00:00<00:00, 1443.15it/s, Materializing param=model.layers.3.self_attn.q_proj.bias]
Loading weights:  17%|█▋        | 59/341 [00:00<00:00, 1421.38it/s, Materializing param=model.layers.3.self_attn.q_proj.weight]
Loading weights:  17%|█▋        | 59/341 [00:00<00:00, 1418.39it/s, Materializing param=model.layers.3.self_attn.q_proj.weight]
Loading weights:  18%|█▊        | 60/341 [00:00<00:00, 1429.38it/s, Materializing param=model.layers.3.self_attn.v_proj.bias]
Loading weights:  18%|█▊        | 60/341 [00:00<00:00, 1426.70it/s, Materializing param=model.layers.3.self_attn.v_proj.bias]
Loading weights:  18%|█▊        | 61/341 [00:00<00:00, 1402.25it/s, Materializing param=model.layers.3.self_attn.v_proj.weight]
Loading weights:  18%|█▊        | 61/341 [00:00<00:00, 1398.35it/s, Materializing param=model.layers.3.self_attn.v_proj.weight]
Loading weights:  18%|█▊        | 62/341 [00:00<00:00, 1391.95it/s, Materializing param=model.layers.4.input_layernorm.bias]
Loading weights:  18%|█▊        | 62/341 [00:00<00:00, 1388.09it/s, Materializing param=model.layers.4.input_layernorm.bias]
Loading weights:  18%|█▊        | 63/341 [00:00<00:00, 1400.29it/s, Materializing param=model.layers.4.input_layernorm.weight]
Loading weights:  18%|█▊        | 63/341 [00:00<00:00, 1396.57it/s, Materializing param=model.layers.4.input_layernorm.weight]
Loading weights:  19%|█▉        | 64/341 [00:00<00:00, 1408.87it/s, Materializing param=model.layers.4.mlp.fc1.bias]
Loading weights:  19%|█▉        | 64/341 [00:00<00:00, 1405.08it/s, Materializing param=model.layers.4.mlp.fc1.bias]
Loading weights:  19%|█▉        | 65/341 [00:00<00:00, 1415.21it/s, Materializing param=model.layers.4.mlp.fc1.weight]
Loading weights:  19%|█▉        | 65/341 [00:00<00:00, 1412.44it/s, Materializing param=model.layers.4.mlp.fc1.weight]
Loading weights:  19%|█▉        | 66/341 [00:00<00:00, 1412.22it/s, Materializing param=model.layers.4.mlp.fc2.bias]
Loading weights:  19%|█▉        | 66/341 [00:00<00:00, 1408.61it/s, Materializing param=model.layers.4.mlp.fc2.bias]
Loading weights:  20%|█▉        | 67/341 [00:00<00:00, 1418.19it/s, Materializing param=model.layers.4.mlp.fc2.weight]
Loading weights:  20%|█▉        | 67/341 [00:00<00:00, 1415.27it/s, Materializing param=model.layers.4.mlp.fc2.weight]
Loading weights:  20%|█▉        | 68/341 [00:00<00:00, 1396.44it/s, Materializing param=model.layers.4.self_attn.dense.bias]
Loading weights:  20%|█▉        | 68/341 [00:00<00:00, 1392.82it/s, Materializing param=model.layers.4.self_attn.dense.bias]
Loading weights:  20%|██        | 69/341 [00:00<00:00, 1386.33it/s, Materializing param=model.layers.4.self_attn.dense.weight]
Loading weights:  20%|██        | 69/341 [00:00<00:00, 1383.49it/s, Materializing param=model.layers.4.self_attn.dense.weight]
Loading weights:  21%|██        | 70/341 [00:00<00:00, 1392.17it/s, Materializing param=model.layers.4.self_attn.k_proj.bias]
Loading weights:  21%|██        | 70/341 [00:00<00:00, 1389.18it/s, Materializing param=model.layers.4.self_attn.k_proj.bias]
Loading weights:  21%|██        | 71/341 [00:00<00:00, 1386.28it/s, Materializing param=model.layers.4.self_attn.k_proj.weight]
Loading weights:  21%|██        | 71/341 [00:00<00:00, 1383.86it/s, Materializing param=model.layers.4.self_attn.k_proj.weight]
Loading weights:  21%|██        | 72/341 [00:00<00:00, 1375.57it/s, Materializing param=model.layers.4.self_attn.q_proj.bias]
Loading weights:  21%|██        | 72/341 [00:00<00:00, 1373.04it/s, Materializing param=model.layers.4.self_attn.q_proj.bias]
Loading weights:  21%|██▏       | 73/341 [00:00<00:00, 1386.67it/s, Materializing param=model.layers.4.self_attn.q_proj.weight]
Loading weights:  21%|██▏       | 73/341 [00:00<00:00, 1384.83it/s, Materializing param=model.layers.4.self_attn.q_proj.weight]
Loading weights:  22%|██▏       | 74/341 [00:00<00:00, 1387.77it/s, Materializing param=model.layers.4.self_attn.v_proj.bias]
Loading weights:  22%|██▏       | 74/341 [00:00<00:00, 1385.39it/s, Materializing param=model.layers.4.self_attn.v_proj.bias]
Loading weights:  22%|██▏       | 75/341 [00:00<00:00, 1382.68it/s, Materializing param=model.layers.4.self_attn.v_proj.weight]
Loading weights:  22%|██▏       | 75/341 [00:00<00:00, 1380.56it/s, Materializing param=model.layers.4.self_attn.v_proj.weight]
Loading weights:  22%|██▏       | 76/341 [00:00<00:00, 1385.22it/s, Materializing param=model.layers.5.input_layernorm.bias]
Loading weights:  22%|██▏       | 76/341 [00:00<00:00, 1383.09it/s, Materializing param=model.layers.5.input_layernorm.bias]
Loading weights:  23%|██▎       | 77/341 [00:00<00:00, 1382.91it/s, Materializing param=model.layers.5.input_layernorm.weight]
Loading weights:  23%|██▎       | 77/341 [00:00<00:00, 1380.18it/s, Materializing param=model.layers.5.input_layernorm.weight]
Loading weights:  23%|██▎       | 78/341 [00:00<00:00, 1389.05it/s, Materializing param=model.layers.5.mlp.fc1.bias]
Loading weights:  23%|██▎       | 78/341 [00:00<00:00, 1386.81it/s, Materializing param=model.layers.5.mlp.fc1.bias]
Loading weights:  23%|██▎       | 79/341 [00:00<00:00, 1382.73it/s, Materializing param=model.layers.5.mlp.fc1.weight]
Loading weights:  23%|██▎       | 79/341 [00:00<00:00, 1379.85it/s, Materializing param=model.layers.5.mlp.fc1.weight]
Loading weights:  23%|██▎       | 80/341 [00:00<00:00, 1387.58it/s, Materializing param=model.layers.5.mlp.fc2.bias]
Loading weights:  23%|██▎       | 80/341 [00:00<00:00, 1385.42it/s, Materializing param=model.layers.5.mlp.fc2.bias]
Loading weights:  24%|██▍       | 81/341 [00:00<00:00, 1388.52it/s, Materializing param=model.layers.5.mlp.fc2.weight]
Loading weights:  24%|██▍       | 81/341 [00:00<00:00, 1386.09it/s, Materializing param=model.layers.5.mlp.fc2.weight]
Loading weights:  24%|██▍       | 82/341 [00:00<00:00, 1383.76it/s, Materializing param=model.layers.5.self_attn.dense.bias]
Loading weights:  24%|██▍       | 82/341 [00:00<00:00, 1380.92it/s, Materializing param=model.layers.5.self_attn.dense.bias]
Loading weights:  24%|██▍       | 83/341 [00:00<00:00, 1390.24it/s, Materializing param=model.layers.5.self_attn.dense.weight]
Loading weights:  24%|██▍       | 83/341 [00:00<00:00, 1387.75it/s, Materializing param=model.layers.5.self_attn.dense.weight]
Loading weights:  25%|██▍       | 84/341 [00:00<00:00, 1383.53it/s, Materializing param=model.layers.5.self_attn.k_proj.bias]
Loading weights:  25%|██▍       | 84/341 [00:00<00:00, 1381.01it/s, Materializing param=model.layers.5.self_attn.k_proj.bias]
Loading weights:  25%|██▍       | 85/341 [00:00<00:00, 1383.24it/s, Materializing param=model.layers.5.self_attn.k_proj.weight]
Loading weights:  25%|██▍       | 85/341 [00:00<00:00, 1380.88it/s, Materializing param=model.layers.5.self_attn.k_proj.weight]
Loading weights:  25%|██▌       | 86/341 [00:00<00:00, 1385.04it/s, Materializing param=model.layers.5.self_attn.q_proj.bias]
Loading weights:  25%|██▌       | 86/341 [00:00<00:00, 1382.53it/s, Materializing param=model.layers.5.self_attn.q_proj.bias]
Loading weights:  26%|██▌       | 87/341 [00:00<00:00, 1384.64it/s, Materializing param=model.layers.5.self_attn.q_proj.weight]
Loading weights:  26%|██▌       | 87/341 [00:00<00:00, 1382.64it/s, Materializing param=model.layers.5.self_attn.q_proj.weight]
Loading weights:  26%|██▌       | 88/341 [00:00<00:00, 1375.28it/s, Materializing param=model.layers.5.self_attn.v_proj.bias]
Loading weights:  26%|██▌       | 88/341 [00:00<00:00, 1372.83it/s, Materializing param=model.layers.5.self_attn.v_proj.bias]
Loading weights:  26%|██▌       | 89/341 [00:00<00:00, 1377.30it/s, Materializing param=model.layers.5.self_attn.v_proj.weight]
Loading weights:  26%|██▌       | 89/341 [00:00<00:00, 1375.37it/s, Materializing param=model.layers.5.self_attn.v_proj.weight]
Loading weights:  26%|██▋       | 90/341 [00:00<00:00, 1355.55it/s, Materializing param=model.layers.6.input_layernorm.bias]
Loading weights:  26%|██▋       | 90/341 [00:00<00:00, 1353.77it/s, Materializing param=model.layers.6.input_layernorm.bias]
Loading weights:  27%|██▋       | 91/341 [00:00<00:00, 1338.34it/s, Materializing param=model.layers.6.input_layernorm.weight]
Loading weights:  27%|██▋       | 91/341 [00:00<00:00, 1336.06it/s, Materializing param=model.layers.6.input_layernorm.weight]
Loading weights:  27%|██▋       | 92/341 [00:00<00:00, 1342.44it/s, Materializing param=model.layers.6.mlp.fc1.bias]
Loading weights:  27%|██▋       | 92/341 [00:00<00:00, 1340.57it/s, Materializing param=model.layers.6.mlp.fc1.bias]
Loading weights:  27%|██▋       | 93/341 [00:00<00:00, 1345.36it/s, Materializing param=model.layers.6.mlp.fc1.weight]
Loading weights:  27%|██▋       | 93/341 [00:00<00:00, 1343.51it/s, Materializing param=model.layers.6.mlp.fc1.weight]
Loading weights:  28%|██▊       | 94/341 [00:00<00:00, 1349.63it/s, Materializing param=model.layers.6.mlp.fc2.bias]
Loading weights:  28%|██▊       | 94/341 [00:00<00:00, 1348.10it/s, Materializing param=model.layers.6.mlp.fc2.bias]
Loading weights:  28%|██▊       | 95/341 [00:00<00:00, 1355.86it/s, Materializing param=model.layers.6.mlp.fc2.weight]
Loading weights:  28%|██▊       | 95/341 [00:00<00:00, 1354.01it/s, Materializing param=model.layers.6.mlp.fc2.weight]
Loading weights:  28%|██▊       | 96/341 [00:00<00:00, 1356.35it/s, Materializing param=model.layers.6.self_attn.dense.bias]
Loading weights:  28%|██▊       | 96/341 [00:00<00:00, 1354.59it/s, Materializing param=model.layers.6.self_attn.dense.bias]
Loading weights:  28%|██▊       | 97/341 [00:00<00:00, 1359.48it/s, Materializing param=model.layers.6.self_attn.dense.weight]
Loading weights:  28%|██▊       | 97/341 [00:00<00:00, 1357.97it/s, Materializing param=model.layers.6.self_attn.dense.weight]
Loading weights:  29%|██▊       | 98/341 [00:00<00:00, 1359.01it/s, Materializing param=model.layers.6.self_attn.k_proj.bias]
Loading weights:  29%|██▊       | 98/341 [00:00<00:00, 1357.34it/s, Materializing param=model.layers.6.self_attn.k_proj.bias]
Loading weights:  29%|██▉       | 99/341 [00:00<00:00, 1364.71it/s, Materializing param=model.layers.6.self_attn.k_proj.weight]
Loading weights:  29%|██▉       | 99/341 [00:00<00:00, 1363.26it/s, Materializing param=model.layers.6.self_attn.k_proj.weight]
Loading weights:  29%|██▉       | 100/341 [00:00<00:00, 1368.72it/s, Materializing param=model.layers.6.self_attn.q_proj.bias]
Loading weights:  29%|██▉       | 100/341 [00:00<00:00, 1366.49it/s, Materializing param=model.layers.6.self_attn.q_proj.bias]
Loading weights:  30%|██▉       | 101/341 [00:00<00:00, 1371.14it/s, Materializing param=model.layers.6.self_attn.q_proj.weight]
Loading weights:  30%|██▉       | 101/341 [00:00<00:00, 1369.40it/s, Materializing param=model.layers.6.self_attn.q_proj.weight]
Loading weights:  30%|██▉       | 102/341 [00:00<00:00, 1370.18it/s, Materializing param=model.layers.6.self_attn.v_proj.bias]
Loading weights:  30%|██▉       | 102/341 [00:00<00:00, 1368.58it/s, Materializing param=model.layers.6.self_attn.v_proj.bias]
Loading weights:  30%|███       | 103/341 [00:00<00:00, 1364.40it/s, Materializing param=model.layers.6.self_attn.v_proj.weight]
Loading weights:  30%|███       | 103/341 [00:00<00:00, 1362.81it/s, Materializing param=model.layers.6.self_attn.v_proj.weight]
Loading weights:  30%|███       | 104/341 [00:00<00:00, 1368.41it/s, Materializing param=model.layers.7.input_layernorm.bias]
Loading weights:  30%|███       | 104/341 [00:00<00:00, 1366.67it/s, Materializing param=model.layers.7.input_layernorm.bias]
Loading weights:  31%|███       | 105/341 [00:00<00:00, 1370.59it/s, Materializing param=model.layers.7.input_layernorm.weight]
Loading weights:  31%|███       | 105/341 [00:00<00:00, 1369.03it/s, Materializing param=model.layers.7.input_layernorm.weight]
Loading weights:  31%|███       | 106/341 [00:00<00:00, 1370.67it/s, Materializing param=model.layers.7.mlp.fc1.bias]
Loading weights:  31%|███       | 106/341 [00:00<00:00, 1368.92it/s, Materializing param=model.layers.7.mlp.fc1.bias]
Loading weights:  31%|███▏      | 107/341 [00:00<00:00, 1363.74it/s, Materializing param=model.layers.7.mlp.fc1.weight]
Loading weights:  31%|███▏      | 107/341 [00:00<00:00, 1361.98it/s, Materializing param=model.layers.7.mlp.fc1.weight]
Loading weights:  32%|███▏      | 108/341 [00:00<00:00, 1364.71it/s, Materializing param=model.layers.7.mlp.fc2.bias]
Loading weights:  32%|███▏      | 108/341 [00:00<00:00, 1362.89it/s, Materializing param=model.layers.7.mlp.fc2.bias]
Loading weights:  32%|███▏      | 109/341 [00:00<00:00, 1361.05it/s, Materializing param=model.layers.7.mlp.fc2.weight]
Loading weights:  32%|███▏      | 109/341 [00:00<00:00, 1359.52it/s, Materializing param=model.layers.7.mlp.fc2.weight]
Loading weights:  32%|███▏      | 110/341 [00:00<00:00, 1366.49it/s, Materializing param=model.layers.7.self_attn.dense.bias]
Loading weights:  32%|███▏      | 110/341 [00:00<00:00, 1364.78it/s, Materializing param=model.layers.7.self_attn.dense.bias]
Loading weights:  33%|███▎      | 111/341 [00:00<00:00, 1369.08it/s, Materializing param=model.layers.7.self_attn.dense.weight]
Loading weights:  33%|███▎      | 111/341 [00:00<00:00, 1367.57it/s, Materializing param=model.layers.7.self_attn.dense.weight]
Loading weights:  33%|███▎      | 112/341 [00:00<00:00, 1372.02it/s, Materializing param=model.layers.7.self_attn.k_proj.bias]
Loading weights:  33%|███▎      | 112/341 [00:00<00:00, 1370.60it/s, Materializing param=model.layers.7.self_attn.k_proj.bias]
Loading weights:  33%|███▎      | 113/341 [00:00<00:00, 1375.71it/s, Materializing param=model.layers.7.self_attn.k_proj.weight]
Loading weights:  33%|███▎      | 113/341 [00:00<00:00, 1374.23it/s, Materializing param=model.layers.7.self_attn.k_proj.weight]
Loading weights:  33%|███▎      | 114/341 [00:00<00:00, 1374.82it/s, Materializing param=model.layers.7.self_attn.q_proj.bias]
Loading weights:  33%|███▎      | 114/341 [00:00<00:00, 1373.38it/s, Materializing param=model.layers.7.self_attn.q_proj.bias]
Loading weights:  34%|███▎      | 115/341 [00:00<00:00, 1375.74it/s, Materializing param=model.layers.7.self_attn.q_proj.weight]
Loading weights:  34%|███▎      | 115/341 [00:00<00:00, 1374.07it/s, Materializing param=model.layers.7.self_attn.q_proj.weight]
Loading weights:  34%|███▍      | 116/341 [00:00<00:00, 1379.42it/s, Materializing param=model.layers.7.self_attn.v_proj.bias]
Loading weights:  34%|███▍      | 116/341 [00:00<00:00, 1378.17it/s, Materializing param=model.layers.7.self_attn.v_proj.bias]
Loading weights:  34%|███▍      | 117/341 [00:00<00:00, 1376.81it/s, Materializing param=model.layers.7.self_attn.v_proj.weight]
Loading weights:  34%|███▍      | 117/341 [00:00<00:00, 1374.99it/s, Materializing param=model.layers.7.self_attn.v_proj.weight]
Loading weights:  35%|███▍      | 118/341 [00:00<00:00, 1383.09it/s, Materializing param=model.layers.8.input_layernorm.bias]
Loading weights:  35%|███▍      | 118/341 [00:00<00:00, 1381.84it/s, Materializing param=model.layers.8.input_layernorm.bias]
Loading weights:  35%|███▍      | 119/341 [00:00<00:00, 1386.12it/s, Materializing param=model.layers.8.input_layernorm.weight]
Loading weights:  35%|███▍      | 119/341 [00:00<00:00, 1384.65it/s, Materializing param=model.layers.8.input_layernorm.weight]
Loading weights:  35%|███▌      | 120/341 [00:00<00:00, 1381.38it/s, Materializing param=model.layers.8.mlp.fc1.bias]
Loading weights:  35%|███▌      | 120/341 [00:00<00:00, 1379.37it/s, Materializing param=model.layers.8.mlp.fc1.bias]
Loading weights:  35%|███▌      | 121/341 [00:00<00:00, 1382.15it/s, Materializing param=model.layers.8.mlp.fc1.weight]
Loading weights:  35%|███▌      | 121/341 [00:00<00:00, 1380.67it/s, Materializing param=model.layers.8.mlp.fc1.weight]
Loading weights:  36%|███▌      | 122/341 [00:00<00:00, 1382.91it/s, Materializing param=model.layers.8.mlp.fc2.bias]
Loading weights:  36%|███▌      | 122/341 [00:00<00:00, 1381.37it/s, Materializing param=model.layers.8.mlp.fc2.bias]
Loading weights:  36%|███▌      | 123/341 [00:00<00:00, 1380.71it/s, Materializing param=model.layers.8.mlp.fc2.weight]
Loading weights:  36%|███▌      | 123/341 [00:00<00:00, 1379.19it/s, Materializing param=model.layers.8.mlp.fc2.weight]
Loading weights:  36%|███▋      | 124/341 [00:00<00:00, 1372.13it/s, Materializing param=model.layers.8.self_attn.dense.bias]
Loading weights:  36%|███▋      | 124/341 [00:00<00:00, 1370.72it/s, Materializing param=model.layers.8.self_attn.dense.bias]
Loading weights:  37%|███▋      | 125/341 [00:00<00:00, 1375.88it/s, Materializing param=model.layers.8.self_attn.dense.weight]
Loading weights:  37%|███▋      | 125/341 [00:00<00:00, 1374.02it/s, Materializing param=model.layers.8.self_attn.dense.weight]
Loading weights:  37%|███▋      | 126/341 [00:00<00:00, 1376.70it/s, Materializing param=model.layers.8.self_attn.k_proj.bias]
Loading weights:  37%|███▋      | 126/341 [00:00<00:00, 1375.28it/s, Materializing param=model.layers.8.self_attn.k_proj.bias]
Loading weights:  37%|███▋      | 127/341 [00:00<00:00, 1380.74it/s, Materializing param=model.layers.8.self_attn.k_proj.weight]
Loading weights:  37%|███▋      | 127/341 [00:00<00:00, 1379.56it/s, Materializing param=model.layers.8.self_attn.k_proj.weight]
Loading weights:  38%|███▊      | 128/341 [00:00<00:00, 1367.97it/s, Materializing param=model.layers.8.self_attn.q_proj.bias]
Loading weights:  38%|███▊      | 128/341 [00:00<00:00, 1366.15it/s, Materializing param=model.layers.8.self_attn.q_proj.bias]
Loading weights:  38%|███▊      | 129/341 [00:00<00:00, 1367.47it/s, Materializing param=model.layers.8.self_attn.q_proj.weight]
Loading weights:  38%|███▊      | 129/341 [00:00<00:00, 1366.09it/s, Materializing param=model.layers.8.self_attn.q_proj.weight]
Loading weights:  38%|███▊      | 130/341 [00:00<00:00, 1368.08it/s, Materializing param=model.layers.8.self_attn.v_proj.bias]
Loading weights:  38%|███▊      | 130/341 [00:00<00:00, 1365.87it/s, Materializing param=model.layers.8.self_attn.v_proj.bias]
Loading weights:  38%|███▊      | 131/341 [00:00<00:00, 1371.71it/s, Materializing param=model.layers.8.self_attn.v_proj.weight]
Loading weights:  38%|███▊      | 131/341 [00:00<00:00, 1370.02it/s, Materializing param=model.layers.8.self_attn.v_proj.weight]
Loading weights:  39%|███▊      | 132/341 [00:00<00:00, 1360.49it/s, Materializing param=model.layers.9.input_layernorm.bias]
Loading weights:  39%|███▊      | 132/341 [00:00<00:00, 1358.56it/s, Materializing param=model.layers.9.input_layernorm.bias]
Loading weights:  39%|███▉      | 133/341 [00:00<00:00, 1354.97it/s, Materializing param=model.layers.9.input_layernorm.weight]
Loading weights:  39%|███▉      | 133/341 [00:00<00:00, 1352.89it/s, Materializing param=model.layers.9.input_layernorm.weight]
Loading weights:  39%|███▉      | 134/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.input_layernorm.weight]
Loading weights:  39%|███▉      | 134/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.mlp.fc1.bias]
Loading weights:  39%|███▉      | 134/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.mlp.fc1.bias]
Loading weights:  40%|███▉      | 135/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.mlp.fc1.weight]
Loading weights:  40%|███▉      | 135/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.mlp.fc1.weight]
Loading weights:  40%|███▉      | 136/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.mlp.fc2.bias]
Loading weights:  40%|███▉      | 136/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.mlp.fc2.bias]
Loading weights:  40%|████      | 137/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.mlp.fc2.weight]
Loading weights:  40%|████      | 137/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.mlp.fc2.weight]
Loading weights:  40%|████      | 138/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.dense.bias]
Loading weights:  40%|████      | 138/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.dense.bias]
Loading weights:  41%|████      | 139/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.dense.weight]
Loading weights:  41%|████      | 139/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.dense.weight]
Loading weights:  41%|████      | 140/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.k_proj.bias]
Loading weights:  41%|████      | 140/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.k_proj.bias]
Loading weights:  41%|████▏     | 141/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.k_proj.weight]
Loading weights:  41%|████▏     | 141/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.k_proj.weight]
Loading weights:  42%|████▏     | 142/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.q_proj.bias]
Loading weights:  42%|████▏     | 142/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.q_proj.bias]
Loading weights:  42%|████▏     | 143/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.q_proj.weight]
Loading weights:  42%|████▏     | 143/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.q_proj.weight]
Loading weights:  42%|████▏     | 144/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.v_proj.bias]
Loading weights:  42%|████▏     | 144/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.v_proj.bias]
Loading weights:  43%|████▎     | 145/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.v_proj.weight]
Loading weights:  43%|████▎     | 145/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.9.self_attn.v_proj.weight]
Loading weights:  43%|████▎     | 146/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.input_layernorm.bias]
Loading weights:  43%|████▎     | 146/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.input_layernorm.bias]
Loading weights:  43%|████▎     | 147/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.input_layernorm.weight]
Loading weights:  43%|████▎     | 147/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.input_layernorm.weight]
Loading weights:  43%|████▎     | 148/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.mlp.fc1.bias]
Loading weights:  43%|████▎     | 148/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.mlp.fc1.bias]
Loading weights:  44%|████▎     | 149/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.mlp.fc1.weight]
Loading weights:  44%|████▎     | 149/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.mlp.fc1.weight]
Loading weights:  44%|████▍     | 150/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.mlp.fc2.bias]
Loading weights:  44%|████▍     | 150/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.mlp.fc2.bias]
Loading weights:  44%|████▍     | 151/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.mlp.fc2.weight]
Loading weights:  44%|████▍     | 151/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.mlp.fc2.weight]
Loading weights:  45%|████▍     | 152/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.dense.bias]
Loading weights:  45%|████▍     | 152/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.dense.bias]
Loading weights:  45%|████▍     | 153/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.dense.weight]
Loading weights:  45%|████▍     | 153/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.dense.weight]
Loading weights:  45%|████▌     | 154/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.k_proj.bias]
Loading weights:  45%|████▌     | 154/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.k_proj.bias]
Loading weights:  45%|████▌     | 155/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.k_proj.weight]
Loading weights:  45%|████▌     | 155/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.k_proj.weight]
Loading weights:  46%|████▌     | 156/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.q_proj.bias]
Loading weights:  46%|████▌     | 156/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.q_proj.bias]
Loading weights:  46%|████▌     | 157/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.q_proj.weight]
Loading weights:  46%|████▌     | 157/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.q_proj.weight]
Loading weights:  46%|████▋     | 158/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.v_proj.bias]
Loading weights:  46%|████▋     | 158/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.v_proj.bias]
Loading weights:  47%|████▋     | 159/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.v_proj.weight]
Loading weights:  47%|████▋     | 159/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.10.self_attn.v_proj.weight]
Loading weights:  47%|████▋     | 160/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.input_layernorm.bias]
Loading weights:  47%|████▋     | 160/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.input_layernorm.bias]
Loading weights:  47%|████▋     | 161/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.input_layernorm.weight]
Loading weights:  47%|████▋     | 161/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.input_layernorm.weight]
Loading weights:  48%|████▊     | 162/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.mlp.fc1.bias]
Loading weights:  48%|████▊     | 162/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.mlp.fc1.bias]
Loading weights:  48%|████▊     | 163/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.mlp.fc1.weight]
Loading weights:  48%|████▊     | 163/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.mlp.fc1.weight]
Loading weights:  48%|████▊     | 164/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.mlp.fc2.bias]
Loading weights:  48%|████▊     | 164/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.mlp.fc2.bias]
Loading weights:  48%|████▊     | 165/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.mlp.fc2.weight]
Loading weights:  48%|████▊     | 165/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.mlp.fc2.weight]
Loading weights:  49%|████▊     | 166/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.dense.bias]
Loading weights:  49%|████▊     | 166/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.dense.bias]
Loading weights:  49%|████▉     | 167/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.dense.weight]
Loading weights:  49%|████▉     | 167/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.dense.weight]
Loading weights:  49%|████▉     | 168/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.k_proj.bias]
Loading weights:  49%|████▉     | 168/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.k_proj.bias]
Loading weights:  50%|████▉     | 169/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.k_proj.weight]
Loading weights:  50%|████▉     | 169/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.k_proj.weight]
Loading weights:  50%|████▉     | 170/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.q_proj.bias]
Loading weights:  50%|████▉     | 170/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.q_proj.bias]
Loading weights:  50%|█████     | 171/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.q_proj.weight]
Loading weights:  50%|█████     | 171/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.q_proj.weight]
Loading weights:  50%|█████     | 172/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.v_proj.bias]
Loading weights:  50%|█████     | 172/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.v_proj.bias]
Loading weights:  51%|█████     | 173/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.v_proj.weight]
Loading weights:  51%|█████     | 173/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.11.self_attn.v_proj.weight]
Loading weights:  51%|█████     | 174/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.input_layernorm.bias]
Loading weights:  51%|█████     | 174/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.input_layernorm.bias]
Loading weights:  51%|█████▏    | 175/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.input_layernorm.weight]
Loading weights:  51%|█████▏    | 175/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.input_layernorm.weight]
Loading weights:  52%|█████▏    | 176/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.mlp.fc1.bias]
Loading weights:  52%|█████▏    | 176/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.mlp.fc1.bias]
Loading weights:  52%|█████▏    | 177/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.mlp.fc1.weight]
Loading weights:  52%|█████▏    | 177/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.mlp.fc1.weight]
Loading weights:  52%|█████▏    | 178/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.mlp.fc2.bias]
Loading weights:  52%|█████▏    | 178/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.mlp.fc2.bias]
Loading weights:  52%|█████▏    | 179/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.mlp.fc2.weight]
Loading weights:  52%|█████▏    | 179/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.mlp.fc2.weight]
Loading weights:  53%|█████▎    | 180/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.dense.bias]
Loading weights:  53%|█████▎    | 180/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.dense.bias]
Loading weights:  53%|█████▎    | 181/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.dense.weight]
Loading weights:  53%|█████▎    | 181/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.dense.weight]
Loading weights:  53%|█████▎    | 182/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.k_proj.bias]
Loading weights:  53%|█████▎    | 182/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.k_proj.bias]
Loading weights:  54%|█████▎    | 183/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.k_proj.weight]
Loading weights:  54%|█████▎    | 183/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.k_proj.weight]
Loading weights:  54%|█████▍    | 184/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.q_proj.bias]
Loading weights:  54%|█████▍    | 184/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.q_proj.bias]
Loading weights:  54%|█████▍    | 185/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.q_proj.weight]
Loading weights:  54%|█████▍    | 185/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.q_proj.weight]
Loading weights:  55%|█████▍    | 186/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.v_proj.bias]
Loading weights:  55%|█████▍    | 186/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.v_proj.bias]
Loading weights:  55%|█████▍    | 187/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.v_proj.weight]
Loading weights:  55%|█████▍    | 187/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.12.self_attn.v_proj.weight]
Loading weights:  55%|█████▌    | 188/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.input_layernorm.bias]
Loading weights:  55%|█████▌    | 188/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.input_layernorm.bias]
Loading weights:  55%|█████▌    | 189/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.input_layernorm.weight]
Loading weights:  55%|█████▌    | 189/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.input_layernorm.weight]
Loading weights:  56%|█████▌    | 190/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.mlp.fc1.bias]
Loading weights:  56%|█████▌    | 190/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.mlp.fc1.bias]
Loading weights:  56%|█████▌    | 191/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.mlp.fc1.weight]
Loading weights:  56%|█████▌    | 191/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.mlp.fc1.weight]
Loading weights:  56%|█████▋    | 192/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.mlp.fc2.bias]
Loading weights:  56%|█████▋    | 192/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.mlp.fc2.bias]
Loading weights:  57%|█████▋    | 193/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.mlp.fc2.weight]
Loading weights:  57%|█████▋    | 193/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.mlp.fc2.weight]
Loading weights:  57%|█████▋    | 194/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.dense.bias]
Loading weights:  57%|█████▋    | 194/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.dense.bias]
Loading weights:  57%|█████▋    | 195/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.dense.weight]
Loading weights:  57%|█████▋    | 195/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.dense.weight]
Loading weights:  57%|█████▋    | 196/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.k_proj.bias]
Loading weights:  57%|█████▋    | 196/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.k_proj.bias]
Loading weights:  58%|█████▊    | 197/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.k_proj.weight]
Loading weights:  58%|█████▊    | 197/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.k_proj.weight]
Loading weights:  58%|█████▊    | 198/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.q_proj.bias]
Loading weights:  58%|█████▊    | 198/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.q_proj.bias]
Loading weights:  58%|█████▊    | 199/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.q_proj.weight]
Loading weights:  58%|█████▊    | 199/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.q_proj.weight]
Loading weights:  59%|█████▊    | 200/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.v_proj.bias]
Loading weights:  59%|█████▊    | 200/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.v_proj.bias]
Loading weights:  59%|█████▉    | 201/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.v_proj.weight]
Loading weights:  59%|█████▉    | 201/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.13.self_attn.v_proj.weight]
Loading weights:  59%|█████▉    | 202/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.input_layernorm.bias]
Loading weights:  59%|█████▉    | 202/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.input_layernorm.bias]
Loading weights:  60%|█████▉    | 203/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.input_layernorm.weight]
Loading weights:  60%|█████▉    | 203/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.input_layernorm.weight]
Loading weights:  60%|█████▉    | 204/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.mlp.fc1.bias]
Loading weights:  60%|█████▉    | 204/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.mlp.fc1.bias]
Loading weights:  60%|██████    | 205/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.mlp.fc1.weight]
Loading weights:  60%|██████    | 205/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.mlp.fc1.weight]
Loading weights:  60%|██████    | 206/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.mlp.fc2.bias]
Loading weights:  60%|██████    | 206/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.mlp.fc2.bias]
Loading weights:  61%|██████    | 207/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.mlp.fc2.weight]
Loading weights:  61%|██████    | 207/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.mlp.fc2.weight]
Loading weights:  61%|██████    | 208/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.dense.bias]
Loading weights:  61%|██████    | 208/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.dense.bias]
Loading weights:  61%|██████▏   | 209/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.dense.weight]
Loading weights:  61%|██████▏   | 209/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.dense.weight]
Loading weights:  62%|██████▏   | 210/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.k_proj.bias]
Loading weights:  62%|██████▏   | 210/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.k_proj.bias]
Loading weights:  62%|██████▏   | 211/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.k_proj.weight]
Loading weights:  62%|██████▏   | 211/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.k_proj.weight]
Loading weights:  62%|██████▏   | 212/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.q_proj.bias]
Loading weights:  62%|██████▏   | 212/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.q_proj.bias]
Loading weights:  62%|██████▏   | 213/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.q_proj.weight]
Loading weights:  62%|██████▏   | 213/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.q_proj.weight]
Loading weights:  63%|██████▎   | 214/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.v_proj.bias]
Loading weights:  63%|██████▎   | 214/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.v_proj.bias]
Loading weights:  63%|██████▎   | 215/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.v_proj.weight]
Loading weights:  63%|██████▎   | 215/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.14.self_attn.v_proj.weight]
Loading weights:  63%|██████▎   | 216/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.input_layernorm.bias]
Loading weights:  63%|██████▎   | 216/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.input_layernorm.bias]
Loading weights:  64%|██████▎   | 217/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.input_layernorm.weight]
Loading weights:  64%|██████▎   | 217/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.input_layernorm.weight]
Loading weights:  64%|██████▍   | 218/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.mlp.fc1.bias]
Loading weights:  64%|██████▍   | 218/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.mlp.fc1.bias]
Loading weights:  64%|██████▍   | 219/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.mlp.fc1.weight]
Loading weights:  64%|██████▍   | 219/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.mlp.fc1.weight]
Loading weights:  65%|██████▍   | 220/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.mlp.fc2.bias]
Loading weights:  65%|██████▍   | 220/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.mlp.fc2.bias]
Loading weights:  65%|██████▍   | 221/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.mlp.fc2.weight]
Loading weights:  65%|██████▍   | 221/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.mlp.fc2.weight]
Loading weights:  65%|██████▌   | 222/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.dense.bias]
Loading weights:  65%|██████▌   | 222/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.dense.bias]
Loading weights:  65%|██████▌   | 223/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.dense.weight]
Loading weights:  65%|██████▌   | 223/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.dense.weight]
Loading weights:  66%|██████▌   | 224/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.k_proj.bias]
Loading weights:  66%|██████▌   | 224/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.k_proj.bias]
Loading weights:  66%|██████▌   | 225/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.k_proj.weight]
Loading weights:  66%|██████▌   | 225/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.k_proj.weight]
Loading weights:  66%|██████▋   | 226/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.q_proj.bias]
Loading weights:  66%|██████▋   | 226/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.q_proj.bias]
Loading weights:  67%|██████▋   | 227/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.q_proj.weight]
Loading weights:  67%|██████▋   | 227/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.q_proj.weight]
Loading weights:  67%|██████▋   | 228/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.v_proj.bias]
Loading weights:  67%|██████▋   | 228/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.v_proj.bias]
Loading weights:  67%|██████▋   | 229/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.v_proj.weight]
Loading weights:  67%|██████▋   | 229/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.15.self_attn.v_proj.weight]
Loading weights:  67%|██████▋   | 230/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.input_layernorm.bias]
Loading weights:  67%|██████▋   | 230/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.input_layernorm.bias]
Loading weights:  68%|██████▊   | 231/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.input_layernorm.weight]
Loading weights:  68%|██████▊   | 231/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.input_layernorm.weight]
Loading weights:  68%|██████▊   | 232/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.mlp.fc1.bias]
Loading weights:  68%|██████▊   | 232/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.mlp.fc1.bias]
Loading weights:  68%|██████▊   | 233/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.mlp.fc1.weight]
Loading weights:  68%|██████▊   | 233/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.mlp.fc1.weight]
Loading weights:  69%|██████▊   | 234/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.mlp.fc2.bias]
Loading weights:  69%|██████▊   | 234/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.mlp.fc2.bias]
Loading weights:  69%|██████▉   | 235/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.mlp.fc2.weight]
Loading weights:  69%|██████▉   | 235/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.mlp.fc2.weight]
Loading weights:  69%|██████▉   | 236/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.dense.bias]
Loading weights:  69%|██████▉   | 236/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.dense.bias]
Loading weights:  70%|██████▉   | 237/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.dense.weight]
Loading weights:  70%|██████▉   | 237/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.dense.weight]
Loading weights:  70%|██████▉   | 238/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.k_proj.bias]
Loading weights:  70%|██████▉   | 238/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.k_proj.bias]
Loading weights:  70%|███████   | 239/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.k_proj.weight]
Loading weights:  70%|███████   | 239/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.k_proj.weight]
Loading weights:  70%|███████   | 240/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.q_proj.bias]
Loading weights:  70%|███████   | 240/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.q_proj.bias]
Loading weights:  71%|███████   | 241/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.q_proj.weight]
Loading weights:  71%|███████   | 241/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.q_proj.weight]
Loading weights:  71%|███████   | 242/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.v_proj.bias]
Loading weights:  71%|███████   | 242/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.v_proj.bias]
Loading weights:  71%|███████▏  | 243/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.v_proj.weight]
Loading weights:  71%|███████▏  | 243/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.16.self_attn.v_proj.weight]
Loading weights:  72%|███████▏  | 244/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.input_layernorm.bias]
Loading weights:  72%|███████▏  | 244/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.input_layernorm.bias]
Loading weights:  72%|███████▏  | 245/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.input_layernorm.weight]
Loading weights:  72%|███████▏  | 245/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.input_layernorm.weight]
Loading weights:  72%|███████▏  | 246/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.mlp.fc1.bias]
Loading weights:  72%|███████▏  | 246/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.mlp.fc1.bias]
Loading weights:  72%|███████▏  | 247/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.mlp.fc1.weight]
Loading weights:  72%|███████▏  | 247/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.mlp.fc1.weight]
Loading weights:  73%|███████▎  | 248/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.mlp.fc2.bias]
Loading weights:  73%|███████▎  | 248/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.mlp.fc2.bias]
Loading weights:  73%|███████▎  | 249/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.mlp.fc2.weight]
Loading weights:  73%|███████▎  | 249/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.mlp.fc2.weight]
Loading weights:  73%|███████▎  | 250/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.dense.bias]
Loading weights:  73%|███████▎  | 250/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.dense.bias]
Loading weights:  74%|███████▎  | 251/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.dense.weight]
Loading weights:  74%|███████▎  | 251/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.dense.weight]
Loading weights:  74%|███████▍  | 252/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.k_proj.bias]
Loading weights:  74%|███████▍  | 252/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.k_proj.bias]
Loading weights:  74%|███████▍  | 253/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.k_proj.weight]
Loading weights:  74%|███████▍  | 253/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.k_proj.weight]
Loading weights:  74%|███████▍  | 254/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.q_proj.bias]
Loading weights:  74%|███████▍  | 254/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.q_proj.bias]
Loading weights:  75%|███████▍  | 255/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.q_proj.weight]
Loading weights:  75%|███████▍  | 255/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.q_proj.weight]
Loading weights:  75%|███████▌  | 256/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.v_proj.bias]
Loading weights:  75%|███████▌  | 256/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.v_proj.bias]
Loading weights:  75%|███████▌  | 257/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.v_proj.weight]
Loading weights:  75%|███████▌  | 257/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.17.self_attn.v_proj.weight]
Loading weights:  76%|███████▌  | 258/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.input_layernorm.bias]
Loading weights:  76%|███████▌  | 258/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.input_layernorm.bias]
Loading weights:  76%|███████▌  | 259/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.input_layernorm.weight]
Loading weights:  76%|███████▌  | 259/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.input_layernorm.weight]
Loading weights:  76%|███████▌  | 260/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.mlp.fc1.bias]
Loading weights:  76%|███████▌  | 260/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.mlp.fc1.bias]
Loading weights:  77%|███████▋  | 261/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.mlp.fc1.weight]
Loading weights:  77%|███████▋  | 261/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.mlp.fc1.weight]
Loading weights:  77%|███████▋  | 262/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.mlp.fc2.bias]
Loading weights:  77%|███████▋  | 262/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.mlp.fc2.bias]
Loading weights:  77%|███████▋  | 263/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.mlp.fc2.weight]
Loading weights:  77%|███████▋  | 263/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.mlp.fc2.weight]
Loading weights:  77%|███████▋  | 264/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.self_attn.dense.bias]
Loading weights:  77%|███████▋  | 264/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.self_attn.dense.bias]
Loading weights:  78%|███████▊  | 265/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.self_attn.dense.weight]
Loading weights:  78%|███████▊  | 265/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.self_attn.dense.weight]
Loading weights:  78%|███████▊  | 266/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.self_attn.k_proj.bias]
Loading weights:  78%|███████▊  | 266/341 [00:00<00:00, 1329.85it/s, Materializing param=model.layers.18.self_attn.k_proj.bias]
Loading weights:  78%|███████▊  | 267/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.18.self_attn.k_proj.bias]
Loading weights:  78%|███████▊  | 267/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.18.self_attn.k_proj.weight]
Loading weights:  78%|███████▊  | 267/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.18.self_attn.k_proj.weight]
Loading weights:  79%|███████▊  | 268/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.18.self_attn.q_proj.bias]
Loading weights:  79%|███████▊  | 268/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.18.self_attn.q_proj.bias]
Loading weights:  79%|███████▉  | 269/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.18.self_attn.q_proj.weight]
Loading weights:  79%|███████▉  | 269/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.18.self_attn.q_proj.weight]
Loading weights:  79%|███████▉  | 270/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.18.self_attn.v_proj.bias]
Loading weights:  79%|███████▉  | 270/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.18.self_attn.v_proj.bias]
Loading weights:  79%|███████▉  | 271/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.18.self_attn.v_proj.weight]
Loading weights:  79%|███████▉  | 271/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.18.self_attn.v_proj.weight]
Loading weights:  80%|███████▉  | 272/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.input_layernorm.bias]
Loading weights:  80%|███████▉  | 272/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.input_layernorm.bias]
Loading weights:  80%|████████  | 273/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.input_layernorm.weight]
Loading weights:  80%|████████  | 273/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.input_layernorm.weight]
Loading weights:  80%|████████  | 274/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.mlp.fc1.bias]
Loading weights:  80%|████████  | 274/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.mlp.fc1.bias]
Loading weights:  81%|████████  | 275/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.mlp.fc1.weight]
Loading weights:  81%|████████  | 275/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.mlp.fc1.weight]
Loading weights:  81%|████████  | 276/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.mlp.fc2.bias]
Loading weights:  81%|████████  | 276/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.mlp.fc2.bias]
Loading weights:  81%|████████  | 277/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.mlp.fc2.weight]
Loading weights:  81%|████████  | 277/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.mlp.fc2.weight]
Loading weights:  82%|████████▏ | 278/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.dense.bias]
Loading weights:  82%|████████▏ | 278/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.dense.bias]
Loading weights:  82%|████████▏ | 279/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.dense.weight]
Loading weights:  82%|████████▏ | 279/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.dense.weight]
Loading weights:  82%|████████▏ | 280/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.k_proj.bias]
Loading weights:  82%|████████▏ | 280/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.k_proj.bias]
Loading weights:  82%|████████▏ | 281/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.k_proj.weight]
Loading weights:  82%|████████▏ | 281/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.k_proj.weight]
Loading weights:  83%|████████▎ | 282/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.q_proj.bias]
Loading weights:  83%|████████▎ | 282/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.q_proj.bias]
Loading weights:  83%|████████▎ | 283/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.q_proj.weight]
Loading weights:  83%|████████▎ | 283/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.q_proj.weight]
Loading weights:  83%|████████▎ | 284/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.v_proj.bias]
Loading weights:  83%|████████▎ | 284/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.v_proj.bias]
Loading weights:  84%|████████▎ | 285/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.v_proj.weight]
Loading weights:  84%|████████▎ | 285/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.19.self_attn.v_proj.weight]
Loading weights:  84%|████████▍ | 286/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.input_layernorm.bias]
Loading weights:  84%|████████▍ | 286/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.input_layernorm.bias]
Loading weights:  84%|████████▍ | 287/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.input_layernorm.weight]
Loading weights:  84%|████████▍ | 287/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.input_layernorm.weight]
Loading weights:  84%|████████▍ | 288/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.mlp.fc1.bias]
Loading weights:  84%|████████▍ | 288/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.mlp.fc1.bias]
Loading weights:  85%|████████▍ | 289/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.mlp.fc1.weight]
Loading weights:  85%|████████▍ | 289/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.mlp.fc1.weight]
Loading weights:  85%|████████▌ | 290/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.mlp.fc2.bias]
Loading weights:  85%|████████▌ | 290/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.mlp.fc2.bias]
Loading weights:  85%|████████▌ | 291/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.mlp.fc2.weight]
Loading weights:  85%|████████▌ | 291/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.mlp.fc2.weight]
Loading weights:  86%|████████▌ | 292/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.dense.bias]
Loading weights:  86%|████████▌ | 292/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.dense.bias]
Loading weights:  86%|████████▌ | 293/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.dense.weight]
Loading weights:  86%|████████▌ | 293/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.dense.weight]
Loading weights:  86%|████████▌ | 294/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.k_proj.bias]
Loading weights:  86%|████████▌ | 294/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.k_proj.bias]
Loading weights:  87%|████████▋ | 295/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.k_proj.weight]
Loading weights:  87%|████████▋ | 295/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.k_proj.weight]
Loading weights:  87%|████████▋ | 296/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.q_proj.bias]
Loading weights:  87%|████████▋ | 296/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.q_proj.bias]
Loading weights:  87%|████████▋ | 297/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.q_proj.weight]
Loading weights:  87%|████████▋ | 297/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.q_proj.weight]
Loading weights:  87%|████████▋ | 298/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.v_proj.bias]
Loading weights:  87%|████████▋ | 298/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.v_proj.bias]
Loading weights:  88%|████████▊ | 299/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.v_proj.weight]
Loading weights:  88%|████████▊ | 299/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.20.self_attn.v_proj.weight]
Loading weights:  88%|████████▊ | 300/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.input_layernorm.bias]
Loading weights:  88%|████████▊ | 300/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.input_layernorm.bias]
Loading weights:  88%|████████▊ | 301/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.input_layernorm.weight]
Loading weights:  88%|████████▊ | 301/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.input_layernorm.weight]
Loading weights:  89%|████████▊ | 302/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.mlp.fc1.bias]
Loading weights:  89%|████████▊ | 302/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.mlp.fc1.bias]
Loading weights:  89%|████████▉ | 303/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.mlp.fc1.weight]
Loading weights:  89%|████████▉ | 303/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.mlp.fc1.weight]
Loading weights:  89%|████████▉ | 304/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.mlp.fc2.bias]
Loading weights:  89%|████████▉ | 304/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.mlp.fc2.bias]
Loading weights:  89%|████████▉ | 305/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.mlp.fc2.weight]
Loading weights:  89%|████████▉ | 305/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.mlp.fc2.weight]
Loading weights:  90%|████████▉ | 306/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.dense.bias]
Loading weights:  90%|████████▉ | 306/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.dense.bias]
Loading weights:  90%|█████████ | 307/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.dense.weight]
Loading weights:  90%|█████████ | 307/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.dense.weight]
Loading weights:  90%|█████████ | 308/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.k_proj.bias]
Loading weights:  90%|█████████ | 308/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.k_proj.bias]
Loading weights:  91%|█████████ | 309/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.k_proj.weight]
Loading weights:  91%|█████████ | 309/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.k_proj.weight]
Loading weights:  91%|█████████ | 310/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.q_proj.bias]
Loading weights:  91%|█████████ | 310/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.q_proj.bias]
Loading weights:  91%|█████████ | 311/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.q_proj.weight]
Loading weights:  91%|█████████ | 311/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.q_proj.weight]
Loading weights:  91%|█████████▏| 312/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.v_proj.bias]
Loading weights:  91%|█████████▏| 312/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.v_proj.bias]
Loading weights:  92%|█████████▏| 313/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.v_proj.weight]
Loading weights:  92%|█████████▏| 313/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.21.self_attn.v_proj.weight]
Loading weights:  92%|█████████▏| 314/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.input_layernorm.bias]
Loading weights:  92%|█████████▏| 314/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.input_layernorm.bias]
Loading weights:  92%|█████████▏| 315/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.input_layernorm.weight]
Loading weights:  92%|█████████▏| 315/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.input_layernorm.weight]
Loading weights:  93%|█████████▎| 316/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.mlp.fc1.bias]
Loading weights:  93%|█████████▎| 316/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.mlp.fc1.bias]
Loading weights:  93%|█████████▎| 317/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.mlp.fc1.weight]
Loading weights:  93%|█████████▎| 317/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.mlp.fc1.weight]
Loading weights:  93%|█████████▎| 318/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.mlp.fc2.bias]
Loading weights:  93%|█████████▎| 318/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.mlp.fc2.bias]
Loading weights:  94%|█████████▎| 319/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.mlp.fc2.weight]
Loading weights:  94%|█████████▎| 319/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.mlp.fc2.weight]
Loading weights:  94%|█████████▍| 320/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.dense.bias]
Loading weights:  94%|█████████▍| 320/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.dense.bias]
Loading weights:  94%|█████████▍| 321/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.dense.weight]
Loading weights:  94%|█████████▍| 321/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.dense.weight]
Loading weights:  94%|█████████▍| 322/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.k_proj.bias]
Loading weights:  94%|█████████▍| 322/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.k_proj.bias]
Loading weights:  95%|█████████▍| 323/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.k_proj.weight]
Loading weights:  95%|█████████▍| 323/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.k_proj.weight]
Loading weights:  95%|█████████▌| 324/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.q_proj.bias]
Loading weights:  95%|█████████▌| 324/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.q_proj.bias]
Loading weights:  95%|█████████▌| 325/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.q_proj.weight]
Loading weights:  95%|█████████▌| 325/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.q_proj.weight]
Loading weights:  96%|█████████▌| 326/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.v_proj.bias]
Loading weights:  96%|█████████▌| 326/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.v_proj.bias]
Loading weights:  96%|█████████▌| 327/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.v_proj.weight]
Loading weights:  96%|█████████▌| 327/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.22.self_attn.v_proj.weight]
Loading weights:  96%|█████████▌| 328/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.input_layernorm.bias]
Loading weights:  96%|█████████▌| 328/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.input_layernorm.bias]
Loading weights:  96%|█████████▋| 329/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.input_layernorm.weight]
Loading weights:  96%|█████████▋| 329/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.input_layernorm.weight]
Loading weights:  97%|█████████▋| 330/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.mlp.fc1.bias]
Loading weights:  97%|█████████▋| 330/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.mlp.fc1.bias]
Loading weights:  97%|█████████▋| 331/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.mlp.fc1.weight]
Loading weights:  97%|█████████▋| 331/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.mlp.fc1.weight]
Loading weights:  97%|█████████▋| 332/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.mlp.fc2.bias]
Loading weights:  97%|█████████▋| 332/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.mlp.fc2.bias]
Loading weights:  98%|█████████▊| 333/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.mlp.fc2.weight]
Loading weights:  98%|█████████▊| 333/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.mlp.fc2.weight]
Loading weights:  98%|█████████▊| 334/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.dense.bias]
Loading weights:  98%|█████████▊| 334/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.dense.bias]
Loading weights:  98%|█████████▊| 335/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.dense.weight]
Loading weights:  98%|█████████▊| 335/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.dense.weight]
Loading weights:  99%|█████████▊| 336/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.k_proj.bias]
Loading weights:  99%|█████████▊| 336/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.k_proj.bias]
Loading weights:  99%|█████████▉| 337/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.k_proj.weight]
Loading weights:  99%|█████████▉| 337/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.k_proj.weight]
Loading weights:  99%|█████████▉| 338/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.q_proj.bias]
Loading weights:  99%|█████████▉| 338/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.q_proj.bias]
Loading weights:  99%|█████████▉| 339/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.q_proj.weight]
Loading weights:  99%|█████████▉| 339/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.q_proj.weight]
Loading weights: 100%|█████████▉| 340/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.v_proj.bias]
Loading weights: 100%|█████████▉| 340/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.v_proj.bias]
Loading weights: 100%|██████████| 341/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.v_proj.weight]
Loading weights: 100%|██████████| 341/341 [00:00<00:00, 714.91it/s, Materializing param=model.layers.23.self_attn.v_proj.weight]
Loading weights: 100%|██████████| 341/341 [00:00<00:00, 848.20it/s, Materializing param=model.layers.23.self_attn.v_proj.weight]
-- done.
-- tokenize the prompt...
-- done.
-- compute the answer...
-- done in 3.0059874660000787
output shape: T7s1x123[7,50285:A10138.878048780489]
-- decode the answer...
-- done.
def print_prime(n):
   """
   Print all primes between 1 and n
   """
   primes = []
   for num in range(2, n+1):
       is_prime = True
       for i in range(2, int(math.sqrt(num))+1):
           if num % i == 0:
               is_prime = False
               break
       if is_prime:
           primes.append(num)
   print(primes)

print_prime(20)
``

eos_token_id?

This token means the end of the answer.

# The tokenizer exposes the id of the end-of-sequence token.
eos_token_id = tokenizer.eos_token_id
print("eos_token_id=", eos_token_id)
eos_token_id= 50256

Custom method generate

Let’s implement a simple function replicating what the method generate does.

def simple_generate_with_cache(
    model, input_ids: torch.Tensor, eos_token_id: int, max_new_tokens: int = 100
):
    """
    Generates tokens one at a time with a greedy strategy and a key/value cache.

    The first model call (prefill) receives the whole prompt, every following
    call (decode) only receives the token produced by the previous step
    together with the cache returned by the previous call.

    :param model: callable invoked as
        ``model(ids, use_cache=True, past_key_values=...)``, its result must
        expose ``logits`` and ``past_key_values``
    :param input_ids: token ids of the prompt, the stop test relies on
        ``Tensor.item()`` so only a batch of one sequence is supported
    :param eos_token_id: id of the token ending the answer, generation stops
        as soon as it is predicted and the token is not appended to the result
    :param max_new_tokens: maximum number of tokens appended to the prompt
    :return: the prompt followed by the generated tokens
    """
    # No cache for the first call (prefill), which sees the whole prompt.
    past_key_values = None
    step_input = input_ids

    # Only token ids are returned: gradients are useless here and tracking
    # them would keep the autograd graph alive through the growing cache.
    with torch.no_grad():
        for _ in tqdm(range(max_new_tokens)):
            # The model is called at the beginning of the iteration so that no
            # forward pass is computed for a token which is never used.
            outputs = model(step_input, use_cache=True, past_key_values=past_key_values)
            past_key_values = outputs.past_key_values
            # Only the logits of the last position predict the next token.
            next_token_logits = outputs.logits[:, -1, :]

            # The most probable next token is chosen.
            next_token_id = torch.argmax(next_token_logits, dim=-1, keepdim=True)
            # But we could select it using a multinomial law
            # <<< probs = torch.softmax(next_token_logits / temperature, dim=-1)
            # <<< top_probs, top_indices = torch.topk(probs, top_k)
            # <<< next_token_id = top_indices[torch.multinomial(top_probs, 1)]

            if next_token_id.item() == eos_token_id:
                break
            input_ids = torch.cat([input_ids, next_token_id], dim=-1)

            # Next call: feed only the new token, but with the cache.
            step_input = next_token_id

    return input_ids


# Runs the handwritten greedy loop on the tokenized prompt and measures it.
print("-- compute the answer with custom generate...")
begin = time.perf_counter()
outputs = simple_generate_with_cache(
    model,
    inputs.input_ids,
    eos_token_id=tokenizer.eos_token_id,
    max_new_tokens=100,
)
duration = time.perf_counter() - begin
print(f"-- done in {duration}")
# The measure is stored next to the other ones collected in ``data``.
data.append({"name": "custom", "duration": duration})

print("-- done.")
# One-line description of the generated ids (shape, min/max requested).
outputs_summary = string_type(outputs, with_shape=True, with_min_max=True)
print("output shape:", outputs_summary)
print("-- decode the answer...")
# batch_decode returns one string per sequence, only the first one is kept.
decoded_answers = tokenizer.batch_decode(outputs)
text = decoded_answers[0]
print("-- done.")
print(text)
-- compute the answer with custom generate...

  0%|          | 0/100 [00:00<?, ?it/s]
  3%|▎         | 3/100 [00:00<00:03, 27.30it/s]
  6%|▌         | 6/100 [00:00<00:03, 27.95it/s]
  9%|▉         | 9/100 [00:00<00:03, 27.61it/s]
 12%|█▏        | 12/100 [00:00<00:03, 27.57it/s]
 16%|█▌        | 16/100 [00:00<00:02, 29.08it/s]
 20%|██        | 20/100 [00:00<00:02, 29.20it/s]
 24%|██▍       | 24/100 [00:00<00:02, 28.68it/s]
 27%|██▋       | 27/100 [00:00<00:02, 26.42it/s]
 30%|███       | 30/100 [00:01<00:02, 25.04it/s]
 33%|███▎      | 33/100 [00:01<00:03, 21.91it/s]
 36%|███▌      | 36/100 [00:01<00:02, 23.05it/s]
 39%|███▉      | 39/100 [00:01<00:03, 17.78it/s]
 41%|████      | 41/100 [00:01<00:03, 17.25it/s]
 44%|████▍     | 44/100 [00:01<00:02, 18.84it/s]
 47%|████▋     | 47/100 [00:02<00:02, 21.14it/s]
 50%|█████     | 50/100 [00:02<00:02, 22.44it/s]
 53%|█████▎    | 53/100 [00:02<00:01, 24.00it/s]
 56%|█████▌    | 56/100 [00:02<00:01, 24.84it/s]
 59%|█████▉    | 59/100 [00:02<00:01, 25.65it/s]
 62%|██████▏   | 62/100 [00:02<00:01, 25.01it/s]
 65%|██████▌   | 65/100 [00:02<00:01, 26.01it/s]
 68%|██████▊   | 68/100 [00:02<00:01, 26.66it/s]
 71%|███████   | 71/100 [00:02<00:01, 27.17it/s]
 74%|███████▍  | 74/100 [00:03<00:00, 27.43it/s]
 77%|███████▋  | 77/100 [00:03<00:00, 26.39it/s]
 80%|████████  | 80/100 [00:03<00:00, 26.38it/s]
 83%|████████▎ | 83/100 [00:03<00:00, 26.41it/s]
 86%|████████▌ | 86/100 [00:03<00:00, 26.72it/s]
 89%|████████▉ | 89/100 [00:03<00:00, 26.17it/s]
 92%|█████████▏| 92/100 [00:03<00:00, 21.20it/s]
 95%|█████████▌| 95/100 [00:04<00:00, 18.35it/s]
 98%|█████████▊| 98/100 [00:04<00:00, 19.93it/s]
100%|██████████| 100/100 [00:04<00:00, 23.71it/s]
-- done in 4.376283818997763
-- done.
output shape: T7s1x123[7,50285:A10138.878048780489]
-- decode the answer...
-- done.
def print_prime(n):
   """
   Print all primes between 1 and n
   """
   primes = []
   for num in range(2, n+1):
       is_prime = True
       for i in range(2, int(math.sqrt(num))+1):
           if num % i == 0:
               is_prime = False
               break
       if is_prime:
           primes.append(num)
   print(primes)

print_prime(20)
``

Method generate for ONNX models

We first need to export the model into ONNX.

ONNX Conversion

# When position_ids is part of the export inputs, remove it from the inputs
# and from the matching dynamic shapes so both stay aligned.
if "position_ids" in export_inputs:
    del export_inputs["position_ids"], export_shapes["position_ids"]

# Convert the cache with the type reported for the model weights.
dtype = get_weight_type(model)
print("-- model dtype:", dtype)
export_inputs.update(
    past_key_values=to_any(export_inputs["past_key_values"], dtype)
)

# The exporter is selected from the command line: passing "dynamo" switches
# to "onnx-dynamo", anything else uses "custom".
if "dynamo" in sys.argv:
    exporter = "onnx-dynamo"
else:
    exporter = "custom"
model_name = f"model_{model_id.replace('/', '-')}.{exporter}.onnx"

# The conversion is slow: it only runs when the ONNX file is not already
# on disk.
if not os.path.exists(model_name):
    print("-- conversion to ONNX.")
    begin = time.perf_counter()
    with torch_export_patches(patch_transformers=True):
        # The model is exported with keyword inputs only (empty positional
        # arguments); the inputs are converted for `device` inside the
        # patched context.
        to_onnx(
            model,
            (),
            kwargs=to_any(export_inputs, device),
            dynamic_shapes=export_shapes,
            filename=model_name,
            verbose=1,
            exporter=exporter,
        )
    duration = time.perf_counter() - begin
    print(f"-- done in {duration}")
-- model dtype: torch.float16
-- conversion to ONNX.
[to_onnx] build the graph module from <class 'transformers.models.phi.modeling_phi.PhiForCausalLM'>, type(args)=<class 'tuple'>
[to_onnx] dynamic_shapes={'input_ids': {0: 'batch', 1: 'seq_length'}, 'attention_mask': {0: 'batch', 1: 'cache+seq'}, 'past_key_values': [{0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}]}
[_make_builder_interpreter] export_options=ExportOptions(aten_as_function=('aten.index_copy.default', 'aten.index_put.default', 'aten.setitem', <built-in function setitem>))
[_make_builder_interpreter] input args=()
[_make_builder_interpreter] input kwargs=dict(input_ids:T7r2,attention_mask:T7r2,past_key_values:DynamicCache(key_cache=#24[T10r4,...], value_cache=#24[T10r4,...]))
[_make_builder_interpreter] dynamic_shapes={'input_ids': {0: 'batch', 1: 'seq_length'}, 'attention_mask': {0: 'batch', 1: 'cache+seq'}, 'past_key_values': [{0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}]}
[_make_builder_interpreter] same_signature=True, tracing_mode=symbolic
[ExportOptions.export] ExportOptions(aten_as_function=('aten.index_copy.default', 'aten.index_put.default', 'aten.setitem', <built-in function setitem>)) - torch._dynamo.export 'PhiForCausalLM'
[ExportOptions.export] aten_as_function=('aten.index_copy.default', 'aten.index_put.default', 'aten.setitem', <built-in function setitem>)
[ExportOptions.export] torch_export strict=False, verbose=1
[ExportOptions.export] dynamic_shapes={'input_ids': {0: 'batch', 1: 'seq_length'}, 'attention_mask': {0: 'batch', 1: 'cache+seq'}, 'past_key_values': [{0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}, {0: 'batch', 2: 'cache_length'}]}
[ExportOptions.export] args=()
[ExportOptions.export] kwargs=dict(input_ids:T7r2,attention_mask:T7r2,past_key_values:DynamicCache(key_cache=#24[T10r4,...], value_cache=#24[T10r4,...]))
[ExportOptions.export] export start with strict=False...
[ExportOptions.export] export with backed_size_oblivious=auto
[torch_export] backed_size_oblivious='auto'
[torch_export] inferred backed_size_oblivious=None
[torch_export] export starts with backed_size_oblivious=None
[ExportOptions.export] export done in 11.388620563997392
[ExportOptions.export] post_process_exported_program with decomposition_table=None
[ExportOptions.export] remove inplace nodes
[ExportOptions.export] slices: 6 slices nodes were removed
[CustomTracer.remove_inplace] starts with 1896 nodes (n_inplace_submobules=0)
[CustomTracer.remove_inplace] S1: 80 inplace nodes
[CustomTracer.remove_inplace] S2: 74 inplace nodes and 100 iterations
[CustomTracer.remove_inplace] end with 95 iterations and 1710 nodes (n_inplace=74)
[ExportOptions.export] inplaces: 80 inplaced nodes were removed
[ExportOptions.export] done remove inplace in 0.04992351799955941, modified=80
[ExportOptions.export] done with no decomposition in 0.05059635299767251
[to_onnx] graph module done in 11.58710700300071 s
[to_onnx] start creating the onnx nodes
[to_onnx] interpreter.function_options=FunctionOptions(export_as_function=True, name='*', domain='*', external_threshold=256, move_initializer_to_constant=True, return_initializer=True, merge_allowed=True, rename_allowed=True)

  0%|          | 0/1710 [00:00<?, ?it/s]
 26%|██▌       | 448/1710 [00:00<00:00, 4459.74it/s]
 52%|█████▏    | 894/1710 [00:00<00:00, 1818.02it/s]
 67%|██████▋   | 1154/1710 [00:00<00:00, 1546.51it/s]
 79%|███████▉  | 1349/1710 [00:00<00:00, 1388.97it/s]
 88%|████████▊ | 1510/1710 [00:01<00:00, 1236.79it/s]
 96%|█████████▋| 1646/1710 [00:01<00:00, 1200.51it/s]
100%|██████████| 1710/1710 [00:01<00:00, 1390.41it/s]
[to_onnx] 2312 onnx nodes done in 1.4021027230010077 s
[to_onnx] start conversion to onnx (before optimization) mask_outputs=[True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True]
[GraphBuilder-JUE.inline_functions] begin inlining graph
[GraphBuilder-JUE.inline_functions] skip_functions=set()
[GraphBuilder-JUE._inline_functions_iterations] inline function 'submod_3' domain 'local_functions' [n_replacements=1]
[GraphBuilder-JUE._inline_functions_iterations] done with 9 new nodes for 'submod_3', 'local_functions'
[GraphBuilder-JUE.inline_functions] done inlining graph 129171974220800 in 0.030534073001035722
[GraphBuilder-JUE._add_shape_information] dynamic shapes replacements={'seq_length': 'seq_length', 'batch': 'batch', 'cache_length': 'cache_length', 's1': 'batch', 's60': 'batch', 's43': 'batch', 's47': 'batch', 's39': 'batch', 's69': 'batch', 's91': 'batch', 'batch^s3^batch^s41': 'batch', 's93': 'batch', 'batch^s98^batch^s79': 'batch', 'batch^s92^batch^s83': 'batch', 'batch^s84^batch^s91': 'batch', 's61': 'batch', 's30': 'batch', 'batch^s45^batch^s47': 'batch', 'batch^s30^batch^s89': 'batch', 's64': 'batch', 'batch^s49^batch^s26': 'batch', 's56': 'batch', 's106': 'batch', 'batch^s35^batch^s60': 'batch', 'batch^s64^batch^s86': 'batch', 's90': 'batch', 's79': 'batch', 's77': 'batch', 'batch^s97^batch^s10': 'batch', 's13': 'batch', 's87': 'batch', 's62': 'batch', 'batch^s39^batch^s71': 'batch', 's3': 'batch', 'batch^s82^batch^s62': 'batch', 's41': 'batch', 's59': 'batch', 'batch^s52^batch^s93': 'batch', 's35': 'batch', 's72': 'batch', 's23': 'batch', 's52': 'batch', 's100': 'batch', 's36': 'batch', 's49': 'batch', 's48': 'batch', 'batch^s34^batch^s77': 'batch', 'batch^s104^batch^s106': 'batch', 's104': 'batch', 's57': 'batch', 's8': 'batch', 's86': 'batch', 's102': 'batch', 's89': 'batch', 'batch^s29^batch^s8': 'batch', 's29': 'batch', 's83': 'batch', 's98': 'batch', 'batch^s87^batch^s23': 'batch', 's97': 'batch', 's67': 'batch', 'batch^s1^batch^s75': 'batch', 's84': 'batch', 'batch^s69^batch^s56': 'batch', 's45': 'batch', 'batch^s48^batch^s59': 'batch', 's26': 'batch', 'batch^s67^batch^s61': 'batch', 's82': 'batch', 'batch^s90^batch^s57': 'batch', 's71': 'batch', 'batch^s36^batch^s13': 'batch', 's10': 'batch', 's34': 'batch', 's92': 'batch', 's75': 'batch', 'batch^s100^batch^s102': 'batch', 's70': 'seq_length', 's42': 'cache_length', 's65': 'cache_length', 's101': 'cache_length', 's76': 'cache_length', 's31': 'cache_length', 's28': 'cache_length', 's63': 'cache_length', 's85': 'cache_length', 's105': 'cache_length', 's88': 'cache_length', 's99': 'cache_length', 
's96': 'cache_length', 's95': 'cache_length', 's14': 'cache_length', 's18': 'cache_length', 's46': 'cache_length', 's11': 'cache_length', 's66': 'cache_length', 's50': 'cache_length', 's55': 'cache_length', 's81': 'cache_length', 's27': 'cache_length', 's38': 'cache_length', 's51': 'cache_length', 's40': 'cache_length', 's21': 'cache_length', 's54': 'cache_length', 's37': 'cache_length', 's73': 'cache_length', 's2': 'cache_length', 's58': 'cache_length', 's25': 'cache_length', 's107': 'cache_length', 's68': 'cache_length', 's44': 'cache_length', 's15': 'cache_length', 's32': 'cache_length', 's33': 'cache_length', 's22': 'cache_length', 's94': 'cache_length', 's103': 'cache_length', 's9': 'cache_length', 's4': 'cache_length', 's78': 'cache_length', 's74': 'cache_length', 's7': 'cache_length', 's80': 'cache_length', 's24': 'cache_length'}
[GraphBuilder-JUE.optimize] start with 2320 nodes
[GraphBuilder-JUE.optimize] #patterns=110
[GraphBuilder-JUE.optimize] start with subgraphs
[GraphBuilder-JUE.optimize] done with subgraphs
[GraphBuilderPatternOptimization-JUE.optimize] start with 1991 nodes, 459 initializers, 110 patterns, priorities=[0, 1, 2, 3], max_iter=7964
[GraphBuilderPatternOptimization-JUE.optimize] same children={'SameChildrenPattern', 'SameChildrenFromInputPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] iteration 0: 1991 nodes, priority=0
[GraphBuilderPatternOptimization-JUE.optimize] applies 226 matches, 75*CastPattern, 2*IdentityPattern, 1*ShapeBasedStaticExpandPattern, 96*ShapeBasedEditDistanceReshapePattern, 18*ShapeBasedIdentityPattern, 6*SameChildrenPattern, 1*SqueezeAddPattern, 1*SqueezeUnsqueezePattern, 2*UnsqueezeUnsqueezePattern, 24*FunctionAttentionPattern - time=0.161 | max_time=IdentityPattern:0.067
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=201, n_removed=255, n_applied=275 applied patterns, 1592 nodes left with 23 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 1
[GraphBuilderPatternOptimization-JUE.optimize] iteration 1: 1592 nodes, priority=1
[GraphBuilderPatternOptimization-JUE.optimize] applies 202 matches, 2*ConcatTwiceUnaryPattern, 1*ConstantToInitializerPattern, 49*DropoutPattern, 1*IdentityPattern, 25*LayerNormalizationPattern, 96*SlicesSplitPattern, 1*SqueezeUnsqueezePattern, 3*UnsqueezeUnsqueezePattern, 24*GeluOrtPattern - time=0.243 | max_time=IdentityPattern:0.034
[GraphBuilderPatternOptimization-JUE.optimize] iteration 2: 1123 nodes, priority=1
[GraphBuilderPatternOptimization-JUE.optimize] applies 100 matches, 2*ConcatTwiceUnaryPattern, 25*LayerNormalizationScalePattern, 1*UnsqueezeUnsqueezePattern, 48*FunctionHalfRotaryEmbeddingPattern, 24*FastGeluPattern - time=0.184 | max_time=IdentityPattern:0.042
[GraphBuilderPatternOptimization-JUE.optimize] iteration 3: 908 nodes, priority=1
[GraphBuilderPatternOptimization-JUE.optimize] applies 25 matches, 1*FunctionCausalMaskPattern, 24*SkipLayerNormalizationPattern - time=0.109 | max_time=IdentityPattern:0.012
[GraphBuilderPatternOptimization-JUE.optimize] iteration 4: 883 nodes, priority=1
[GraphBuilderPatternOptimization-JUE.optimize] applies 1 matches, [0]=MatchResult: FunctionCausalMaskMulAddPattern replaces ['Squeeze', 'Squeeze', 'Range', 'Range', 'Unsqueeze', 'Unsqueeze', 'Mul', 'Add'] - time=0.103 | max_time=IdentityPattern:0.011
[GraphBuilderPatternOptimization-JUE.optimize] iteration 5: 877 nodes, priority=1
[GraphBuilderPatternOptimization-JUE.optimize] applies 1 matches, [0]=MatchResult: FunctionCosSinCachePattern replaces ['Squeeze', 'Squeeze', 'Range', 'Unsqueeze', 'Cast', 'Reshape', 'Mul', 'Cos', 'Cast', 'Sin', 'Cast'] - time=0.097 | max_time=IdentityPattern:0.008
[GraphBuilderPatternOptimization-JUE.optimize] iteration 6: 867 nodes, priority=1
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 2
[GraphBuilderPatternOptimization-JUE.optimize] iteration 7: 867 nodes, priority=2
[GraphBuilderPatternOptimization-JUE.optimize] applies 1 matches, [0]=MatchResult: ContribRotaryEmbeddingPattern replaces ['Concat', 'Concat', 'Split', 'HalfRotaryEmbedding', 'Concat'] - time=0.111 | max_time=IdentityPattern:0.016
[GraphBuilderPatternOptimization-JUE.optimize] iteration 8: 872 nodes, priority=2
[GraphBuilderPatternOptimization-JUE.optimize] applies 3 matches, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern - time=0.106 | max_time=ShapeBasedEditDistanceReshapePattern:0.014
[GraphBuilderPatternOptimization-JUE.optimize] iteration 9: 876 nodes, priority=2
[GraphBuilderPatternOptimization-JUE.optimize] applies 6 matches, 2*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.091 | max_time=ShapeBasedEditDistanceReshapePattern:0.008
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=19, n_removed=26, n_applied=620 applied patterns, 874 nodes left with 3 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 10: 874 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 5 matches, 1*ShapeBasedEditDistanceReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.119 | max_time=IdentityPattern:0.014
[GraphBuilderPatternOptimization-JUE.optimize] iteration 11: 880 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 9 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ReshapeReshapePattern, 3*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.105 | max_time=IdentityPattern:0.009
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=641 applied patterns, 873 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 12: 873 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 8 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.102 | max_time=IdentityPattern:0.011
[GraphBuilderPatternOptimization-JUE.optimize] iteration 13: 876 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 14 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 3*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.163 | max_time=IdentityPattern:0.030
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=670 applied patterns, 864 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 14: 864 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.119 | max_time=IdentityPattern:0.017
[GraphBuilderPatternOptimization-JUE.optimize] iteration 15: 863 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.124 | max_time=IdentityPattern:0.015
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=704 applied patterns, 850 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 16: 850 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.118 | max_time=ShapeBasedEditDistanceReshapePattern:0.009
[GraphBuilderPatternOptimization-JUE.optimize] iteration 17: 849 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.117 | max_time=IdentityPattern:0.009
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=738 applied patterns, 836 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 18: 836 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.157 | max_time=ShapeBasedEditDistanceReshapePattern:0.021
[GraphBuilderPatternOptimization-JUE.optimize] iteration 19: 835 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.097 | max_time=ShapeBasedEditDistanceReshapePattern:0.007
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=772 applied patterns, 822 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 20: 822 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.112 | max_time=IdentityPattern:0.015
[GraphBuilderPatternOptimization-JUE.optimize] iteration 21: 821 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.109 | max_time=IdentityPattern:0.009
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=806 applied patterns, 808 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 22: 808 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.130 | max_time=IdentityPattern:0.031
[GraphBuilderPatternOptimization-JUE.optimize] iteration 23: 807 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.136 | max_time=IdentityPattern:0.017
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=840 applied patterns, 794 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 24: 794 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.113 | max_time=IdentityPattern:0.013
[GraphBuilderPatternOptimization-JUE.optimize] iteration 25: 793 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.102 | max_time=IdentityPattern:0.017
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=874 applied patterns, 780 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 26: 780 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.141 | max_time=IdentityPattern:0.016
[GraphBuilderPatternOptimization-JUE.optimize] iteration 27: 779 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.104 | max_time=IdentityPattern:0.015
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=908 applied patterns, 766 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 28: 766 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.122 | max_time=IdentityPattern:0.036
[GraphBuilderPatternOptimization-JUE.optimize] iteration 29: 765 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.085 | max_time=ShapeBasedEditDistanceReshapePattern:0.006
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=942 applied patterns, 752 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 30: 752 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.095 | max_time=IdentityPattern:0.015
[GraphBuilderPatternOptimization-JUE.optimize] iteration 31: 751 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.095 | max_time=IdentityPattern:0.014
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=976 applied patterns, 738 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 32: 738 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.113 | max_time=IdentityPattern:0.023
[GraphBuilderPatternOptimization-JUE.optimize] iteration 33: 737 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.114 | max_time=ShapeBasedEditDistanceReshapePattern:0.008
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=1010 applied patterns, 724 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 34: 724 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.139 | max_time=IdentityPattern:0.016
[GraphBuilderPatternOptimization-JUE.optimize] iteration 35: 723 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 14 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 3*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.125 | max_time=IdentityPattern:0.021
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=1043 applied patterns, 711 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 36: 711 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.105 | max_time=IdentityPattern:0.009
[GraphBuilderPatternOptimization-JUE.optimize] iteration 37: 710 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.090 | max_time=IdentityPattern:0.011
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=1077 applied patterns, 697 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 38: 697 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.091 | max_time=ShapeBasedEditDistanceReshapePattern:0.007
[GraphBuilderPatternOptimization-JUE.optimize] iteration 39: 696 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.117 | max_time=IdentityPattern:0.030
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=1111 applied patterns, 683 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 40: 683 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.084 | max_time=IdentityPattern:0.007
[GraphBuilderPatternOptimization-JUE.optimize] iteration 41: 682 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.104 | max_time=IdentityPattern:0.009
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=1145 applied patterns, 669 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 42: 669 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.105 | max_time=IdentityPattern:0.016
[GraphBuilderPatternOptimization-JUE.optimize] iteration 43: 668 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.095 | max_time=ShapeBasedEditDistanceReshapePattern:0.008
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=1179 applied patterns, 655 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 44: 655 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.114 | max_time=ShapeBasedEditDistanceReshapePattern:0.007
[GraphBuilderPatternOptimization-JUE.optimize] iteration 45: 654 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.079 | max_time=SameChildrenPattern:0.005
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=1213 applied patterns, 641 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 46: 641 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.097 | max_time=IdentityPattern:0.008
[GraphBuilderPatternOptimization-JUE.optimize] iteration 47: 640 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.102 | max_time=IdentityPattern:0.010
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=1247 applied patterns, 627 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 48: 627 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.079 | max_time=SameChildrenPattern:0.005
[GraphBuilderPatternOptimization-JUE.optimize] iteration 49: 626 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.088 | max_time=IdentityPattern:0.008
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=1281 applied patterns, 613 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 50: 613 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.084 | max_time=IdentityPattern:0.014
[GraphBuilderPatternOptimization-JUE.optimize] iteration 51: 612 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.094 | max_time=IdentityPattern:0.013
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=1315 applied patterns, 599 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 52: 599 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.084 | max_time=CastCastBinaryPattern:0.005
[GraphBuilderPatternOptimization-JUE.optimize] iteration 53: 598 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 15 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern - time=0.096 | max_time=SameChildrenPattern:0.006
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=21, n_removed=29, n_applied=1349 applied patterns, 585 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 54: 585 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 12 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbeddingPattern, 1*ContribRotaryEmbedding3DPattern, 1*MultiHeadAttention3DPattern - time=0.105 | max_time=IdentityPattern:0.020
[GraphBuilderPatternOptimization-JUE.optimize] iteration 55: 582 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 14 matches, 1*ShapeBasedEditDistanceReshapePattern, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 4*SameChildrenPattern, 2*SqueezeUnsqueezePattern, 1*ContribRotaryEmbedding3DPattern - time=0.115 | max_time=IdentityPattern:0.025
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=11, n_removed=15, n_applied=1379 applied patterns, 567 nodes left with 4 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 56: 567 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 8 matches, 1*ShapeBasedEditDistanceReshapePattern, 5*ShapedBasedReshapePattern, 1*ReshapeReshapePattern, 1*MultiHeadAttention3DPattern - time=0.075 | max_time=IdentityPattern:0.011
[GraphBuilderPatternOptimization-JUE.optimize] iteration 57: 558 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 7 matches, 1*ShapedBasedReshapePattern, 5*ReshapeReshapePattern, 1*SameChildrenPattern - time=0.060 | max_time=SameChildrenPattern:0.005
[GraphBuilderPatternOptimization-JUE.optimize] reapply {'SameChildrenPattern'}
[GraphBuilderPatternOptimization-JUE.optimize] n_added=0, n_removed=0, n_applied=1394 applied patterns, 551 nodes left with 1 iterations
[GraphBuilderPatternOptimization-JUE.optimize] increase priority to 3
[GraphBuilderPatternOptimization-JUE.optimize] iteration 58: 551 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] applies 5 matches, 5*ShapedBasedReshapePattern - time=0.068 | max_time=SameChildrenPattern:0.006
[GraphBuilderPatternOptimization-JUE.optimize] iteration 59: 546 nodes, priority=3
[GraphBuilderPatternOptimization-JUE.optimize] stops current_priority_index=4, priorities=[0, 1, 2, 3]
[GraphBuilderPatternOptimization-JUE.optimize] done after 60 iterations with 546 nodes in 12.728
[OrderOptimization.optimize] ALGO-2
[OrderOptimization.random_order] -- starts with 471 nodes, 353 initializers
[OrderOptimization.shape_order] done after in 0.002608037000754848s with changed=4 scale=16
[GraphBuilder-JUE.optimize] done with 471 nodes in 15.077
[GraphBuilder-JUE.to_onnx] make_model 499 inits 341 params
[GraphBuilder-JUE.time_evaluation_constants_] 0.0009738480002852157
[GraphBuilder-JUE._build_initializers] start with 499 initializers, large_model=True, external_threshold=1024
[GraphBuilder-JUE._build_initializers] switch low/high order
[GraphBuilder-JUE._build_initializers] done in 3.35000004270114e-06s with 353 initializers, 341 large initializers
[GraphBuilder-JUE._add_shape_information] dynamic shapes replacements={'seq_length': 'seq_length', 'batch': 'batch', 'cache_length': 'cache_length', 's1': 'batch', 's60': 'batch', 's43': 'batch', 's47': 'batch', 's39': 'batch', 's69': 'batch', 's91': 'batch', 'batch^s3^batch^s41': 'batch', 's93': 'batch', 'batch^s98^batch^s79': 'batch', 'batch^s92^batch^s83': 'batch', 'batch^s84^batch^s91': 'batch', 's61': 'batch', 's30': 'batch', 'batch^s45^batch^s47': 'batch', 'batch^s30^batch^s89': 'batch', 's64': 'batch', 'batch^s49^batch^s26': 'batch', 's56': 'batch', 's106': 'batch', 'batch^s35^batch^s60': 'batch', 'batch^s64^batch^s86': 'batch', 's90': 'batch', 's79': 'batch', 's77': 'batch', 'batch^s97^batch^s10': 'batch', 's13': 'batch', 's87': 'batch', 's62': 'batch', 'batch^s39^batch^s71': 'batch', 's3': 'batch', 'batch^s82^batch^s62': 'batch', 's41': 'batch', 's59': 'batch', 'batch^s52^batch^s93': 'batch', 's35': 'batch', 's72': 'batch', 's23': 'batch', 's52': 'batch', 's100': 'batch', 's36': 'batch', 's49': 'batch', 's48': 'batch', 'batch^s34^batch^s77': 'batch', 'batch^s104^batch^s106': 'batch', 's104': 'batch', 's57': 'batch', 's8': 'batch', 's86': 'batch', 's102': 'batch', 's89': 'batch', 'batch^s29^batch^s8': 'batch', 's29': 'batch', 's83': 'batch', 's98': 'batch', 'batch^s87^batch^s23': 'batch', 's97': 'batch', 's67': 'batch', 'batch^s1^batch^s75': 'batch', 's84': 'batch', 'batch^s69^batch^s56': 'batch', 's45': 'batch', 'batch^s48^batch^s59': 'batch', 's26': 'batch', 'batch^s67^batch^s61': 'batch', 's82': 'batch', 'batch^s90^batch^s57': 'batch', 's71': 'batch', 'batch^s36^batch^s13': 'batch', 's10': 'batch', 's34': 'batch', 's92': 'batch', 's75': 'batch', 'batch^s100^batch^s102': 'batch', 's70': 'seq_length', 's42': 'cache_length', 's65': 'cache_length', 's101': 'cache_length', 's76': 'cache_length', 's31': 'cache_length', 's28': 'cache_length', 's63': 'cache_length', 's85': 'cache_length', 's105': 'cache_length', 's88': 'cache_length', 's99': 'cache_length', 
's96': 'cache_length', 's95': 'cache_length', 's14': 'cache_length', 's18': 'cache_length', 's46': 'cache_length', 's11': 'cache_length', 's66': 'cache_length', 's50': 'cache_length', 's55': 'cache_length', 's81': 'cache_length', 's27': 'cache_length', 's38': 'cache_length', 's51': 'cache_length', 's40': 'cache_length', 's21': 'cache_length', 's54': 'cache_length', 's37': 'cache_length', 's73': 'cache_length', 's2': 'cache_length', 's58': 'cache_length', 's25': 'cache_length', 's107': 'cache_length', 's68': 'cache_length', 's44': 'cache_length', 's15': 'cache_length', 's32': 'cache_length', 's33': 'cache_length', 's22': 'cache_length', 's94': 'cache_length', 's103': 'cache_length', 's9': 'cache_length', 's4': 'cache_length', 's78': 'cache_length', 's74': 'cache_length', 's7': 'cache_length', 's80': 'cache_length', 's24': 'cache_length'}
[to_onnx] to_onnx done in 15.317917730000772s and 471 nodes, 353 initializers, 50 inputs, 49 outputs
-- done in 38.80732817199896

onnx_generate

Then we can call onnx_generate to generate two tokens. This function is part of onnx_diagnostic and follows the implementation seen earlier for a torch model. Let’s first ask the function to return the session, so that it does not have to be created again on the second call.

# Short first call limited to two new tokens. With return_session=True the
# call yields three values; only the session is kept so that the timed call
# that follows can reuse it instead of building a new one.
# NOTE(review): the positional 2 is presumably eos_token_id — confirm against
# the onnx_generate signature.
_res, session, _feeds = onnx_generate(
    model_name, inputs.input_ids, 2, max_new_tokens=2, return_session=True
)

# And now the full answer.
print("-- compute the answer with custom generate...")
# Only the generation call sits inside the timed window; the already created
# session is passed in place of the model name.
begin = time.perf_counter()
outputs = onnx_generate(
    session, inputs.input_ids, eos_token_id=tokenizer.eos_token_id, max_new_tokens=100
)
duration = time.perf_counter() - begin
print(f"-- done in {duration}")
# Stored under the name "onnx" in the shared `data` list of measurements.
data.append(dict(name="onnx", duration=duration))

print("-- done.")
# Prints a compact description of the generated token ids (shape and min/max requested).
print("output shape:", string_type(outputs, with_shape=True, with_min_max=True))
print("-- decode the answer...")
# batch_decode returns one string per sequence; only the first one is kept.
text = tokenizer.batch_decode(outputs)[0]
print("-- done.")
print(text)
-- compute the answer with custom generate...
-- done in 1.6451467569968372
-- done.
output shape: T7s1x123[7,50285:A10138.878048780489]
-- decode the answer...
-- done.
def print_prime(n):
   """
   Print all primes between 1 and n
   """
   primes = []
   for num in range(2, n+1):
       is_prime = True
       for i in range(2, int(math.sqrt(num))+1):
           if num % i == 0:
               is_prime = False
               break
       if is_prime:
           primes.append(num)
   print(primes)

print_prime(20)
``

Plots

# Build one row per measurement appended to `data`, indexed by its "name" entry.
df = pandas.DataFrame(data).set_index("name")
print(df)
          duration
name
generate  3.005987
custom    4.376284
onnx      1.645147
# Bar chart of the measured durations; rot=45 tilts the x tick labels.
ax = df.plot(kind="bar", title="Time (s) comparison to generate a prompt.", rot=45)
# tight_layout keeps the rotated labels inside the figure before it is written
# to plot_generate.png.
ax.figure.tight_layout()
ax.figure.savefig("plot_generate.png")
Time (s) comparison to generate a prompt.

Total running time of the script: (0 minutes 55.258 seconds)

Related examples

LayerNormalization implementation cannot be exchanged

LayerNormalization implementation cannot be exchanged

Gemm or Matmul + Add

Gemm or Matmul + Add

Export with loops

Export with loops

Gallery generated by Sphinx-Gallery