onnx_diagnostic.helpers.dot_helper¶
- onnx_diagnostic.helpers.dot_helper.to_dot(model: ModelProto) → str [source]¶
Converts a model into a dot graph. Here is an example:
![digraph {
graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
i_5 [label="p_model_layers_0_self_attn_q_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_6 [label="p_model_layers_0_self_attn_k_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_7 [label="p_model_layers_0_self_attn_v_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_8 [label="p_model_layers_0_self_attn_o_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_9 [label="p_model_layers_0_mlp_gate_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_10 [label="p_model_layers_0_mlp_up_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_11 [label="p_model_layers_0_mlp_down_proj_weight::T10\nFLOAT(1024, 192)", fillcolor="#cccc00"];
i_12 [label="p_lm_head_weight::T10\nFLOAT(192, 32000)", fillcolor="#cccc00"];
i_13 [label="to_422\nFLOAT(1, 1, 48)", fillcolor="#cccc00"];
i_14 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
i_15 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_16 [label="model.layers.0.post_attention_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_17 [label="model.norm.weight\nFLOAT(192)", fillcolor="#cccc00"];
Cast_18 [label="Cast(., to=FLOAT)", fillcolor="#cccccc"];
Unsqueeze_19 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
Shape_20 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_21 [label="Squeeze(.)", fillcolor="#eeeeee"];
Range_22 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Shape_23 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
Concat_24 [label="Concat(., [1], axis=0)", fillcolor="#cccccc"];
Expand_25 [label="Expand(., .)", fillcolor="#cccccc"];
Squeeze_26 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_27 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Add_28 [label="Add(., .)", fillcolor="#cccccc"];
Squeeze_29 [label="Squeeze(.)", fillcolor="#eeeeee"];
Range_30 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Unsqueeze_31 [label="Unsqueeze(., [0, 1, 2])", fillcolor="#eeeeee"];
CausalMask_32 [label="intermediate.\nCausalMask(., .)", fillcolor="#cccccc"];
Gather_33 [label="Gather(., .)", fillcolor="#cccccc"];
SimplifiedLayerNormalization_34 [label="SimplifiedLayerNormalization(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
Cast_35 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
Range_36 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Shape_37 [label="Shape(., start=-1)", fillcolor="#d2a81f"];
Mul_38 [label="Mul(., .)", fillcolor="#cccccc"];
Unsqueeze_39 [label="Unsqueeze(., [1, 2, 3])", fillcolor="#eeeeee"];
Add_40 [label="Add(., .)", fillcolor="#cccccc"];
Reshape_41 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Reshape_42 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Gather_43 [label="Gather(., .)", fillcolor="#cccccc"];
Shape_44 [label="Shape(.)", fillcolor="#d2a81f"];
Reshape_45 [label="Reshape(., .)", fillcolor="#eeeeee"];
And_46 [label="And(., .)", fillcolor="#cccccc"];
Constant_47 [label="Constant() -> to_5-ZEROS2", fillcolor="#cccccc"];
Reshape_48 [label="Reshape(., .)", fillcolor="#eeeeee"];
Mul_49 [label="Mul(., .)", fillcolor="#cccccc"];
Sin_50 [label="Sin(.)", fillcolor="#cccccc"];
Squeeze_51 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
Cos_52 [label="Cos(.)", fillcolor="#cccccc"];
Squeeze_53 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
MatMul_54 [label="MatMul(., .)", fillcolor="#ee9999"];
RotaryEmbedding_55 [label="com.microsoft.\nRotaryEmbedding(., ., ., .)", fillcolor="#cccccc"];
Reshape_56 [label="Reshape(., [0, 0, 2, 96])", fillcolor="#eeeeee"];
Transpose_57 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_58 [label="MatMul(., .)", fillcolor="#ee9999"];
Unsqueeze_59 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
RotaryEmbedding_60 [label="com.microsoft.\nRotaryEmbedding(., ., ., .)", fillcolor="#cccccc"];
MatMul_61 [label="MatMul(., .)", fillcolor="#ee9999"];
Unsqueeze_62 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
Concat_63 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Concat_64 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Unsqueeze_65 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Expand_66 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
Squeeze_67 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
Unsqueeze_68 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Expand_69 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
Squeeze_70 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
LocalAttention_to1_71 [label="intermediate.\nLocalAttention_to1(., ., ., ., [0.31947157])", fillcolor="#cccccc"];
Transpose_72 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
Reshape_73 [label="Reshape(., [0, 0, 192])", fillcolor="#eeeeee"];
MatMul_74 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_75 [label="com.microsoft.\nSkipSimplifiedLayerNormalization(., ., .)", fillcolor="#cccccc"];
MatMul_76 [label="MatMul(., .)", fillcolor="#ee9999"];
QuickGelu_77 [label="com.microsoft.\nQuickGelu(.)", fillcolor="#cccccc"];
MatMul_78 [label="MatMul(., .)", fillcolor="#ee9999"];
Mul_79 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_80 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_81 [label="com.microsoft.\nSkipSimplifiedLayerNormalization(., ., .)", fillcolor="#cccccc"];
MatMul_82 [label="MatMul(., .)", fillcolor="#ee9999"];
I_2 -> Cast_18 [label="INT64(batch,seq_length)"];
Cast_18 -> Unsqueeze_19 [label="FLOAT(batch,seq_length)"];
I_0 -> Shape_20 [label="INT64(batch,seq_length)"];
Shape_20 -> Squeeze_21 [label="INT64(1)"];
Squeeze_21 -> Range_22 [label="INT64()"];
I_2 -> Shape_23 [label="INT64(batch,seq_length)"];
Shape_23 -> Concat_24 [label="INT64(1)"];
Range_22 -> Expand_25 [label="INT64(seq_length)"];
Concat_24 -> Expand_25 [label="INT64(2)"];
Shape_23 -> Squeeze_26 [label="INT64(1)"];
I_3 -> Shape_27 [label="FLOAT(batch,1,cache_length,96)"];
Shape_27 -> Add_28 [label="INT64(1)"];
Shape_20 -> Add_28 [label="INT64(1)"];
Add_28 -> Squeeze_29 [label="INT64(1)"];
Squeeze_29 -> Range_30 [label="INT64()"];
Range_30 -> Unsqueeze_31 [label="INT64(cache_length+seq_length)"];
Shape_27 -> CausalMask_32 [label="INT64(1)"];
Add_28 -> CausalMask_32 [label="INT64(1)"];
i_14 -> Gather_33 [label="FLOAT(32000, 192)"];
I_0 -> Gather_33 [label="INT64(batch,seq_length)"];
Gather_33 -> SimplifiedLayerNormalization_34 [label="FLOAT(batch,seq_length,192)"];
i_15 -> SimplifiedLayerNormalization_34 [label="FLOAT(192)"];
I_1 -> Cast_35 [label="INT64(batch,cache+seq)"];
Squeeze_26 -> Range_36 [label="INT64()"];
Cast_35 -> Shape_37 [label="BOOL(batch,cache+seq)"];
Range_36 -> Mul_38 [label="INT64(batch)"];
Shape_37 -> Mul_38 [label="INT64(1)"];
Mul_38 -> Unsqueeze_39 [label="INT64(batch)"];
Unsqueeze_31 -> Add_40 [label="INT64(1,1,1,cache_length+seq_length)"];
Unsqueeze_39 -> Add_40 [label="INT64(batch,1,1,1)"];
Cast_35 -> Reshape_41 [label="BOOL(batch,cache+seq)"];
Add_40 -> Reshape_42 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
Reshape_41 -> Gather_43 [label="BOOL(batch*(cache+seq))"];
Reshape_42 -> Gather_43 [label="INT64(batch*(cache_length+seq_length))"];
Add_40 -> Shape_44 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
Gather_43 -> Reshape_45 [label="BOOL(batch*(cache_length+seq_length))"];
Shape_44 -> Reshape_45 [label="INT64(4)"];
CausalMask_32 -> And_46 [label="BOOL(1,\n1,\nseq_length,\ncache_length+seq_length)"];
Reshape_45 -> And_46 [label="BOOL(batch,\n1,\n1,\ncache_length+seq_length)"];
Unsqueeze_19 -> Reshape_48 [label="FLOAT(batch,1,seq_length)"];
Constant_47 -> Reshape_48 [label="INT64(3)"];
i_13 -> Mul_49 [label="FLOAT(1, 1, 48)"];
Reshape_48 -> Mul_49 [label="FLOAT(batch,seq_length,1)"];
Mul_49 -> Sin_50 [label="FLOAT(batch,seq_length,48)"];
Sin_50 -> Squeeze_51 [label="FLOAT(batch,seq_length,48)"];
Mul_49 -> Cos_52 [label="FLOAT(batch,seq_length,48)"];
Cos_52 -> Squeeze_53 [label="FLOAT(batch,seq_length,48)"];
SimplifiedLayerNormalization_34 -> MatMul_54 [label="FLOAT(batch,seq_length,192)"];
i_5 -> MatMul_54 [label="FLOAT(192, 192)"];
MatMul_54 -> RotaryEmbedding_55 [label="FLOAT(batch,seq_length,192)"];
Expand_25 -> RotaryEmbedding_55 [label="INT64(batch,seq_length)"];
Squeeze_53 -> RotaryEmbedding_55 [label="FLOAT(seq_length,48)"];
Squeeze_51 -> RotaryEmbedding_55 [label="FLOAT(seq_length,48)"];
RotaryEmbedding_55 -> Reshape_56 [label="FLOAT(batch,seq_length,192)"];
Reshape_56 -> Transpose_57 [label="FLOAT(batch,seq_length,2,96)"];
SimplifiedLayerNormalization_34 -> MatMul_58 [label="FLOAT(batch,seq_length,192)"];
i_6 -> MatMul_58 [label="FLOAT(192, 96)"];
MatMul_58 -> Unsqueeze_59 [label="FLOAT(batch,seq_length,96)"];
Unsqueeze_59 -> RotaryEmbedding_60 [label="FLOAT(batch,1,seq_length,96)"];
Expand_25 -> RotaryEmbedding_60 [label="INT64(batch,seq_length)"];
Squeeze_53 -> RotaryEmbedding_60 [label="FLOAT(seq_length,48)"];
Squeeze_51 -> RotaryEmbedding_60 [label="FLOAT(seq_length,48)"];
SimplifiedLayerNormalization_34 -> MatMul_61 [label="FLOAT(batch,seq_length,192)"];
i_7 -> MatMul_61 [label="FLOAT(192, 96)"];
MatMul_61 -> Unsqueeze_62 [label="FLOAT(batch,seq_length,96)"];
I_3 -> Concat_63 [label="FLOAT(batch,1,cache_length,96)"];
RotaryEmbedding_60 -> Concat_63 [label="FLOAT(batch,1,seq_length,96)"];
I_4 -> Concat_64 [label="FLOAT(batch,1,cache_length,96)"];
Unsqueeze_62 -> Concat_64 [label="FLOAT(batch,1,seq_length,96)"];
Concat_63 -> Unsqueeze_65;
Unsqueeze_65 -> Expand_66 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
Expand_66 -> Squeeze_67 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
Concat_64 -> Unsqueeze_68;
Unsqueeze_68 -> Expand_69 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
Expand_69 -> Squeeze_70 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
Transpose_57 -> LocalAttention_to1_71 [label="FLOAT(batch,2,seq_length,96)"];
Squeeze_67 -> LocalAttention_to1_71 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
Squeeze_70 -> LocalAttention_to1_71 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
And_46 -> LocalAttention_to1_71 [label="BOOL(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
LocalAttention_to1_71 -> Transpose_72 [label="FLOAT(batch,2,seq_length,96)"];
Transpose_72 -> Reshape_73 [label="FLOAT(batch,seq_length,2,96)"];
Reshape_73 -> MatMul_74 [label="FLOAT(batch,seq_length,192)"];
i_8 -> MatMul_74 [label="FLOAT(192, 192)"];
Gather_33 -> SkipSimplifiedLayerNormalization_75 [label="FLOAT(batch,seq_length,192)"];
MatMul_74 -> SkipSimplifiedLayerNormalization_75 [label="FLOAT(batch,seq_length,192)"];
i_16 -> SkipSimplifiedLayerNormalization_75 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_75 -> MatMul_76 [label="FLOAT(batch,seq_length,192)"];
i_9 -> MatMul_76 [label="FLOAT(192, 1024)"];
MatMul_76 -> QuickGelu_77 [label="FLOAT(batch,seq_length,1024)"];
SkipSimplifiedLayerNormalization_75 -> MatMul_78 [label="FLOAT(batch,seq_length,192)"];
i_10 -> MatMul_78 [label="FLOAT(192, 1024)"];
QuickGelu_77 -> Mul_79 [label="FLOAT(batch,seq_length,1024)"];
MatMul_78 -> Mul_79 [label="FLOAT(batch,seq_length,1024)"];
Mul_79 -> MatMul_80 [label="FLOAT(batch,seq_length,1024)"];
i_11 -> MatMul_80 [label="FLOAT(1024, 192)"];
SkipSimplifiedLayerNormalization_75 -> SkipSimplifiedLayerNormalization_81 [label="FLOAT(batch,seq_length,192)"];
MatMul_80 -> SkipSimplifiedLayerNormalization_81 [label="FLOAT(batch,seq_length,192)"];
i_17 -> SkipSimplifiedLayerNormalization_81 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_81 -> MatMul_82 [label="FLOAT(batch,seq_length,192)"];
i_12 -> MatMul_82 [label="FLOAT(192, 32000)"];
O_83 [label="output_0\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
MatMul_82 -> O_83;
O_84 [label="present_key_values_key_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
Concat_63 -> O_84;
O_85 [label="present_key_values_value_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
Concat_64 -> O_85;
}](../../_images/graphviz-0f8c9c65ddef12a7281cdfac6114d2021b27f5dc.png)
Or this one, obtained with torch.onnx.export():
![digraph {
graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
i_5 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_6 [label="val_343\nFLOAT(1, 48)", fillcolor="#cccc00"];
i_7 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
i_8 [label="val_119\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_9 [label="val_126\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_10 [label="val_133\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_11 [label="val_287\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_12 [label="val_292\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_13 [label="val_294\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_14 [label="val_295\nFLOAT(1024, 192)", fillcolor="#cccc00"];
i_15 [label="val_320\nFLOAT(192, 32000)", fillcolor="#cccc00"];
Shape_16 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_17 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_18 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
Squeeze_19 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_20 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_21 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_22 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Squeeze_23 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_24 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Squeeze_25 [label="Squeeze(.)", fillcolor="#eeeeee"];
Add_26 [label="Add(., .)", fillcolor="#cccccc"];
Max_27 [label="Max(., .)", fillcolor="#cccccc"];
Add_28 [label="Add(., .)", fillcolor="#cccccc"];
Add_29 [label="Add(., .)", fillcolor="#cccccc"];
Gather_30 [label="Gather(., ., axis=0)", fillcolor="#cccccc"];
Range_31 [label="Range(., ., 1)", fillcolor="#cccccc"];
Cast_32 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
Range_33 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Range_34 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Unsqueeze_35 [label="Unsqueeze(., [1, 2])", fillcolor="#eeeeee"];
Unsqueeze_36 [label="Unsqueeze(., [3])", fillcolor="#eeeeee"];
Unsqueeze_37 [label="Unsqueeze(., [0, 1])", fillcolor="#eeeeee"];
Unsqueeze_38 [label="Unsqueeze(., [3])", fillcolor="#eeeeee"];
Unsqueeze_39 [label="Unsqueeze(., [0, 1])", fillcolor="#eeeeee"];
Unsqueeze_40 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
LessOrEqual_41 [label="LessOrEqual(., .)", fillcolor="#cccccc"];
And_42 [label="And(True, .)", fillcolor="#cccccc"];
Max_43 [label="Max(., .)", fillcolor="#cccccc"];
Shape_44 [label="Shape(., start=0)", fillcolor="#d2a81f"];
Expand_45 [label="Expand(., .)", fillcolor="#cccccc"];
Unsqueeze_46 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
Expand_47 [label="Expand(., .)", fillcolor="#cccccc"];
Unsqueeze_48 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
Concat_49 [label="Concat(., ., axis=-1)", fillcolor="#cccccc"];
GatherND_50 [label="GatherND(., .)", fillcolor="#cccccc"];
And_51 [label="And(., .)", fillcolor="#cccccc"];
Reshape_52 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_53 [label="Concat(., [1], ., ., axis=0)", fillcolor="#cccccc"];
Expand_54 [label="Expand(., .)", fillcolor="#cccccc"];
SimplifiedLayerNormalization_55 [label="SimplifiedLayerNormalization(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
MatMul_56 [label="MatMul(., .)", fillcolor="#ee9999"];
Concat_57 [label="Concat(., ., [-1], [96], axis=0)", fillcolor="#cccccc"];
Reshape_58 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_59 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_60 [label="MatMul(., .)", fillcolor="#ee9999"];
Reshape_61 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_62 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_63 [label="MatMul(., .)", fillcolor="#ee9999"];
Reshape_64 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_65 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
ReduceMax_66 [label="ReduceMax(.)", fillcolor="#cccccc"];
Add_67 [label="Add(., 1)", fillcolor="#cccccc"];
Range_68 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Reshape_69 [label="Reshape(., [-1, 1])", fillcolor="#eeeeee"];
Cast_70 [label="Cast(., to=FLOAT)", fillcolor="#cccccc"];
MatMul_71 [label="MatMul(., .)", fillcolor="#ee9999"];
Cos_72 [label="Cos(.)", fillcolor="#cccccc"];
Sin_73 [label="Sin(.)", fillcolor="#cccccc"];
RotaryEmbedding_74 [label="com.microsoft.\nRotaryEmbedding(., ., ., .)", fillcolor="#cccccc"];
RotaryEmbedding_75 [label="com.microsoft.\nRotaryEmbedding(., ., ., .)", fillcolor="#cccccc"];
Concat_76 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Concat_77 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Unsqueeze_78 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Reshape_79 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_80 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
Expand_81 [label="Expand(., .)", fillcolor="#cccccc"];
Concat_82 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
Reshape_83 [label="Reshape(., .)", fillcolor="#eeeeee"];
Unsqueeze_84 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Reshape_85 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_86 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
Expand_87 [label="Expand(., .)", fillcolor="#cccccc"];
Concat_88 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
Reshape_89 [label="Reshape(., .)", fillcolor="#eeeeee"];
Slice_90 [label="Slice(., [0], ., [3], [1])", fillcolor="#cccccc"];
Shape_91 [label="Shape(., start=0)", fillcolor="#d2a81f"];
Slice_92 [label="Slice(., [-1], [9223372036854775807])", fillcolor="#cccccc"];
Slice_93 [label="Slice(., [-2], [-1])", fillcolor="#cccccc"];
Slice_94 [label="Slice(., [-9223372036854775808], [-2])", fillcolor="#cccccc"];
Concat_95 [label="Concat([-1], ., ., axis=0)", fillcolor="#cccccc"];
Reshape_96 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_97 [label="Transpose(., perm=[0, 2, 1])", fillcolor="#ee99ee"];
Concat_98 [label="Concat(., ., ., axis=0)", fillcolor="#cccccc"];
Reshape_99 [label="Reshape(., .)", fillcolor="#eeeeee"];
Mul_100 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
Mul_101 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
Where_102 [label="Where(., 0.0, -inf)", fillcolor="#cccccc"];
MatMul_103 [label="MatMul(., .)", fillcolor="#ee9999"];
Add_104 [label="Add(., .)", fillcolor="#cccccc"];
Softmax_105 [label="Softmax(., axis=-1)", fillcolor="#cccccc"];
IsNaN_106 [label="IsNaN(.)", fillcolor="#cccccc"];
Where_107 [label="Where(., 0.0, .)", fillcolor="#cccccc"];
MatMul_108 [label="MatMul(., .)", fillcolor="#ee9999"];
Transpose_109 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
Concat_110 [label="Concat(., ., [-1], axis=0)", fillcolor="#cccccc"];
Reshape_111 [label="Reshape(., .)", fillcolor="#eeeeee"];
MatMul_112 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_113 [label="com.microsoft.\nSkipSimplifiedLayerNormalization(., ., .)", fillcolor="#cccccc"];
MatMul_114 [label="MatMul(., .)", fillcolor="#ee9999"];
Sigmoid_115 [label="Sigmoid(.)", fillcolor="#cccccc"];
Mul_116 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_117 [label="MatMul(., .)", fillcolor="#ee9999"];
Mul_118 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_119 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_120 [label="com.microsoft.\nSkipSimplifiedLayerNormalization(., ., .)", fillcolor="#cccccc"];
MatMul_121 [label="MatMul(., .)", fillcolor="#ee9999"];
I_0 -> Shape_16 [label="INT64(batch,seq_length)"];
Shape_16 -> Squeeze_17 [label="INT64(1)"];
I_2 -> Shape_18 [label="INT64(batch,seq_length)"];
Shape_18 -> Squeeze_19 [label="INT64(1)"];
I_2 -> Shape_20 [label="INT64(batch,seq_length)"];
Shape_20 -> Squeeze_21 [label="INT64(1)"];
I_3 -> Shape_22 [label="FLOAT(batch,1,cache_length,96)"];
Shape_22 -> Squeeze_23 [label="INT64(1)"];
I_4 -> Shape_24 [label="FLOAT(batch,1,cache_length,96)"];
Shape_24 -> Squeeze_25 [label="INT64(1)"];
Squeeze_23 -> Add_26 [label="INT64()"];
Squeeze_17 -> Add_26 [label="INT64()"];
Squeeze_17 -> Max_27 [label="INT64()"];
Squeeze_21 -> Max_27 [label="INT64()"];
Squeeze_23 -> Add_28 [label="INT64()"];
Max_27 -> Add_28 [label="INT64()"];
Squeeze_25 -> Add_29 [label="INT64()"];
Squeeze_17 -> Add_29 [label="INT64()"];
i_7 -> Gather_30 [label="FLOAT(32000, 192)"];
I_0 -> Gather_30 [label="INT64(batch,seq_length)"];
Squeeze_23 -> Range_31 [label="INT64()"];
Add_26 -> Range_31 [label="INT64()"];
I_1 -> Cast_32 [label="INT64(batch,cache+seq)"];
Squeeze_19 -> Range_33 [label="INT64()"];
Add_26 -> Range_34 [label="INT64()"];
Range_33 -> Unsqueeze_35 [label="INT64(batch)"];
Unsqueeze_35 -> Unsqueeze_36 [label="INT64(batch,1,1)"];
Range_31 -> Unsqueeze_37 [label="INT64(seq_length)"];
Unsqueeze_37 -> Unsqueeze_38 [label="INT64(1,1,seq_length)"];
Range_34 -> Unsqueeze_39 [label="INT64(cache_length + seq_length)"];
Unsqueeze_39 -> Unsqueeze_40 [label="INT64(1,1,cache_length + seq_length)"];
Unsqueeze_40 -> LessOrEqual_41 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_38 -> LessOrEqual_41 [label="INT64(1,1,seq_length,1)"];
LessOrEqual_41 -> And_42 [label="BOOL(1,\n1,\nseq_length,\ncache_length + seq_length)"];
Unsqueeze_36 -> Max_43 [label="INT64(batch,1,1,1)"];
Unsqueeze_40 -> Max_43 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Max_43 -> Shape_44 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_36 -> Expand_45 [label="INT64(batch,1,1,1)"];
Shape_44 -> Expand_45 [label="INT64(4)"];
Expand_45 -> Unsqueeze_46 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_40 -> Expand_47 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Shape_44 -> Expand_47 [label="INT64(4)"];
Expand_47 -> Unsqueeze_48 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_46 -> Concat_49 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n1)"];
Unsqueeze_48 -> Concat_49 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n1)"];
Cast_32 -> GatherND_50 [label="BOOL(batch,cache+seq)"];
Concat_49 -> GatherND_50 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n2)"];
And_42 -> And_51 [label="BOOL(1,\n1,\nseq_length,\ncache_length + seq_length)"];
GatherND_50 -> And_51 [label="BOOL(batch,\n1,\n1,\ncache_length + seq_length)"];
Add_26 -> Reshape_52 [label="INT64()"];
Shape_18 -> Concat_53 [label="INT64(1)"];
Shape_16 -> Concat_53 [label="INT64(1)"];
Reshape_52 -> Concat_53 [label="INT64(1)"];
And_51 -> Expand_54 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Concat_53 -> Expand_54 [label="INT64(4)"];
Gather_30 -> SimplifiedLayerNormalization_55 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SimplifiedLayerNormalization_55 [label="FLOAT(192)"];
SimplifiedLayerNormalization_55 -> MatMul_56 [label="FLOAT(batch,seq_length,192)"];
i_8 -> MatMul_56 [label="FLOAT(192, 192)"];
Shape_18 -> Concat_57 [label="INT64(1)"];
Shape_16 -> Concat_57 [label="INT64(1)"];
MatMul_56 -> Reshape_58 [label="FLOAT(batch,seq_length,192)"];
Concat_57 -> Reshape_58 [label="INT64(4)"];
Reshape_58 -> Transpose_59 [label="FLOAT(batch,seq_length,2,96)"];
SimplifiedLayerNormalization_55 -> MatMul_60 [label="FLOAT(batch,seq_length,192)"];
i_9 -> MatMul_60 [label="FLOAT(192, 96)"];
MatMul_60 -> Reshape_61 [label="FLOAT(batch,seq_length,96)"];
Concat_57 -> Reshape_61 [label="INT64(4)"];
Reshape_61 -> Transpose_62 [label="FLOAT(batch,seq_length,1,96)"];
SimplifiedLayerNormalization_55 -> MatMul_63 [label="FLOAT(batch,seq_length,192)"];
i_10 -> MatMul_63 [label="FLOAT(192, 96)"];
MatMul_63 -> Reshape_64 [label="FLOAT(batch,seq_length,96)"];
Concat_57 -> Reshape_64 [label="INT64(4)"];
Reshape_64 -> Transpose_65 [label="FLOAT(batch,seq_length,1,96)"];
I_2 -> ReduceMax_66 [label="INT64(batch,seq_length)"];
ReduceMax_66 -> Add_67 [label="INT64()"];
Add_67 -> Range_68 [label="INT64()"];
Range_68 -> Reshape_69 [label="INT64(?)"];
Reshape_69 -> Cast_70 [label="INT64(?,1)"];
Cast_70 -> MatMul_71 [label="FLOAT(?,1)"];
i_6 -> MatMul_71 [label="FLOAT(1, 48)"];
MatMul_71 -> Cos_72 [label="FLOAT(?,48)"];
MatMul_71 -> Sin_73 [label="FLOAT(?,48)"];
Transpose_59 -> RotaryEmbedding_74 [label="FLOAT(batch,2,seq_length,96)"];
I_2 -> RotaryEmbedding_74 [label="INT64(batch,seq_length)"];
Cos_72 -> RotaryEmbedding_74 [label="FLOAT(?,48)"];
Sin_73 -> RotaryEmbedding_74 [label="FLOAT(?,48)"];
Transpose_62 -> RotaryEmbedding_75 [label="FLOAT(batch,1,seq_length,96)"];
I_2 -> RotaryEmbedding_75 [label="INT64(batch,seq_length)"];
Cos_72 -> RotaryEmbedding_75 [label="FLOAT(?,48)"];
Sin_73 -> RotaryEmbedding_75 [label="FLOAT(?,48)"];
I_3 -> Concat_76 [label="FLOAT(batch,1,cache_length,96)"];
RotaryEmbedding_75 -> Concat_76 [label="FLOAT(batch,\n1,\nMax(seq_length, seq_length),\n96)"];
I_4 -> Concat_77 [label="FLOAT(batch,1,cache_length,96)"];
Transpose_65 -> Concat_77 [label="FLOAT(batch,1,seq_length,96)"];
Concat_76 -> Unsqueeze_78;
Add_28 -> Reshape_79 [label="INT64()"];
Shape_18 -> Concat_80 [label="INT64(1)"];
Reshape_79 -> Concat_80 [label="INT64(1)"];
Unsqueeze_78 -> Expand_81 [label="FLOAT(batch,\n1,\n1,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_80 -> Expand_81 [label="INT64(5)"];
Shape_18 -> Concat_82 [label="INT64(1)"];
Reshape_79 -> Concat_82 [label="INT64(1)"];
Expand_81 -> Reshape_83 [label="FLOAT(batch,\n1,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_82 -> Reshape_83 [label="INT64(4)"];
Concat_77 -> Unsqueeze_84;
Add_29 -> Reshape_85 [label="INT64()"];
Shape_18 -> Concat_86 [label="INT64(1)"];
Reshape_85 -> Concat_86 [label="INT64(1)"];
Unsqueeze_84 -> Expand_87 [label="FLOAT(batch,\n1,\n1,\ncache_length + seq_length,\n96)"];
Concat_86 -> Expand_87 [label="INT64(5)"];
Shape_18 -> Concat_88 [label="INT64(1)"];
Reshape_85 -> Concat_88 [label="INT64(1)"];
Expand_87 -> Reshape_89 [label="FLOAT(batch,\n1,\n2,\ncache_length + seq_length,\n96)"];
Concat_88 -> Reshape_89 [label="INT64(4)"];
Expand_54 -> Slice_90 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Reshape_79 -> Slice_90 [label="INT64(1)"];
Reshape_83 -> Shape_91 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Shape_91 -> Slice_92 [label="INT64(4)"];
Shape_91 -> Slice_93 [label="INT64(4)"];
Shape_91 -> Slice_94 [label="INT64(4)"];
Slice_93 -> Concat_95 [label="INT64(1)"];
Slice_92 -> Concat_95 [label="INT64(1)"];
Reshape_83 -> Reshape_96 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_95 -> Reshape_96 [label="INT64(3)"];
Reshape_96 -> Transpose_97 [label="FLOAT(?,\ncache_length + Max(seq_length, seq_length),\n96)"];
Slice_94 -> Concat_98 [label="INT64(2)"];
Slice_92 -> Concat_98 [label="INT64(1)"];
Slice_93 -> Concat_98 [label="INT64(1)"];
Transpose_97 -> Reshape_99 [label="FLOAT(?,\n96,\ncache_length + Max(seq_length, seq_length))"];
Concat_98 -> Reshape_99 [label="INT64(4)"];
RotaryEmbedding_74 -> Mul_100 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Reshape_99 -> Mul_101 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
Slice_90 -> Where_102 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
Mul_100 -> MatMul_103 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Mul_101 -> MatMul_103 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
MatMul_103 -> Add_104 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\ncache_length + Max(seq_length, seq_length))"];
Where_102 -> Add_104 [label="FLOAT(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
Add_104 -> Softmax_105 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Softmax_105 -> IsNaN_106 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
IsNaN_106 -> Where_107 [label="BOOL(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Softmax_105 -> Where_107 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Where_107 -> MatMul_108 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Reshape_89 -> MatMul_108 [label="FLOAT(batch,\n2,\ncache_length + seq_length,\n96)"];
MatMul_108 -> Transpose_109 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Shape_18 -> Concat_110 [label="INT64(1)"];
Shape_16 -> Concat_110 [label="INT64(1)"];
Transpose_109 -> Reshape_111 [label="FLOAT(batch,\nMax(seq_length, seq_length),\n2,\n96)"];
Concat_110 -> Reshape_111 [label="INT64(3)"];
Reshape_111 -> MatMul_112 [label="FLOAT(batch,\nseq_length,\n((192*Max(seq_length, seq_length))//seq_length))"];
i_11 -> MatMul_112 [label="FLOAT(192, 192)"];
MatMul_112 -> SkipSimplifiedLayerNormalization_113 [label="FLOAT(batch,seq_length,192)"];
Gather_30 -> SkipSimplifiedLayerNormalization_113 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SkipSimplifiedLayerNormalization_113 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_113 -> MatMul_114 [label="FLOAT(batch,seq_length,192)"];
i_12 -> MatMul_114 [label="FLOAT(192, 1024)"];
MatMul_114 -> Sigmoid_115 [label="FLOAT(batch,seq_length,1024)"];
MatMul_114 -> Mul_116 [label="FLOAT(batch,seq_length,1024)"];
Sigmoid_115 -> Mul_116 [label="FLOAT(batch,seq_length,1024)"];
SkipSimplifiedLayerNormalization_113 -> MatMul_117 [label="FLOAT(batch,seq_length,192)"];
i_13 -> MatMul_117 [label="FLOAT(192, 1024)"];
Mul_116 -> Mul_118 [label="FLOAT(batch,seq_length,1024)"];
MatMul_117 -> Mul_118 [label="FLOAT(batch,seq_length,1024)"];
Mul_118 -> MatMul_119 [label="FLOAT(batch,seq_length,1024)"];
i_14 -> MatMul_119 [label="FLOAT(1024, 192)"];
MatMul_119 -> SkipSimplifiedLayerNormalization_120 [label="FLOAT(batch,seq_length,192)"];
SkipSimplifiedLayerNormalization_113 -> SkipSimplifiedLayerNormalization_120 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SkipSimplifiedLayerNormalization_120 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_120 -> MatMul_121 [label="FLOAT(batch,seq_length,192)"];
i_15 -> MatMul_121 [label="FLOAT(192, 32000)"];
O_122 [label="linear_7\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
MatMul_121 -> O_122;
O_123 [label="cat_7\nFLOAT(batch,1,cache_length + Max(seq_length, seq_length),96)", fillcolor="#aaaaee"];
Concat_76 -> O_123;
O_124 [label="cat_8\nFLOAT(batch,1,cache_length + seq_length,96)", fillcolor="#aaaaee"];
Concat_77 -> O_124;
}](../../_images/graphviz-e7e8e9ab1c4047f4897867fe38bcc23ca205ae0b.png)
Converts a model into a dot graph. Here is an example:
![digraph {
graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
i_5 [label="p_model_layers_0_self_attn_q_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_6 [label="p_model_layers_0_self_attn_k_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_7 [label="p_model_layers_0_self_attn_v_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_8 [label="p_model_layers_0_self_attn_o_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_9 [label="p_model_layers_0_mlp_gate_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_10 [label="p_model_layers_0_mlp_up_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_11 [label="p_model_layers_0_mlp_down_proj_weight::T10\nFLOAT(1024, 192)", fillcolor="#cccc00"];
i_12 [label="p_lm_head_weight::T10\nFLOAT(192, 32000)", fillcolor="#cccc00"];
i_13 [label="to_422\nFLOAT(1, 1, 48)", fillcolor="#cccc00"];
i_14 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
i_15 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_16 [label="model.layers.0.post_attention_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_17 [label="model.norm.weight\nFLOAT(192)", fillcolor="#cccc00"];
Cast_18 [label="Cast(., to=FLOAT)", fillcolor="#cccccc"];
Unsqueeze_19 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
Shape_20 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_21 [label="Squeeze(.)", fillcolor="#eeeeee"];
Range_22 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Shape_23 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
Concat_24 [label="Concat(., [1], axis=0)", fillcolor="#cccccc"];
Expand_25 [label="Expand(., .)", fillcolor="#cccccc"];
Squeeze_26 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_27 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Add_28 [label="Add(., .)", fillcolor="#cccccc"];
Squeeze_29 [label="Squeeze(.)", fillcolor="#eeeeee"];
Range_30 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Unsqueeze_31 [label="Unsqueeze(., [0, 1, 2])", fillcolor="#eeeeee"];
CausalMask_32 [label="intermediate.\nCausalMask(., .)", fillcolor="#cccccc"];
Gather_33 [label="Gather(., .)", fillcolor="#cccccc"];
SimplifiedLayerNormalization_34 [label="SimplifiedLayerNormalization(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
Cast_35 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
Range_36 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Shape_37 [label="Shape(., start=-1)", fillcolor="#d2a81f"];
Mul_38 [label="Mul(., .)", fillcolor="#cccccc"];
Unsqueeze_39 [label="Unsqueeze(., [1, 2, 3])", fillcolor="#eeeeee"];
Add_40 [label="Add(., .)", fillcolor="#cccccc"];
Reshape_41 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Reshape_42 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Gather_43 [label="Gather(., .)", fillcolor="#cccccc"];
Shape_44 [label="Shape(.)", fillcolor="#d2a81f"];
Reshape_45 [label="Reshape(., .)", fillcolor="#eeeeee"];
And_46 [label="And(., .)", fillcolor="#cccccc"];
Constant_47 [label="Constant() -> to_5-ZEROS2", fillcolor="#cccccc"];
Reshape_48 [label="Reshape(., .)", fillcolor="#eeeeee"];
Mul_49 [label="Mul(., .)", fillcolor="#cccccc"];
Sin_50 [label="Sin(.)", fillcolor="#cccccc"];
Squeeze_51 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
Cos_52 [label="Cos(.)", fillcolor="#cccccc"];
Squeeze_53 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
MatMul_54 [label="MatMul(., .)", fillcolor="#ee9999"];
RotaryEmbedding_55 [label="com.microsoft.\nRotaryEmbedding(., ., ., .)", fillcolor="#cccccc"];
Reshape_56 [label="Reshape(., [0, 0, 2, 96])", fillcolor="#eeeeee"];
Transpose_57 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_58 [label="MatMul(., .)", fillcolor="#ee9999"];
Unsqueeze_59 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
RotaryEmbedding_60 [label="com.microsoft.\nRotaryEmbedding(., ., ., .)", fillcolor="#cccccc"];
MatMul_61 [label="MatMul(., .)", fillcolor="#ee9999"];
Unsqueeze_62 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
Concat_63 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Concat_64 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Unsqueeze_65 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Expand_66 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
Squeeze_67 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
Unsqueeze_68 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Expand_69 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
Squeeze_70 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
LocalAttention_to1_71 [label="intermediate.\nLocalAttention_to1(., ., ., ., [0.31947157])", fillcolor="#cccccc"];
Transpose_72 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
Reshape_73 [label="Reshape(., [0, 0, 192])", fillcolor="#eeeeee"];
MatMul_74 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_75 [label="com.microsoft.\nSkipSimplifiedLayerNormalization(., ., .)", fillcolor="#cccccc"];
MatMul_76 [label="MatMul(., .)", fillcolor="#ee9999"];
QuickGelu_77 [label="com.microsoft.\nQuickGelu(.)", fillcolor="#cccccc"];
MatMul_78 [label="MatMul(., .)", fillcolor="#ee9999"];
Mul_79 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_80 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_81 [label="com.microsoft.\nSkipSimplifiedLayerNormalization(., ., .)", fillcolor="#cccccc"];
MatMul_82 [label="MatMul(., .)", fillcolor="#ee9999"];
I_2 -> Cast_18 [label="INT64(batch,seq_length)"];
Cast_18 -> Unsqueeze_19 [label="FLOAT(batch,seq_length)"];
I_0 -> Shape_20 [label="INT64(batch,seq_length)"];
Shape_20 -> Squeeze_21 [label="INT64(1)"];
Squeeze_21 -> Range_22 [label="INT64()"];
I_2 -> Shape_23 [label="INT64(batch,seq_length)"];
Shape_23 -> Concat_24 [label="INT64(1)"];
Range_22 -> Expand_25 [label="INT64(seq_length)"];
Concat_24 -> Expand_25 [label="INT64(2)"];
Shape_23 -> Squeeze_26 [label="INT64(1)"];
I_3 -> Shape_27 [label="FLOAT(batch,1,cache_length,96)"];
Shape_27 -> Add_28 [label="INT64(1)"];
Shape_20 -> Add_28 [label="INT64(1)"];
Add_28 -> Squeeze_29 [label="INT64(1)"];
Squeeze_29 -> Range_30 [label="INT64()"];
Range_30 -> Unsqueeze_31 [label="INT64(cache_length+seq_length)"];
Shape_27 -> CausalMask_32 [label="INT64(1)"];
Add_28 -> CausalMask_32 [label="INT64(1)"];
i_14 -> Gather_33 [label="FLOAT(32000, 192)"];
I_0 -> Gather_33 [label="INT64(batch,seq_length)"];
Gather_33 -> SimplifiedLayerNormalization_34 [label="FLOAT(batch,seq_length,192)"];
i_15 -> SimplifiedLayerNormalization_34 [label="FLOAT(192)"];
I_1 -> Cast_35 [label="INT64(batch,cache+seq)"];
Squeeze_26 -> Range_36 [label="INT64()"];
Cast_35 -> Shape_37 [label="BOOL(batch,cache+seq)"];
Range_36 -> Mul_38 [label="INT64(batch)"];
Shape_37 -> Mul_38 [label="INT64(1)"];
Mul_38 -> Unsqueeze_39 [label="INT64(batch)"];
Unsqueeze_31 -> Add_40 [label="INT64(1,1,1,cache_length+seq_length)"];
Unsqueeze_39 -> Add_40 [label="INT64(batch,1,1,1)"];
Cast_35 -> Reshape_41 [label="BOOL(batch,cache+seq)"];
Add_40 -> Reshape_42 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
Reshape_41 -> Gather_43 [label="BOOL(batch*(cache+seq))"];
Reshape_42 -> Gather_43 [label="INT64(batch*(cache_length+seq_length))"];
Add_40 -> Shape_44 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
Gather_43 -> Reshape_45 [label="BOOL(batch*(cache_length+seq_length))"];
Shape_44 -> Reshape_45 [label="INT64(4)"];
CausalMask_32 -> And_46 [label="BOOL(1,\n1,\nseq_length,\ncache_length+seq_length)"];
Reshape_45 -> And_46 [label="BOOL(batch,\n1,\n1,\ncache_length+seq_length)"];
Unsqueeze_19 -> Reshape_48 [label="FLOAT(batch,1,seq_length)"];
Constant_47 -> Reshape_48 [label="INT64(3)"];
i_13 -> Mul_49 [label="FLOAT(1, 1, 48)"];
Reshape_48 -> Mul_49 [label="FLOAT(batch,seq_length,1)"];
Mul_49 -> Sin_50 [label="FLOAT(batch,seq_length,48)"];
Sin_50 -> Squeeze_51 [label="FLOAT(batch,seq_length,48)"];
Mul_49 -> Cos_52 [label="FLOAT(batch,seq_length,48)"];
Cos_52 -> Squeeze_53 [label="FLOAT(batch,seq_length,48)"];
SimplifiedLayerNormalization_34 -> MatMul_54 [label="FLOAT(batch,seq_length,192)"];
i_5 -> MatMul_54 [label="FLOAT(192, 192)"];
MatMul_54 -> RotaryEmbedding_55 [label="FLOAT(batch,seq_length,192)"];
Expand_25 -> RotaryEmbedding_55 [label="INT64(batch,seq_length)"];
Squeeze_53 -> RotaryEmbedding_55 [label="FLOAT(seq_length,48)"];
Squeeze_51 -> RotaryEmbedding_55 [label="FLOAT(seq_length,48)"];
RotaryEmbedding_55 -> Reshape_56 [label="FLOAT(batch,seq_length,192)"];
Reshape_56 -> Transpose_57 [label="FLOAT(batch,seq_length,2,96)"];
SimplifiedLayerNormalization_34 -> MatMul_58 [label="FLOAT(batch,seq_length,192)"];
i_6 -> MatMul_58 [label="FLOAT(192, 96)"];
MatMul_58 -> Unsqueeze_59 [label="FLOAT(batch,seq_length,96)"];
Unsqueeze_59 -> RotaryEmbedding_60 [label="FLOAT(batch,1,seq_length,96)"];
Expand_25 -> RotaryEmbedding_60 [label="INT64(batch,seq_length)"];
Squeeze_53 -> RotaryEmbedding_60 [label="FLOAT(seq_length,48)"];
Squeeze_51 -> RotaryEmbedding_60 [label="FLOAT(seq_length,48)"];
SimplifiedLayerNormalization_34 -> MatMul_61 [label="FLOAT(batch,seq_length,192)"];
i_7 -> MatMul_61 [label="FLOAT(192, 96)"];
MatMul_61 -> Unsqueeze_62 [label="FLOAT(batch,seq_length,96)"];
I_3 -> Concat_63 [label="FLOAT(batch,1,cache_length,96)"];
RotaryEmbedding_60 -> Concat_63 [label="FLOAT(batch,1,seq_length,96)"];
I_4 -> Concat_64 [label="FLOAT(batch,1,cache_length,96)"];
Unsqueeze_62 -> Concat_64 [label="FLOAT(batch,1,seq_length,96)"];
Concat_63 -> Unsqueeze_65;
Unsqueeze_65 -> Expand_66 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
Expand_66 -> Squeeze_67 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
Concat_64 -> Unsqueeze_68;
Unsqueeze_68 -> Expand_69 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
Expand_69 -> Squeeze_70 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
Transpose_57 -> LocalAttention_to1_71 [label="FLOAT(batch,2,seq_length,96)"];
Squeeze_67 -> LocalAttention_to1_71 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
Squeeze_70 -> LocalAttention_to1_71 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
And_46 -> LocalAttention_to1_71 [label="BOOL(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
LocalAttention_to1_71 -> Transpose_72 [label="FLOAT(batch,2,seq_length,96)"];
Transpose_72 -> Reshape_73 [label="FLOAT(batch,seq_length,2,96)"];
Reshape_73 -> MatMul_74 [label="FLOAT(batch,seq_length,192)"];
i_8 -> MatMul_74 [label="FLOAT(192, 192)"];
Gather_33 -> SkipSimplifiedLayerNormalization_75 [label="FLOAT(batch,seq_length,192)"];
MatMul_74 -> SkipSimplifiedLayerNormalization_75 [label="FLOAT(batch,seq_length,192)"];
i_16 -> SkipSimplifiedLayerNormalization_75 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_75 -> MatMul_76 [label="FLOAT(batch,seq_length,192)"];
i_9 -> MatMul_76 [label="FLOAT(192, 1024)"];
MatMul_76 -> QuickGelu_77 [label="FLOAT(batch,seq_length,1024)"];
SkipSimplifiedLayerNormalization_75 -> MatMul_78 [label="FLOAT(batch,seq_length,192)"];
i_10 -> MatMul_78 [label="FLOAT(192, 1024)"];
QuickGelu_77 -> Mul_79 [label="FLOAT(batch,seq_length,1024)"];
MatMul_78 -> Mul_79 [label="FLOAT(batch,seq_length,1024)"];
Mul_79 -> MatMul_80 [label="FLOAT(batch,seq_length,1024)"];
i_11 -> MatMul_80 [label="FLOAT(1024, 192)"];
SkipSimplifiedLayerNormalization_75 -> SkipSimplifiedLayerNormalization_81 [label="FLOAT(batch,seq_length,192)"];
MatMul_80 -> SkipSimplifiedLayerNormalization_81 [label="FLOAT(batch,seq_length,192)"];
i_17 -> SkipSimplifiedLayerNormalization_81 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_81 -> MatMul_82 [label="FLOAT(batch,seq_length,192)"];
i_12 -> MatMul_82 [label="FLOAT(192, 32000)"];
O_83 [label="output_0\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
MatMul_82 -> O_83;
O_84 [label="present_key_values_key_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
Concat_63 -> O_84;
O_85 [label="present_key_values_value_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
Concat_64 -> O_85;
}](../../_images/graphviz-0f8c9c65ddef12a7281cdfac6114d2021b27f5dc.png)
Here is another example, this one obtained with torch.onnx.export():
![digraph {
graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
i_5 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_6 [label="val_343\nFLOAT(1, 48)", fillcolor="#cccc00"];
i_7 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
i_8 [label="val_119\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_9 [label="val_126\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_10 [label="val_133\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_11 [label="val_287\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_12 [label="val_292\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_13 [label="val_294\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_14 [label="val_295\nFLOAT(1024, 192)", fillcolor="#cccc00"];
i_15 [label="val_320\nFLOAT(192, 32000)", fillcolor="#cccc00"];
Shape_16 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_17 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_18 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
Squeeze_19 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_20 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_21 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_22 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Squeeze_23 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_24 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Squeeze_25 [label="Squeeze(.)", fillcolor="#eeeeee"];
Add_26 [label="Add(., .)", fillcolor="#cccccc"];
Max_27 [label="Max(., .)", fillcolor="#cccccc"];
Add_28 [label="Add(., .)", fillcolor="#cccccc"];
Add_29 [label="Add(., .)", fillcolor="#cccccc"];
Gather_30 [label="Gather(., ., axis=0)", fillcolor="#cccccc"];
Range_31 [label="Range(., ., 1)", fillcolor="#cccccc"];
Cast_32 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
Range_33 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Range_34 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Unsqueeze_35 [label="Unsqueeze(., [1, 2])", fillcolor="#eeeeee"];
Unsqueeze_36 [label="Unsqueeze(., [3])", fillcolor="#eeeeee"];
Unsqueeze_37 [label="Unsqueeze(., [0, 1])", fillcolor="#eeeeee"];
Unsqueeze_38 [label="Unsqueeze(., [3])", fillcolor="#eeeeee"];
Unsqueeze_39 [label="Unsqueeze(., [0, 1])", fillcolor="#eeeeee"];
Unsqueeze_40 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
LessOrEqual_41 [label="LessOrEqual(., .)", fillcolor="#cccccc"];
And_42 [label="And(True, .)", fillcolor="#cccccc"];
Max_43 [label="Max(., .)", fillcolor="#cccccc"];
Shape_44 [label="Shape(., start=0)", fillcolor="#d2a81f"];
Expand_45 [label="Expand(., .)", fillcolor="#cccccc"];
Unsqueeze_46 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
Expand_47 [label="Expand(., .)", fillcolor="#cccccc"];
Unsqueeze_48 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
Concat_49 [label="Concat(., ., axis=-1)", fillcolor="#cccccc"];
GatherND_50 [label="GatherND(., .)", fillcolor="#cccccc"];
And_51 [label="And(., .)", fillcolor="#cccccc"];
Reshape_52 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_53 [label="Concat(., [1], ., ., axis=0)", fillcolor="#cccccc"];
Expand_54 [label="Expand(., .)", fillcolor="#cccccc"];
SimplifiedLayerNormalization_55 [label="SimplifiedLayerNormalization(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
MatMul_56 [label="MatMul(., .)", fillcolor="#ee9999"];
Concat_57 [label="Concat(., ., [-1], [96], axis=0)", fillcolor="#cccccc"];
Reshape_58 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_59 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_60 [label="MatMul(., .)", fillcolor="#ee9999"];
Reshape_61 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_62 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_63 [label="MatMul(., .)", fillcolor="#ee9999"];
Reshape_64 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_65 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
ReduceMax_66 [label="ReduceMax(.)", fillcolor="#cccccc"];
Add_67 [label="Add(., 1)", fillcolor="#cccccc"];
Range_68 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Reshape_69 [label="Reshape(., [-1, 1])", fillcolor="#eeeeee"];
Cast_70 [label="Cast(., to=FLOAT)", fillcolor="#cccccc"];
MatMul_71 [label="MatMul(., .)", fillcolor="#ee9999"];
Cos_72 [label="Cos(.)", fillcolor="#cccccc"];
Sin_73 [label="Sin(.)", fillcolor="#cccccc"];
RotaryEmbedding_74 [label="com.microsoft.\nRotaryEmbedding(., ., ., .)", fillcolor="#cccccc"];
RotaryEmbedding_75 [label="com.microsoft.\nRotaryEmbedding(., ., ., .)", fillcolor="#cccccc"];
Concat_76 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Concat_77 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Unsqueeze_78 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Reshape_79 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_80 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
Expand_81 [label="Expand(., .)", fillcolor="#cccccc"];
Concat_82 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
Reshape_83 [label="Reshape(., .)", fillcolor="#eeeeee"];
Unsqueeze_84 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Reshape_85 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_86 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
Expand_87 [label="Expand(., .)", fillcolor="#cccccc"];
Concat_88 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
Reshape_89 [label="Reshape(., .)", fillcolor="#eeeeee"];
Slice_90 [label="Slice(., [0], ., [3], [1])", fillcolor="#cccccc"];
Shape_91 [label="Shape(., start=0)", fillcolor="#d2a81f"];
Slice_92 [label="Slice(., [-1], [9223372036854775807])", fillcolor="#cccccc"];
Slice_93 [label="Slice(., [-2], [-1])", fillcolor="#cccccc"];
Slice_94 [label="Slice(., [-9223372036854775808], [-2])", fillcolor="#cccccc"];
Concat_95 [label="Concat([-1], ., ., axis=0)", fillcolor="#cccccc"];
Reshape_96 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_97 [label="Transpose(., perm=[0, 2, 1])", fillcolor="#ee99ee"];
Concat_98 [label="Concat(., ., ., axis=0)", fillcolor="#cccccc"];
Reshape_99 [label="Reshape(., .)", fillcolor="#eeeeee"];
Mul_100 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
Mul_101 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
Where_102 [label="Where(., 0.0, -inf)", fillcolor="#cccccc"];
MatMul_103 [label="MatMul(., .)", fillcolor="#ee9999"];
Add_104 [label="Add(., .)", fillcolor="#cccccc"];
Softmax_105 [label="Softmax(., axis=-1)", fillcolor="#cccccc"];
IsNaN_106 [label="IsNaN(.)", fillcolor="#cccccc"];
Where_107 [label="Where(., 0.0, .)", fillcolor="#cccccc"];
MatMul_108 [label="MatMul(., .)", fillcolor="#ee9999"];
Transpose_109 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
Concat_110 [label="Concat(., ., [-1], axis=0)", fillcolor="#cccccc"];
Reshape_111 [label="Reshape(., .)", fillcolor="#eeeeee"];
MatMul_112 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_113 [label="com.microsoft.\nSkipSimplifiedLayerNormalization(., ., .)", fillcolor="#cccccc"];
MatMul_114 [label="MatMul(., .)", fillcolor="#ee9999"];
Sigmoid_115 [label="Sigmoid(.)", fillcolor="#cccccc"];
Mul_116 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_117 [label="MatMul(., .)", fillcolor="#ee9999"];
Mul_118 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_119 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_120 [label="com.microsoft.\nSkipSimplifiedLayerNormalization(., ., .)", fillcolor="#cccccc"];
MatMul_121 [label="MatMul(., .)", fillcolor="#ee9999"];
I_0 -> Shape_16 [label="INT64(batch,seq_length)"];
Shape_16 -> Squeeze_17 [label="INT64(1)"];
I_2 -> Shape_18 [label="INT64(batch,seq_length)"];
Shape_18 -> Squeeze_19 [label="INT64(1)"];
I_2 -> Shape_20 [label="INT64(batch,seq_length)"];
Shape_20 -> Squeeze_21 [label="INT64(1)"];
I_3 -> Shape_22 [label="FLOAT(batch,1,cache_length,96)"];
Shape_22 -> Squeeze_23 [label="INT64(1)"];
I_4 -> Shape_24 [label="FLOAT(batch,1,cache_length,96)"];
Shape_24 -> Squeeze_25 [label="INT64(1)"];
Squeeze_23 -> Add_26 [label="INT64()"];
Squeeze_17 -> Add_26 [label="INT64()"];
Squeeze_17 -> Max_27 [label="INT64()"];
Squeeze_21 -> Max_27 [label="INT64()"];
Squeeze_23 -> Add_28 [label="INT64()"];
Max_27 -> Add_28 [label="INT64()"];
Squeeze_25 -> Add_29 [label="INT64()"];
Squeeze_17 -> Add_29 [label="INT64()"];
i_7 -> Gather_30 [label="FLOAT(32000, 192)"];
I_0 -> Gather_30 [label="INT64(batch,seq_length)"];
Squeeze_23 -> Range_31 [label="INT64()"];
Add_26 -> Range_31 [label="INT64()"];
I_1 -> Cast_32 [label="INT64(batch,cache+seq)"];
Squeeze_19 -> Range_33 [label="INT64()"];
Add_26 -> Range_34 [label="INT64()"];
Range_33 -> Unsqueeze_35 [label="INT64(batch)"];
Unsqueeze_35 -> Unsqueeze_36 [label="INT64(batch,1,1)"];
Range_31 -> Unsqueeze_37 [label="INT64(seq_length)"];
Unsqueeze_37 -> Unsqueeze_38 [label="INT64(1,1,seq_length)"];
Range_34 -> Unsqueeze_39 [label="INT64(cache_length + seq_length)"];
Unsqueeze_39 -> Unsqueeze_40 [label="INT64(1,1,cache_length + seq_length)"];
Unsqueeze_40 -> LessOrEqual_41 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_38 -> LessOrEqual_41 [label="INT64(1,1,seq_length,1)"];
LessOrEqual_41 -> And_42 [label="BOOL(1,\n1,\nseq_length,\ncache_length + seq_length)"];
Unsqueeze_36 -> Max_43 [label="INT64(batch,1,1,1)"];
Unsqueeze_40 -> Max_43 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Max_43 -> Shape_44 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_36 -> Expand_45 [label="INT64(batch,1,1,1)"];
Shape_44 -> Expand_45 [label="INT64(4)"];
Expand_45 -> Unsqueeze_46 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_40 -> Expand_47 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Shape_44 -> Expand_47 [label="INT64(4)"];
Expand_47 -> Unsqueeze_48 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_46 -> Concat_49 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n1)"];
Unsqueeze_48 -> Concat_49 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n1)"];
Cast_32 -> GatherND_50 [label="BOOL(batch,cache+seq)"];
Concat_49 -> GatherND_50 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n2)"];
And_42 -> And_51 [label="BOOL(1,\n1,\nseq_length,\ncache_length + seq_length)"];
GatherND_50 -> And_51 [label="BOOL(batch,\n1,\n1,\ncache_length + seq_length)"];
Add_26 -> Reshape_52 [label="INT64()"];
Shape_18 -> Concat_53 [label="INT64(1)"];
Shape_16 -> Concat_53 [label="INT64(1)"];
Reshape_52 -> Concat_53 [label="INT64(1)"];
And_51 -> Expand_54 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Concat_53 -> Expand_54 [label="INT64(4)"];
Gather_30 -> SimplifiedLayerNormalization_55 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SimplifiedLayerNormalization_55 [label="FLOAT(192)"];
SimplifiedLayerNormalization_55 -> MatMul_56 [label="FLOAT(batch,seq_length,192)"];
i_8 -> MatMul_56 [label="FLOAT(192, 192)"];
Shape_18 -> Concat_57 [label="INT64(1)"];
Shape_16 -> Concat_57 [label="INT64(1)"];
MatMul_56 -> Reshape_58 [label="FLOAT(batch,seq_length,192)"];
Concat_57 -> Reshape_58 [label="INT64(4)"];
Reshape_58 -> Transpose_59 [label="FLOAT(batch,seq_length,2,96)"];
SimplifiedLayerNormalization_55 -> MatMul_60 [label="FLOAT(batch,seq_length,192)"];
i_9 -> MatMul_60 [label="FLOAT(192, 96)"];
MatMul_60 -> Reshape_61 [label="FLOAT(batch,seq_length,96)"];
Concat_57 -> Reshape_61 [label="INT64(4)"];
Reshape_61 -> Transpose_62 [label="FLOAT(batch,seq_length,1,96)"];
SimplifiedLayerNormalization_55 -> MatMul_63 [label="FLOAT(batch,seq_length,192)"];
i_10 -> MatMul_63 [label="FLOAT(192, 96)"];
MatMul_63 -> Reshape_64 [label="FLOAT(batch,seq_length,96)"];
Concat_57 -> Reshape_64 [label="INT64(4)"];
Reshape_64 -> Transpose_65 [label="FLOAT(batch,seq_length,1,96)"];
I_2 -> ReduceMax_66 [label="INT64(batch,seq_length)"];
ReduceMax_66 -> Add_67 [label="INT64()"];
Add_67 -> Range_68 [label="INT64()"];
Range_68 -> Reshape_69 [label="INT64(?)"];
Reshape_69 -> Cast_70 [label="INT64(?,1)"];
Cast_70 -> MatMul_71 [label="FLOAT(?,1)"];
i_6 -> MatMul_71 [label="FLOAT(1, 48)"];
MatMul_71 -> Cos_72 [label="FLOAT(?,48)"];
MatMul_71 -> Sin_73 [label="FLOAT(?,48)"];
Transpose_59 -> RotaryEmbedding_74 [label="FLOAT(batch,2,seq_length,96)"];
I_2 -> RotaryEmbedding_74 [label="INT64(batch,seq_length)"];
Cos_72 -> RotaryEmbedding_74 [label="FLOAT(?,48)"];
Sin_73 -> RotaryEmbedding_74 [label="FLOAT(?,48)"];
Transpose_62 -> RotaryEmbedding_75 [label="FLOAT(batch,1,seq_length,96)"];
I_2 -> RotaryEmbedding_75 [label="INT64(batch,seq_length)"];
Cos_72 -> RotaryEmbedding_75 [label="FLOAT(?,48)"];
Sin_73 -> RotaryEmbedding_75 [label="FLOAT(?,48)"];
I_3 -> Concat_76 [label="FLOAT(batch,1,cache_length,96)"];
RotaryEmbedding_75 -> Concat_76 [label="FLOAT(batch,\n1,\nMax(seq_length, seq_length),\n96)"];
I_4 -> Concat_77 [label="FLOAT(batch,1,cache_length,96)"];
Transpose_65 -> Concat_77 [label="FLOAT(batch,1,seq_length,96)"];
Concat_76 -> Unsqueeze_78;
Add_28 -> Reshape_79 [label="INT64()"];
Shape_18 -> Concat_80 [label="INT64(1)"];
Reshape_79 -> Concat_80 [label="INT64(1)"];
Unsqueeze_78 -> Expand_81 [label="FLOAT(batch,\n1,\n1,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_80 -> Expand_81 [label="INT64(5)"];
Shape_18 -> Concat_82 [label="INT64(1)"];
Reshape_79 -> Concat_82 [label="INT64(1)"];
Expand_81 -> Reshape_83 [label="FLOAT(batch,\n1,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_82 -> Reshape_83 [label="INT64(4)"];
Concat_77 -> Unsqueeze_84;
Add_29 -> Reshape_85 [label="INT64()"];
Shape_18 -> Concat_86 [label="INT64(1)"];
Reshape_85 -> Concat_86 [label="INT64(1)"];
Unsqueeze_84 -> Expand_87 [label="FLOAT(batch,\n1,\n1,\ncache_length + seq_length,\n96)"];
Concat_86 -> Expand_87 [label="INT64(5)"];
Shape_18 -> Concat_88 [label="INT64(1)"];
Reshape_85 -> Concat_88 [label="INT64(1)"];
Expand_87 -> Reshape_89 [label="FLOAT(batch,\n1,\n2,\ncache_length + seq_length,\n96)"];
Concat_88 -> Reshape_89 [label="INT64(4)"];
Expand_54 -> Slice_90 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Reshape_79 -> Slice_90 [label="INT64(1)"];
Reshape_83 -> Shape_91 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Shape_91 -> Slice_92 [label="INT64(4)"];
Shape_91 -> Slice_93 [label="INT64(4)"];
Shape_91 -> Slice_94 [label="INT64(4)"];
Slice_93 -> Concat_95 [label="INT64(1)"];
Slice_92 -> Concat_95 [label="INT64(1)"];
Reshape_83 -> Reshape_96 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_95 -> Reshape_96 [label="INT64(3)"];
Reshape_96 -> Transpose_97 [label="FLOAT(?,\ncache_length + Max(seq_length, seq_length),\n96)"];
Slice_94 -> Concat_98 [label="INT64(2)"];
Slice_92 -> Concat_98 [label="INT64(1)"];
Slice_93 -> Concat_98 [label="INT64(1)"];
Transpose_97 -> Reshape_99 [label="FLOAT(?,\n96,\ncache_length + Max(seq_length, seq_length))"];
Concat_98 -> Reshape_99 [label="INT64(4)"];
RotaryEmbedding_74 -> Mul_100 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Reshape_99 -> Mul_101 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
Slice_90 -> Where_102 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
Mul_100 -> MatMul_103 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Mul_101 -> MatMul_103 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
MatMul_103 -> Add_104 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\ncache_length + Max(seq_length, seq_length))"];
Where_102 -> Add_104 [label="FLOAT(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
Add_104 -> Softmax_105 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Softmax_105 -> IsNaN_106 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
IsNaN_106 -> Where_107 [label="BOOL(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Softmax_105 -> Where_107 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Where_107 -> MatMul_108 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Reshape_89 -> MatMul_108 [label="FLOAT(batch,\n2,\ncache_length + seq_length,\n96)"];
MatMul_108 -> Transpose_109 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Shape_18 -> Concat_110 [label="INT64(1)"];
Shape_16 -> Concat_110 [label="INT64(1)"];
Transpose_109 -> Reshape_111 [label="FLOAT(batch,\nMax(seq_length, seq_length),\n2,\n96)"];
Concat_110 -> Reshape_111 [label="INT64(3)"];
Reshape_111 -> MatMul_112 [label="FLOAT(batch,\nseq_length,\n((192*Max(seq_length, seq_length))//seq_length))"];
i_11 -> MatMul_112 [label="FLOAT(192, 192)"];
MatMul_112 -> SkipSimplifiedLayerNormalization_113 [label="FLOAT(batch,seq_length,192)"];
Gather_30 -> SkipSimplifiedLayerNormalization_113 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SkipSimplifiedLayerNormalization_113 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_113 -> MatMul_114 [label="FLOAT(batch,seq_length,192)"];
i_12 -> MatMul_114 [label="FLOAT(192, 1024)"];
MatMul_114 -> Sigmoid_115 [label="FLOAT(batch,seq_length,1024)"];
MatMul_114 -> Mul_116 [label="FLOAT(batch,seq_length,1024)"];
Sigmoid_115 -> Mul_116 [label="FLOAT(batch,seq_length,1024)"];
SkipSimplifiedLayerNormalization_113 -> MatMul_117 [label="FLOAT(batch,seq_length,192)"];
i_13 -> MatMul_117 [label="FLOAT(192, 1024)"];
Mul_116 -> Mul_118 [label="FLOAT(batch,seq_length,1024)"];
MatMul_117 -> Mul_118 [label="FLOAT(batch,seq_length,1024)"];
Mul_118 -> MatMul_119 [label="FLOAT(batch,seq_length,1024)"];
i_14 -> MatMul_119 [label="FLOAT(1024, 192)"];
MatMul_119 -> SkipSimplifiedLayerNormalization_120 [label="FLOAT(batch,seq_length,192)"];
SkipSimplifiedLayerNormalization_113 -> SkipSimplifiedLayerNormalization_120 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SkipSimplifiedLayerNormalization_120 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_120 -> MatMul_121 [label="FLOAT(batch,seq_length,192)"];
i_15 -> MatMul_121 [label="FLOAT(192, 32000)"];
O_122 [label="linear_7\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
MatMul_121 -> O_122;
O_123 [label="cat_7\nFLOAT(batch,1,cache_length + Max(seq_length, seq_length),96)", fillcolor="#aaaaee"];
Concat_76 -> O_123;
O_124 [label="cat_8\nFLOAT(batch,1,cache_length + seq_length,96)", fillcolor="#aaaaee"];
Concat_77 -> O_124;
}](../../_images/graphviz-e7e8e9ab1c4047f4897867fe38bcc23ca205ae0b.png)