onnx_diagnostic.helpers.dot_helper
- onnx_diagnostic.helpers.dot_helper.to_dot(model: ModelProto) → str [source]
Converts a model into a dot graph. Here is an example:
![digraph {
graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
i_5 [label="p_model_layers_0_self_attn_q_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_6 [label="p_model_layers_0_self_attn_k_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_7 [label="p_model_layers_0_self_attn_v_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_8 [label="p_model_layers_0_self_attn_o_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_9 [label="p_model_layers_0_mlp_gate_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_10 [label="p_model_layers_0_mlp_up_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_11 [label="p_model_layers_0_mlp_down_proj_weight::T10\nFLOAT(1024, 192)", fillcolor="#cccc00"];
i_12 [label="p_lm_head_weight::T10\nFLOAT(192, 32000)", fillcolor="#cccc00"];
i_13 [label="to_422\nFLOAT(1, 1, 48)", fillcolor="#cccc00"];
i_14 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
i_15 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_16 [label="model.layers.0.post_attention_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_17 [label="model.norm.weight\nFLOAT(192)", fillcolor="#cccc00"];
ReduceMax_18 [label="ReduceMax(.)", fillcolor="#cccccc"];
Add_19 [label="Add(., 1)", fillcolor="#cccccc"];
Range_20 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Unsqueeze_21 [label="Unsqueeze(., [0])", fillcolor="#eeeeee"];
Shape_22 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_23 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_24 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
Squeeze_25 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_26 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Add_27 [label="Add(., .)", fillcolor="#cccccc"];
Squeeze_28 [label="Squeeze(.)", fillcolor="#eeeeee"];
Gather_29 [label="Gather(., .)", fillcolor="#cccccc"];
SimplifiedLayerNormalization_30 [label="SimplifiedLayerNormalization\n(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
Range_31 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Add_32 [label="Add(., .)", fillcolor="#cccccc"];
Cast_33 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
Range_34 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Range_35 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Unsqueeze_36 [label="Unsqueeze(., [1, 2, 3])", fillcolor="#eeeeee"];
Unsqueeze_37 [label="Unsqueeze(., [0, 1, 3])", fillcolor="#eeeeee"];
Unsqueeze_38 [label="Unsqueeze(., [0, 1, 2])", fillcolor="#eeeeee"];
LessOrEqual_39 [label="LessOrEqual(., .)", fillcolor="#cccccc"];
Shape_40 [label="Shape(., start=-1)", fillcolor="#d2a81f"];
Mul_41 [label="Mul(., .)", fillcolor="#cccccc"];
Add_42 [label="Add(., .)", fillcolor="#cccccc"];
Reshape_43 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Reshape_44 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Gather_45 [label="Gather(., .)", fillcolor="#cccccc"];
Shape_46 [label="Shape(.)", fillcolor="#d2a81f"];
Reshape_47 [label="Reshape(., .)", fillcolor="#eeeeee"];
And_48 [label="And(., .)", fillcolor="#cccccc"];
CosSinCache_p1_49 [label="intermediate.\nCosSinCache_p1(., .)", fillcolor="#cccccc"];
Squeeze_50 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
Squeeze_51 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
MatMul_52 [label="MatMul(., .)", fillcolor="#ee9999"];
RotaryEmbedding_53 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
Reshape_54 [label="Reshape(., [0, 0, 2, 96])", fillcolor="#eeeeee"];
Transpose_55 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_56 [label="MatMul(., .)", fillcolor="#ee9999"];
Unsqueeze_57 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
RotaryEmbedding_58 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
MatMul_59 [label="MatMul(., .)", fillcolor="#ee9999"];
Unsqueeze_60 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
Concat_61 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Concat_62 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Unsqueeze_63 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Expand_64 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
Squeeze_65 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
Unsqueeze_66 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Expand_67 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
Squeeze_68 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
LocalAttention_to1_69 [label="intermediate.\nLocalAttention_to1\n(., ., ., ., [0.31947157])", fillcolor="#cccccc"];
Transpose_70 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
Reshape_71 [label="Reshape(., [0, 0, 192])", fillcolor="#eeeeee"];
MatMul_72 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_73 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_74 [label="MatMul(., .)", fillcolor="#ee9999"];
QuickGelu_75 [label="com.microsoft.\nQuickGelu(.)", fillcolor="#cccccc"];
MatMul_76 [label="MatMul(., .)", fillcolor="#ee9999"];
Mul_77 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_78 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_79 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_80 [label="MatMul(., .)", fillcolor="#ee9999"];
I_2 -> ReduceMax_18 [label="INT64(batch,seq_length)"];
ReduceMax_18 -> Add_19 [label="INT64()"];
Add_19 -> Range_20 [label="INT64()"];
Range_20 -> Unsqueeze_21 [label="INT64(NEWDIM_range1)"];
I_0 -> Shape_22 [label="INT64(batch,seq_length)"];
Shape_22 -> Squeeze_23 [label="INT64(1)"];
I_2 -> Shape_24 [label="INT64(batch,seq_length)"];
Shape_24 -> Squeeze_25 [label="INT64(1)"];
I_3 -> Shape_26 [label="FLOAT(batch,1,cache_length,96)"];
Shape_26 -> Add_27 [label="INT64(1)"];
Shape_22 -> Add_27 [label="INT64(1)"];
Add_27 -> Squeeze_28 [label="INT64(1)"];
i_14 -> Gather_29 [label="FLOAT(32000, 192)"];
I_0 -> Gather_29 [label="INT64(batch,seq_length)"];
Gather_29 -> SimplifiedLayerNormalization_30 [label="FLOAT(batch,seq_length,192)"];
i_15 -> SimplifiedLayerNormalization_30 [label="FLOAT(192)"];
Squeeze_23 -> Range_31 [label="INT64()"];
Range_31 -> Add_32 [label="INT64(seq_length)"];
Shape_26 -> Add_32 [label="INT64(1)"];
I_1 -> Cast_33 [label="INT64(batch,cache+seq)"];
Squeeze_25 -> Range_34 [label="INT64()"];
Squeeze_28 -> Range_35 [label="INT64()"];
Range_34 -> Unsqueeze_36 [label="INT64(batch)"];
Add_32 -> Unsqueeze_37 [label="INT64(seq_length)"];
Range_35 -> Unsqueeze_38 [label="INT64(cache_length+seq_length)"];
Unsqueeze_38 -> LessOrEqual_39 [label="INT64(1,1,1,cache_length+seq_length)"];
Unsqueeze_37 -> LessOrEqual_39 [label="INT64(1,1,seq_length,1)"];
Cast_33 -> Shape_40 [label="BOOL(batch,cache+seq)"];
Unsqueeze_36 -> Mul_41 [label="INT64(batch,1,1,1)"];
Shape_40 -> Mul_41 [label="INT64(1)"];
Unsqueeze_38 -> Add_42 [label="INT64(1,1,1,cache_length+seq_length)"];
Mul_41 -> Add_42 [label="INT64(batch,1,1,1)"];
Cast_33 -> Reshape_43 [label="BOOL(batch,cache+seq)"];
Add_42 -> Reshape_44 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
Reshape_43 -> Gather_45 [label="BOOL(batch*(cache+seq))"];
Reshape_44 -> Gather_45 [label="INT64(batch*(cache_length+seq_length))"];
Add_42 -> Shape_46 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
Gather_45 -> Reshape_47 [label="BOOL(batch*(cache_length+seq_length))"];
Shape_46 -> Reshape_47 [label="INT64(4)"];
LessOrEqual_39 -> And_48 [label="BOOL(1,\n1,\nseq_length,\ncache_length+seq_length)"];
Reshape_47 -> And_48 [label="BOOL(batch,\n1,\n1,\ncache_length+seq_length)"];
Unsqueeze_21 -> CosSinCache_p1_49 [label="INT64(1,NEWDIM_range1)"];
i_13 -> CosSinCache_p1_49 [label="FLOAT(1, 1, 48)"];
CosSinCache_p1_49 -> Squeeze_50 [label="FLOAT(1,NEWDIM_range1,48)"];
CosSinCache_p1_49 -> Squeeze_51 [label="FLOAT(1,NEWDIM_range1,48)"];
SimplifiedLayerNormalization_30 -> MatMul_52 [label="FLOAT(batch,seq_length,192)"];
i_5 -> MatMul_52 [label="FLOAT(192, 192)"];
MatMul_52 -> RotaryEmbedding_53 [label="FLOAT(batch,seq_length,192)"];
I_2 -> RotaryEmbedding_53 [label="INT64(batch,seq_length)"];
Squeeze_51 -> RotaryEmbedding_53 [label="FLOAT(NEWDIM_range1,48)"];
Squeeze_50 -> RotaryEmbedding_53 [label="FLOAT(NEWDIM_range1,48)"];
RotaryEmbedding_53 -> Reshape_54 [label="FLOAT(batch,seq_length,192)"];
Reshape_54 -> Transpose_55 [label="FLOAT(batch,seq_length,2,96)"];
SimplifiedLayerNormalization_30 -> MatMul_56 [label="FLOAT(batch,seq_length,192)"];
i_6 -> MatMul_56 [label="FLOAT(192, 96)"];
MatMul_56 -> Unsqueeze_57 [label="FLOAT(batch,seq_length,96)"];
Unsqueeze_57 -> RotaryEmbedding_58 [label="FLOAT(batch,1,seq_length,96)"];
I_2 -> RotaryEmbedding_58 [label="INT64(batch,seq_length)"];
Squeeze_51 -> RotaryEmbedding_58 [label="FLOAT(NEWDIM_range1,48)"];
Squeeze_50 -> RotaryEmbedding_58 [label="FLOAT(NEWDIM_range1,48)"];
SimplifiedLayerNormalization_30 -> MatMul_59 [label="FLOAT(batch,seq_length,192)"];
i_7 -> MatMul_59 [label="FLOAT(192, 96)"];
MatMul_59 -> Unsqueeze_60 [label="FLOAT(batch,seq_length,96)"];
I_3 -> Concat_61 [label="FLOAT(batch,1,cache_length,96)"];
RotaryEmbedding_58 -> Concat_61 [label="FLOAT(batch,1,seq_length,96)"];
I_4 -> Concat_62 [label="FLOAT(batch,1,cache_length,96)"];
Unsqueeze_60 -> Concat_62 [label="FLOAT(batch,1,seq_length,96)"];
Concat_61 -> Unsqueeze_63;
Unsqueeze_63 -> Expand_64 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
Expand_64 -> Squeeze_65 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
Concat_62 -> Unsqueeze_66;
Unsqueeze_66 -> Expand_67 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
Expand_67 -> Squeeze_68 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
Transpose_55 -> LocalAttention_to1_69 [label="FLOAT(batch,2,seq_length,96)"];
Squeeze_65 -> LocalAttention_to1_69 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
Squeeze_68 -> LocalAttention_to1_69 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
And_48 -> LocalAttention_to1_69 [label="BOOL(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
LocalAttention_to1_69 -> Transpose_70 [label="FLOAT(batch,2,seq_length,96)"];
Transpose_70 -> Reshape_71 [label="FLOAT(batch,seq_length,2,96)"];
Reshape_71 -> MatMul_72 [label="FLOAT(batch,seq_length,192)"];
i_8 -> MatMul_72 [label="FLOAT(192, 192)"];
Gather_29 -> SkipSimplifiedLayerNormalization_73 [label="FLOAT(batch,seq_length,192)"];
MatMul_72 -> SkipSimplifiedLayerNormalization_73 [label="FLOAT(batch,seq_length,192)"];
i_16 -> SkipSimplifiedLayerNormalization_73 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_73 -> MatMul_74 [label="FLOAT(batch,seq_length,192)"];
i_9 -> MatMul_74 [label="FLOAT(192, 1024)"];
MatMul_74 -> QuickGelu_75 [label="FLOAT(batch,seq_length,1024)"];
SkipSimplifiedLayerNormalization_73 -> MatMul_76 [label="FLOAT(batch,seq_length,192)"];
i_10 -> MatMul_76 [label="FLOAT(192, 1024)"];
QuickGelu_75 -> Mul_77 [label="FLOAT(batch,seq_length,1024)"];
MatMul_76 -> Mul_77 [label="FLOAT(batch,seq_length,1024)"];
Mul_77 -> MatMul_78 [label="FLOAT(batch,seq_length,1024)"];
i_11 -> MatMul_78 [label="FLOAT(1024, 192)"];
SkipSimplifiedLayerNormalization_73 -> SkipSimplifiedLayerNormalization_79 [label="FLOAT(batch,seq_length,192)"];
MatMul_78 -> SkipSimplifiedLayerNormalization_79 [label="FLOAT(batch,seq_length,192)"];
i_17 -> SkipSimplifiedLayerNormalization_79 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_79 -> MatMul_80 [label="FLOAT(batch,seq_length,192)"];
i_12 -> MatMul_80 [label="FLOAT(192, 32000)"];
O_81 [label="output_0\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
MatMul_80 -> O_81;
O_82 [label="present_key_values_key_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
Concat_61 -> O_82;
O_83 [label="present_key_values_value_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
Concat_62 -> O_83;
}](../../_images/graphviz-bd6b057e4459e5db66474b38f3e6216c8a901225.png)
Or this one, obtained with `torch.onnx.export()`:
![digraph {
graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
i_5 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_6 [label="val_344\nFLOAT(1, 48)", fillcolor="#cccc00"];
i_7 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
i_8 [label="val_120\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_9 [label="val_127\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_10 [label="val_134\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_11 [label="val_288\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_12 [label="val_293\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_13 [label="val_295\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_14 [label="val_296\nFLOAT(1024, 192)", fillcolor="#cccc00"];
i_15 [label="val_321\nFLOAT(192, 32000)", fillcolor="#cccc00"];
Shape_16 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_17 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_18 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
Squeeze_19 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_20 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_21 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_22 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Squeeze_23 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_24 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Squeeze_25 [label="Squeeze(.)", fillcolor="#eeeeee"];
Add_26 [label="Add(., .)", fillcolor="#cccccc"];
Max_27 [label="Max(., .)", fillcolor="#cccccc"];
Add_28 [label="Add(., .)", fillcolor="#cccccc"];
Add_29 [label="Add(., .)", fillcolor="#cccccc"];
Gather_30 [label="Gather(., ., axis=0)", fillcolor="#cccccc"];
Range_31 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Add_32 [label="Add(., .)", fillcolor="#cccccc"];
Cast_33 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
Range_34 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Range_35 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Unsqueeze_36 [label="Unsqueeze(., [1, 2])", fillcolor="#eeeeee"];
Unsqueeze_37 [label="Unsqueeze(., [3])", fillcolor="#eeeeee"];
Unsqueeze_38 [label="Unsqueeze(., [0, 1])", fillcolor="#eeeeee"];
Unsqueeze_39 [label="Unsqueeze(., [3])", fillcolor="#eeeeee"];
Unsqueeze_40 [label="Unsqueeze(., [0, 1])", fillcolor="#eeeeee"];
Unsqueeze_41 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
LessOrEqual_42 [label="LessOrEqual(., .)", fillcolor="#cccccc"];
And_43 [label="And(True, .)", fillcolor="#cccccc"];
Max_44 [label="Max(., .)", fillcolor="#cccccc"];
Shape_45 [label="Shape(., start=0)", fillcolor="#d2a81f"];
Expand_46 [label="Expand(., .)", fillcolor="#cccccc"];
Unsqueeze_47 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
Expand_48 [label="Expand(., .)", fillcolor="#cccccc"];
Unsqueeze_49 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
Concat_50 [label="Concat(., ., axis=-1)", fillcolor="#cccccc"];
GatherND_51 [label="GatherND(., .)", fillcolor="#cccccc"];
And_52 [label="And(., .)", fillcolor="#cccccc"];
Reshape_53 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_54 [label="Concat(., [1], ., ., axis=0)", fillcolor="#cccccc"];
Expand_55 [label="Expand(., .)", fillcolor="#cccccc"];
SimplifiedLayerNormalization_56 [label="SimplifiedLayerNormalization\n(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
MatMul_57 [label="MatMul(., .)", fillcolor="#ee9999"];
Concat_58 [label="Concat(., ., [-1], [96], axis=0)", fillcolor="#cccccc"];
Reshape_59 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_60 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_61 [label="MatMul(., .)", fillcolor="#ee9999"];
Reshape_62 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_63 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_64 [label="MatMul(., .)", fillcolor="#ee9999"];
Reshape_65 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_66 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
ReduceMax_67 [label="ReduceMax(.)", fillcolor="#cccccc"];
Add_68 [label="Add(., 1)", fillcolor="#cccccc"];
Range_69 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Reshape_70 [label="Reshape(., [-1, 1])", fillcolor="#eeeeee"];
Cast_71 [label="Cast(., to=FLOAT)", fillcolor="#cccccc"];
MatMul_72 [label="MatMul(., .)", fillcolor="#ee9999"];
Cos_73 [label="Cos(.)", fillcolor="#cccccc"];
Sin_74 [label="Sin(.)", fillcolor="#cccccc"];
RotaryEmbedding_75 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
RotaryEmbedding_76 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
Concat_77 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Concat_78 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Unsqueeze_79 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Reshape_80 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_81 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
Expand_82 [label="Expand(., .)", fillcolor="#cccccc"];
Concat_83 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
Reshape_84 [label="Reshape(., .)", fillcolor="#eeeeee"];
Unsqueeze_85 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Reshape_86 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_87 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
Expand_88 [label="Expand(., .)", fillcolor="#cccccc"];
Concat_89 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
Reshape_90 [label="Reshape(., .)", fillcolor="#eeeeee"];
Slice_91 [label="Slice(., [0], ., [3], [1])", fillcolor="#cccccc"];
Shape_92 [label="Shape(., start=0)", fillcolor="#d2a81f"];
Slice_93 [label="Slice(., [-1], [9223372036854775807])", fillcolor="#cccccc"];
Slice_94 [label="Slice(., [-2], [-1])", fillcolor="#cccccc"];
Slice_95 [label="Slice\n(., [-9223372036854775808], [-2])", fillcolor="#cccccc"];
Concat_96 [label="Concat([-1], ., ., axis=0)", fillcolor="#cccccc"];
Reshape_97 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_98 [label="Transpose(., perm=[0, 2, 1])", fillcolor="#ee99ee"];
Concat_99 [label="Concat(., ., ., axis=0)", fillcolor="#cccccc"];
Reshape_100 [label="Reshape(., .)", fillcolor="#eeeeee"];
Mul_101 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
Mul_102 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
Where_103 [label="Where(., 0.0, -inf)", fillcolor="#cccccc"];
MatMul_104 [label="MatMul(., .)", fillcolor="#ee9999"];
Add_105 [label="Add(., .)", fillcolor="#cccccc"];
Softmax_106 [label="Softmax(., axis=-1)", fillcolor="#cccccc"];
IsNaN_107 [label="IsNaN(.)", fillcolor="#cccccc"];
Where_108 [label="Where(., 0.0, .)", fillcolor="#cccccc"];
MatMul_109 [label="MatMul(., .)", fillcolor="#ee9999"];
Transpose_110 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
Concat_111 [label="Concat(., ., [-1], axis=0)", fillcolor="#cccccc"];
Reshape_112 [label="Reshape(., .)", fillcolor="#eeeeee"];
MatMul_113 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_114 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_115 [label="MatMul(., .)", fillcolor="#ee9999"];
Sigmoid_116 [label="Sigmoid(.)", fillcolor="#cccccc"];
Mul_117 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_118 [label="MatMul(., .)", fillcolor="#ee9999"];
Mul_119 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_120 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_121 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_122 [label="MatMul(., .)", fillcolor="#ee9999"];
I_0 -> Shape_16 [label="INT64(batch,seq_length)"];
Shape_16 -> Squeeze_17 [label="INT64(1)"];
I_2 -> Shape_18 [label="INT64(batch,seq_length)"];
Shape_18 -> Squeeze_19 [label="INT64(1)"];
I_2 -> Shape_20 [label="INT64(batch,seq_length)"];
Shape_20 -> Squeeze_21 [label="INT64(1)"];
I_3 -> Shape_22 [label="FLOAT(batch,1,cache_length,96)"];
Shape_22 -> Squeeze_23 [label="INT64(1)"];
I_4 -> Shape_24 [label="FLOAT(batch,1,cache_length,96)"];
Shape_24 -> Squeeze_25 [label="INT64(1)"];
Squeeze_23 -> Add_26 [label="INT64()"];
Squeeze_17 -> Add_26 [label="INT64()"];
Squeeze_17 -> Max_27 [label="INT64()"];
Squeeze_21 -> Max_27 [label="INT64()"];
Squeeze_23 -> Add_28 [label="INT64()"];
Max_27 -> Add_28 [label="INT64()"];
Squeeze_25 -> Add_29 [label="INT64()"];
Squeeze_17 -> Add_29 [label="INT64()"];
i_7 -> Gather_30 [label="FLOAT(32000, 192)"];
I_0 -> Gather_30 [label="INT64(batch,seq_length)"];
Squeeze_17 -> Range_31 [label="INT64()"];
Range_31 -> Add_32 [label="INT64(seq_length)"];
Squeeze_23 -> Add_32 [label="INT64()"];
I_1 -> Cast_33 [label="INT64(batch,cache+seq)"];
Squeeze_19 -> Range_34 [label="INT64()"];
Add_26 -> Range_35 [label="INT64()"];
Range_34 -> Unsqueeze_36 [label="INT64(batch)"];
Unsqueeze_36 -> Unsqueeze_37 [label="INT64(batch,1,1)"];
Add_32 -> Unsqueeze_38 [label="INT64(seq_length)"];
Unsqueeze_38 -> Unsqueeze_39 [label="INT64(1,1,seq_length)"];
Range_35 -> Unsqueeze_40 [label="INT64(cache_length + seq_length)"];
Unsqueeze_40 -> Unsqueeze_41 [label="INT64(1,1,cache_length + seq_length)"];
Unsqueeze_41 -> LessOrEqual_42 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_39 -> LessOrEqual_42 [label="INT64(1,1,seq_length,1)"];
LessOrEqual_42 -> And_43 [label="BOOL(1,\n1,\nseq_length,\ncache_length + seq_length)"];
Unsqueeze_37 -> Max_44 [label="INT64(batch,1,1,1)"];
Unsqueeze_41 -> Max_44 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Max_44 -> Shape_45 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_37 -> Expand_46 [label="INT64(batch,1,1,1)"];
Shape_45 -> Expand_46 [label="INT64(4)"];
Expand_46 -> Unsqueeze_47 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_41 -> Expand_48 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Shape_45 -> Expand_48 [label="INT64(4)"];
Expand_48 -> Unsqueeze_49 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_47 -> Concat_50 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n1)"];
Unsqueeze_49 -> Concat_50 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n1)"];
Cast_33 -> GatherND_51 [label="BOOL(batch,cache+seq)"];
Concat_50 -> GatherND_51 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n2)"];
And_43 -> And_52 [label="BOOL(1,\n1,\nseq_length,\ncache_length + seq_length)"];
GatherND_51 -> And_52 [label="BOOL(batch,\n1,\n1,\ncache_length + seq_length)"];
Add_26 -> Reshape_53 [label="INT64()"];
Shape_18 -> Concat_54 [label="INT64(1)"];
Shape_16 -> Concat_54 [label="INT64(1)"];
Reshape_53 -> Concat_54 [label="INT64(1)"];
And_52 -> Expand_55 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Concat_54 -> Expand_55 [label="INT64(4)"];
Gather_30 -> SimplifiedLayerNormalization_56 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SimplifiedLayerNormalization_56 [label="FLOAT(192)"];
SimplifiedLayerNormalization_56 -> MatMul_57 [label="FLOAT(batch,seq_length,192)"];
i_8 -> MatMul_57 [label="FLOAT(192, 192)"];
Shape_18 -> Concat_58 [label="INT64(1)"];
Shape_16 -> Concat_58 [label="INT64(1)"];
MatMul_57 -> Reshape_59 [label="FLOAT(batch,seq_length,192)"];
Concat_58 -> Reshape_59 [label="INT64(4)"];
Reshape_59 -> Transpose_60 [label="FLOAT(batch,seq_length,2,96)"];
SimplifiedLayerNormalization_56 -> MatMul_61 [label="FLOAT(batch,seq_length,192)"];
i_9 -> MatMul_61 [label="FLOAT(192, 96)"];
MatMul_61 -> Reshape_62 [label="FLOAT(batch,seq_length,96)"];
Concat_58 -> Reshape_62 [label="INT64(4)"];
Reshape_62 -> Transpose_63 [label="FLOAT(batch,seq_length,1,96)"];
SimplifiedLayerNormalization_56 -> MatMul_64 [label="FLOAT(batch,seq_length,192)"];
i_10 -> MatMul_64 [label="FLOAT(192, 96)"];
MatMul_64 -> Reshape_65 [label="FLOAT(batch,seq_length,96)"];
Concat_58 -> Reshape_65 [label="INT64(4)"];
Reshape_65 -> Transpose_66 [label="FLOAT(batch,seq_length,1,96)"];
I_2 -> ReduceMax_67 [label="INT64(batch,seq_length)"];
ReduceMax_67 -> Add_68 [label="INT64()"];
Add_68 -> Range_69 [label="INT64()"];
Range_69 -> Reshape_70 [label="INT64(?)"];
Reshape_70 -> Cast_71 [label="INT64(?,1)"];
Cast_71 -> MatMul_72 [label="FLOAT(?,1)"];
i_6 -> MatMul_72 [label="FLOAT(1, 48)"];
MatMul_72 -> Cos_73 [label="FLOAT(?,48)"];
MatMul_72 -> Sin_74 [label="FLOAT(?,48)"];
Transpose_60 -> RotaryEmbedding_75 [label="FLOAT(batch,2,seq_length,96)"];
I_2 -> RotaryEmbedding_75 [label="INT64(batch,seq_length)"];
Cos_73 -> RotaryEmbedding_75 [label="FLOAT(?,48)"];
Sin_74 -> RotaryEmbedding_75 [label="FLOAT(?,48)"];
Transpose_63 -> RotaryEmbedding_76 [label="FLOAT(batch,1,seq_length,96)"];
I_2 -> RotaryEmbedding_76 [label="INT64(batch,seq_length)"];
Cos_73 -> RotaryEmbedding_76 [label="FLOAT(?,48)"];
Sin_74 -> RotaryEmbedding_76 [label="FLOAT(?,48)"];
I_3 -> Concat_77 [label="FLOAT(batch,1,cache_length,96)"];
RotaryEmbedding_76 -> Concat_77 [label="FLOAT(batch,\n1,\nMax(seq_length, seq_length),\n96)"];
I_4 -> Concat_78 [label="FLOAT(batch,1,cache_length,96)"];
Transpose_66 -> Concat_78 [label="FLOAT(batch,1,seq_length,96)"];
Concat_77 -> Unsqueeze_79;
Add_28 -> Reshape_80 [label="INT64()"];
Shape_18 -> Concat_81 [label="INT64(1)"];
Reshape_80 -> Concat_81 [label="INT64(1)"];
Unsqueeze_79 -> Expand_82 [label="FLOAT(batch,\n1,\n1,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_81 -> Expand_82 [label="INT64(5)"];
Shape_18 -> Concat_83 [label="INT64(1)"];
Reshape_80 -> Concat_83 [label="INT64(1)"];
Expand_82 -> Reshape_84 [label="FLOAT(batch,\n1,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_83 -> Reshape_84 [label="INT64(4)"];
Concat_78 -> Unsqueeze_85;
Add_29 -> Reshape_86 [label="INT64()"];
Shape_18 -> Concat_87 [label="INT64(1)"];
Reshape_86 -> Concat_87 [label="INT64(1)"];
Unsqueeze_85 -> Expand_88 [label="FLOAT(batch,\n1,\n1,\ncache_length + seq_length,\n96)"];
Concat_87 -> Expand_88 [label="INT64(5)"];
Shape_18 -> Concat_89 [label="INT64(1)"];
Reshape_86 -> Concat_89 [label="INT64(1)"];
Expand_88 -> Reshape_90 [label="FLOAT(batch,\n1,\n2,\ncache_length + seq_length,\n96)"];
Concat_89 -> Reshape_90 [label="INT64(4)"];
Expand_55 -> Slice_91 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Reshape_80 -> Slice_91 [label="INT64(1)"];
Reshape_84 -> Shape_92 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Shape_92 -> Slice_93 [label="INT64(4)"];
Shape_92 -> Slice_94 [label="INT64(4)"];
Shape_92 -> Slice_95 [label="INT64(4)"];
Slice_94 -> Concat_96 [label="INT64(1)"];
Slice_93 -> Concat_96 [label="INT64(1)"];
Reshape_84 -> Reshape_97 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_96 -> Reshape_97 [label="INT64(3)"];
Reshape_97 -> Transpose_98 [label="FLOAT(?,\ncache_length + Max(seq_length, seq_length),\n96)"];
Slice_95 -> Concat_99 [label="INT64(2)"];
Slice_93 -> Concat_99 [label="INT64(1)"];
Slice_94 -> Concat_99 [label="INT64(1)"];
Transpose_98 -> Reshape_100 [label="FLOAT(?,\n96,\ncache_length + Max(seq_length, seq_length))"];
Concat_99 -> Reshape_100 [label="INT64(4)"];
RotaryEmbedding_75 -> Mul_101 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Reshape_100 -> Mul_102 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
Slice_91 -> Where_103 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
Mul_101 -> MatMul_104 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Mul_102 -> MatMul_104 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
MatMul_104 -> Add_105 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\ncache_length + Max(seq_length, seq_length))"];
Where_103 -> Add_105 [label="FLOAT(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
Add_105 -> Softmax_106 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Softmax_106 -> IsNaN_107 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
IsNaN_107 -> Where_108 [label="BOOL(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Softmax_106 -> Where_108 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Where_108 -> MatMul_109 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Reshape_90 -> MatMul_109 [label="FLOAT(batch,\n2,\ncache_length + seq_length,\n96)"];
MatMul_109 -> Transpose_110 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Shape_18 -> Concat_111 [label="INT64(1)"];
Shape_16 -> Concat_111 [label="INT64(1)"];
Transpose_110 -> Reshape_112 [label="FLOAT(batch,\nMax(seq_length, seq_length),\n2,\n96)"];
Concat_111 -> Reshape_112 [label="INT64(3)"];
Reshape_112 -> MatMul_113 [label="FLOAT(batch,\nseq_length,\n((192*Max(seq_length, seq_length))//seq_length))"];
i_11 -> MatMul_113 [label="FLOAT(192, 192)"];
MatMul_113 -> SkipSimplifiedLayerNormalization_114 [label="FLOAT(batch,seq_length,192)"];
Gather_30 -> SkipSimplifiedLayerNormalization_114 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SkipSimplifiedLayerNormalization_114 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_114 -> MatMul_115 [label="FLOAT(batch,seq_length,192)"];
i_12 -> MatMul_115 [label="FLOAT(192, 1024)"];
MatMul_115 -> Sigmoid_116 [label="FLOAT(batch,seq_length,1024)"];
MatMul_115 -> Mul_117 [label="FLOAT(batch,seq_length,1024)"];
Sigmoid_116 -> Mul_117 [label="FLOAT(batch,seq_length,1024)"];
SkipSimplifiedLayerNormalization_114 -> MatMul_118 [label="FLOAT(batch,seq_length,192)"];
i_13 -> MatMul_118 [label="FLOAT(192, 1024)"];
Mul_117 -> Mul_119 [label="FLOAT(batch,seq_length,1024)"];
MatMul_118 -> Mul_119 [label="FLOAT(batch,seq_length,1024)"];
Mul_119 -> MatMul_120 [label="FLOAT(batch,seq_length,1024)"];
i_14 -> MatMul_120 [label="FLOAT(1024, 192)"];
MatMul_120 -> SkipSimplifiedLayerNormalization_121 [label="FLOAT(batch,seq_length,192)"];
SkipSimplifiedLayerNormalization_114 -> SkipSimplifiedLayerNormalization_121 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SkipSimplifiedLayerNormalization_121 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_121 -> MatMul_122 [label="FLOAT(batch,seq_length,192)"];
i_15 -> MatMul_122 [label="FLOAT(192, 32000)"];
O_123 [label="linear_7\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
MatMul_122 -> O_123;
O_124 [label="cat_7\nFLOAT(batch,1,cache_length + Max(seq_length, seq_length),96)", fillcolor="#aaaaee"];
Concat_77 -> O_124;
O_125 [label="cat_8\nFLOAT(batch,1,cache_length + seq_length,96)", fillcolor="#aaaaee"];
Concat_78 -> O_125;
}](../../_images/graphviz-5174bd96791259860a6e2b567c1a110d150babee.png)
Converts a model into a dot graph. Here is an example:
![digraph {
graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
i_5 [label="p_model_layers_0_self_attn_q_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_6 [label="p_model_layers_0_self_attn_k_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_7 [label="p_model_layers_0_self_attn_v_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_8 [label="p_model_layers_0_self_attn_o_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_9 [label="p_model_layers_0_mlp_gate_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_10 [label="p_model_layers_0_mlp_up_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_11 [label="p_model_layers_0_mlp_down_proj_weight::T10\nFLOAT(1024, 192)", fillcolor="#cccc00"];
i_12 [label="p_lm_head_weight::T10\nFLOAT(192, 32000)", fillcolor="#cccc00"];
i_13 [label="to_422\nFLOAT(1, 1, 48)", fillcolor="#cccc00"];
i_14 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
i_15 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_16 [label="model.layers.0.post_attention_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_17 [label="model.norm.weight\nFLOAT(192)", fillcolor="#cccc00"];
ReduceMax_18 [label="ReduceMax(.)", fillcolor="#cccccc"];
Add_19 [label="Add(., 1)", fillcolor="#cccccc"];
Range_20 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Unsqueeze_21 [label="Unsqueeze(., [0])", fillcolor="#eeeeee"];
Shape_22 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_23 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_24 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
Squeeze_25 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_26 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Add_27 [label="Add(., .)", fillcolor="#cccccc"];
Squeeze_28 [label="Squeeze(.)", fillcolor="#eeeeee"];
Gather_29 [label="Gather(., .)", fillcolor="#cccccc"];
SimplifiedLayerNormalization_30 [label="SimplifiedLayerNormalization\n(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
Range_31 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Add_32 [label="Add(., .)", fillcolor="#cccccc"];
Cast_33 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
Range_34 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Range_35 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Unsqueeze_36 [label="Unsqueeze(., [1, 2, 3])", fillcolor="#eeeeee"];
Unsqueeze_37 [label="Unsqueeze(., [0, 1, 3])", fillcolor="#eeeeee"];
Unsqueeze_38 [label="Unsqueeze(., [0, 1, 2])", fillcolor="#eeeeee"];
LessOrEqual_39 [label="LessOrEqual(., .)", fillcolor="#cccccc"];
Shape_40 [label="Shape(., start=-1)", fillcolor="#d2a81f"];
Mul_41 [label="Mul(., .)", fillcolor="#cccccc"];
Add_42 [label="Add(., .)", fillcolor="#cccccc"];
Reshape_43 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Reshape_44 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Gather_45 [label="Gather(., .)", fillcolor="#cccccc"];
Shape_46 [label="Shape(.)", fillcolor="#d2a81f"];
Reshape_47 [label="Reshape(., .)", fillcolor="#eeeeee"];
And_48 [label="And(., .)", fillcolor="#cccccc"];
CosSinCache_p1_49 [label="intermediate.\nCosSinCache_p1(., .)", fillcolor="#cccccc"];
Squeeze_50 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
Squeeze_51 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
MatMul_52 [label="MatMul(., .)", fillcolor="#ee9999"];
RotaryEmbedding_53 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
Reshape_54 [label="Reshape(., [0, 0, 2, 96])", fillcolor="#eeeeee"];
Transpose_55 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_56 [label="MatMul(., .)", fillcolor="#ee9999"];
Unsqueeze_57 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
RotaryEmbedding_58 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
MatMul_59 [label="MatMul(., .)", fillcolor="#ee9999"];
Unsqueeze_60 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
Concat_61 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Concat_62 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Unsqueeze_63 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Expand_64 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
Squeeze_65 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
Unsqueeze_66 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Expand_67 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
Squeeze_68 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
LocalAttention_to1_69 [label="intermediate.\nLocalAttention_to1\n(., ., ., ., [0.31947157])", fillcolor="#cccccc"];
Transpose_70 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
Reshape_71 [label="Reshape(., [0, 0, 192])", fillcolor="#eeeeee"];
MatMul_72 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_73 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_74 [label="MatMul(., .)", fillcolor="#ee9999"];
QuickGelu_75 [label="com.microsoft.\nQuickGelu(.)", fillcolor="#cccccc"];
MatMul_76 [label="MatMul(., .)", fillcolor="#ee9999"];
Mul_77 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_78 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_79 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_80 [label="MatMul(., .)", fillcolor="#ee9999"];
I_2 -> ReduceMax_18 [label="INT64(batch,seq_length)"];
ReduceMax_18 -> Add_19 [label="INT64()"];
Add_19 -> Range_20 [label="INT64()"];
Range_20 -> Unsqueeze_21 [label="INT64(NEWDIM_range1)"];
I_0 -> Shape_22 [label="INT64(batch,seq_length)"];
Shape_22 -> Squeeze_23 [label="INT64(1)"];
I_2 -> Shape_24 [label="INT64(batch,seq_length)"];
Shape_24 -> Squeeze_25 [label="INT64(1)"];
I_3 -> Shape_26 [label="FLOAT(batch,1,cache_length,96)"];
Shape_26 -> Add_27 [label="INT64(1)"];
Shape_22 -> Add_27 [label="INT64(1)"];
Add_27 -> Squeeze_28 [label="INT64(1)"];
i_14 -> Gather_29 [label="FLOAT(32000, 192)"];
I_0 -> Gather_29 [label="INT64(batch,seq_length)"];
Gather_29 -> SimplifiedLayerNormalization_30 [label="FLOAT(batch,seq_length,192)"];
i_15 -> SimplifiedLayerNormalization_30 [label="FLOAT(192)"];
Squeeze_23 -> Range_31 [label="INT64()"];
Range_31 -> Add_32 [label="INT64(seq_length)"];
Shape_26 -> Add_32 [label="INT64(1)"];
I_1 -> Cast_33 [label="INT64(batch,cache+seq)"];
Squeeze_25 -> Range_34 [label="INT64()"];
Squeeze_28 -> Range_35 [label="INT64()"];
Range_34 -> Unsqueeze_36 [label="INT64(batch)"];
Add_32 -> Unsqueeze_37 [label="INT64(seq_length)"];
Range_35 -> Unsqueeze_38 [label="INT64(cache_length+seq_length)"];
Unsqueeze_38 -> LessOrEqual_39 [label="INT64(1,1,1,cache_length+seq_length)"];
Unsqueeze_37 -> LessOrEqual_39 [label="INT64(1,1,seq_length,1)"];
Cast_33 -> Shape_40 [label="BOOL(batch,cache+seq)"];
Unsqueeze_36 -> Mul_41 [label="INT64(batch,1,1,1)"];
Shape_40 -> Mul_41 [label="INT64(1)"];
Unsqueeze_38 -> Add_42 [label="INT64(1,1,1,cache_length+seq_length)"];
Mul_41 -> Add_42 [label="INT64(batch,1,1,1)"];
Cast_33 -> Reshape_43 [label="BOOL(batch,cache+seq)"];
Add_42 -> Reshape_44 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
Reshape_43 -> Gather_45 [label="BOOL(batch*(cache+seq))"];
Reshape_44 -> Gather_45 [label="INT64(batch*(cache_length+seq_length))"];
Add_42 -> Shape_46 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
Gather_45 -> Reshape_47 [label="BOOL(batch*(cache_length+seq_length))"];
Shape_46 -> Reshape_47 [label="INT64(4)"];
LessOrEqual_39 -> And_48 [label="BOOL(1,\n1,\nseq_length,\ncache_length+seq_length)"];
Reshape_47 -> And_48 [label="BOOL(batch,\n1,\n1,\ncache_length+seq_length)"];
Unsqueeze_21 -> CosSinCache_p1_49 [label="INT64(1,NEWDIM_range1)"];
i_13 -> CosSinCache_p1_49 [label="FLOAT(1, 1, 48)"];
CosSinCache_p1_49 -> Squeeze_50 [label="FLOAT(1,NEWDIM_range1,48)"];
CosSinCache_p1_49 -> Squeeze_51 [label="FLOAT(1,NEWDIM_range1,48)"];
SimplifiedLayerNormalization_30 -> MatMul_52 [label="FLOAT(batch,seq_length,192)"];
i_5 -> MatMul_52 [label="FLOAT(192, 192)"];
MatMul_52 -> RotaryEmbedding_53 [label="FLOAT(batch,seq_length,192)"];
I_2 -> RotaryEmbedding_53 [label="INT64(batch,seq_length)"];
Squeeze_51 -> RotaryEmbedding_53 [label="FLOAT(NEWDIM_range1,48)"];
Squeeze_50 -> RotaryEmbedding_53 [label="FLOAT(NEWDIM_range1,48)"];
RotaryEmbedding_53 -> Reshape_54 [label="FLOAT(batch,seq_length,192)"];
Reshape_54 -> Transpose_55 [label="FLOAT(batch,seq_length,2,96)"];
SimplifiedLayerNormalization_30 -> MatMul_56 [label="FLOAT(batch,seq_length,192)"];
i_6 -> MatMul_56 [label="FLOAT(192, 96)"];
MatMul_56 -> Unsqueeze_57 [label="FLOAT(batch,seq_length,96)"];
Unsqueeze_57 -> RotaryEmbedding_58 [label="FLOAT(batch,1,seq_length,96)"];
I_2 -> RotaryEmbedding_58 [label="INT64(batch,seq_length)"];
Squeeze_51 -> RotaryEmbedding_58 [label="FLOAT(NEWDIM_range1,48)"];
Squeeze_50 -> RotaryEmbedding_58 [label="FLOAT(NEWDIM_range1,48)"];
SimplifiedLayerNormalization_30 -> MatMul_59 [label="FLOAT(batch,seq_length,192)"];
i_7 -> MatMul_59 [label="FLOAT(192, 96)"];
MatMul_59 -> Unsqueeze_60 [label="FLOAT(batch,seq_length,96)"];
I_3 -> Concat_61 [label="FLOAT(batch,1,cache_length,96)"];
RotaryEmbedding_58 -> Concat_61 [label="FLOAT(batch,1,seq_length,96)"];
I_4 -> Concat_62 [label="FLOAT(batch,1,cache_length,96)"];
Unsqueeze_60 -> Concat_62 [label="FLOAT(batch,1,seq_length,96)"];
Concat_61 -> Unsqueeze_63;
Unsqueeze_63 -> Expand_64 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
Expand_64 -> Squeeze_65 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
Concat_62 -> Unsqueeze_66;
Unsqueeze_66 -> Expand_67 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
Expand_67 -> Squeeze_68 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
Transpose_55 -> LocalAttention_to1_69 [label="FLOAT(batch,2,seq_length,96)"];
Squeeze_65 -> LocalAttention_to1_69 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
Squeeze_68 -> LocalAttention_to1_69 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
And_48 -> LocalAttention_to1_69 [label="BOOL(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
LocalAttention_to1_69 -> Transpose_70 [label="FLOAT(batch,2,seq_length,96)"];
Transpose_70 -> Reshape_71 [label="FLOAT(batch,seq_length,2,96)"];
Reshape_71 -> MatMul_72 [label="FLOAT(batch,seq_length,192)"];
i_8 -> MatMul_72 [label="FLOAT(192, 192)"];
Gather_29 -> SkipSimplifiedLayerNormalization_73 [label="FLOAT(batch,seq_length,192)"];
MatMul_72 -> SkipSimplifiedLayerNormalization_73 [label="FLOAT(batch,seq_length,192)"];
i_16 -> SkipSimplifiedLayerNormalization_73 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_73 -> MatMul_74 [label="FLOAT(batch,seq_length,192)"];
i_9 -> MatMul_74 [label="FLOAT(192, 1024)"];
MatMul_74 -> QuickGelu_75 [label="FLOAT(batch,seq_length,1024)"];
SkipSimplifiedLayerNormalization_73 -> MatMul_76 [label="FLOAT(batch,seq_length,192)"];
i_10 -> MatMul_76 [label="FLOAT(192, 1024)"];
QuickGelu_75 -> Mul_77 [label="FLOAT(batch,seq_length,1024)"];
MatMul_76 -> Mul_77 [label="FLOAT(batch,seq_length,1024)"];
Mul_77 -> MatMul_78 [label="FLOAT(batch,seq_length,1024)"];
i_11 -> MatMul_78 [label="FLOAT(1024, 192)"];
SkipSimplifiedLayerNormalization_73 -> SkipSimplifiedLayerNormalization_79 [label="FLOAT(batch,seq_length,192)"];
MatMul_78 -> SkipSimplifiedLayerNormalization_79 [label="FLOAT(batch,seq_length,192)"];
i_17 -> SkipSimplifiedLayerNormalization_79 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_79 -> MatMul_80 [label="FLOAT(batch,seq_length,192)"];
i_12 -> MatMul_80 [label="FLOAT(192, 32000)"];
O_81 [label="output_0\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
MatMul_80 -> O_81;
O_82 [label="present_key_values_key_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
Concat_61 -> O_82;
O_83 [label="present_key_values_value_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
Concat_62 -> O_83;
}](../../_images/graphviz-bd6b057e4459e5db66474b38f3e6216c8a901225.png)
Or this one, obtained with `torch.onnx.export()`:
![digraph {
graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
i_5 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_6 [label="val_344\nFLOAT(1, 48)", fillcolor="#cccc00"];
i_7 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
i_8 [label="val_120\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_9 [label="val_127\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_10 [label="val_134\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_11 [label="val_288\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_12 [label="val_293\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_13 [label="val_295\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_14 [label="val_296\nFLOAT(1024, 192)", fillcolor="#cccc00"];
i_15 [label="val_321\nFLOAT(192, 32000)", fillcolor="#cccc00"];
Shape_16 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_17 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_18 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
Squeeze_19 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_20 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_21 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_22 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Squeeze_23 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_24 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Squeeze_25 [label="Squeeze(.)", fillcolor="#eeeeee"];
Add_26 [label="Add(., .)", fillcolor="#cccccc"];
Max_27 [label="Max(., .)", fillcolor="#cccccc"];
Add_28 [label="Add(., .)", fillcolor="#cccccc"];
Add_29 [label="Add(., .)", fillcolor="#cccccc"];
Gather_30 [label="Gather(., ., axis=0)", fillcolor="#cccccc"];
Range_31 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Add_32 [label="Add(., .)", fillcolor="#cccccc"];
Cast_33 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
Range_34 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Range_35 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Unsqueeze_36 [label="Unsqueeze(., [1, 2])", fillcolor="#eeeeee"];
Unsqueeze_37 [label="Unsqueeze(., [3])", fillcolor="#eeeeee"];
Unsqueeze_38 [label="Unsqueeze(., [0, 1])", fillcolor="#eeeeee"];
Unsqueeze_39 [label="Unsqueeze(., [3])", fillcolor="#eeeeee"];
Unsqueeze_40 [label="Unsqueeze(., [0, 1])", fillcolor="#eeeeee"];
Unsqueeze_41 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
LessOrEqual_42 [label="LessOrEqual(., .)", fillcolor="#cccccc"];
And_43 [label="And(True, .)", fillcolor="#cccccc"];
Max_44 [label="Max(., .)", fillcolor="#cccccc"];
Shape_45 [label="Shape(., start=0)", fillcolor="#d2a81f"];
Expand_46 [label="Expand(., .)", fillcolor="#cccccc"];
Unsqueeze_47 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
Expand_48 [label="Expand(., .)", fillcolor="#cccccc"];
Unsqueeze_49 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
Concat_50 [label="Concat(., ., axis=-1)", fillcolor="#cccccc"];
GatherND_51 [label="GatherND(., .)", fillcolor="#cccccc"];
And_52 [label="And(., .)", fillcolor="#cccccc"];
Reshape_53 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_54 [label="Concat(., [1], ., ., axis=0)", fillcolor="#cccccc"];
Expand_55 [label="Expand(., .)", fillcolor="#cccccc"];
SimplifiedLayerNormalization_56 [label="SimplifiedLayerNormalization\n(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
MatMul_57 [label="MatMul(., .)", fillcolor="#ee9999"];
Concat_58 [label="Concat(., ., [-1], [96], axis=0)", fillcolor="#cccccc"];
Reshape_59 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_60 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_61 [label="MatMul(., .)", fillcolor="#ee9999"];
Reshape_62 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_63 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_64 [label="MatMul(., .)", fillcolor="#ee9999"];
Reshape_65 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_66 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
ReduceMax_67 [label="ReduceMax(.)", fillcolor="#cccccc"];
Add_68 [label="Add(., 1)", fillcolor="#cccccc"];
Range_69 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Reshape_70 [label="Reshape(., [-1, 1])", fillcolor="#eeeeee"];
Cast_71 [label="Cast(., to=FLOAT)", fillcolor="#cccccc"];
MatMul_72 [label="MatMul(., .)", fillcolor="#ee9999"];
Cos_73 [label="Cos(.)", fillcolor="#cccccc"];
Sin_74 [label="Sin(.)", fillcolor="#cccccc"];
RotaryEmbedding_75 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
RotaryEmbedding_76 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
Concat_77 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Concat_78 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Unsqueeze_79 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Reshape_80 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_81 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
Expand_82 [label="Expand(., .)", fillcolor="#cccccc"];
Concat_83 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
Reshape_84 [label="Reshape(., .)", fillcolor="#eeeeee"];
Unsqueeze_85 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Reshape_86 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_87 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
Expand_88 [label="Expand(., .)", fillcolor="#cccccc"];
Concat_89 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
Reshape_90 [label="Reshape(., .)", fillcolor="#eeeeee"];
Slice_91 [label="Slice(., [0], ., [3], [1])", fillcolor="#cccccc"];
Shape_92 [label="Shape(., start=0)", fillcolor="#d2a81f"];
Slice_93 [label="Slice(., [-1], [9223372036854775807])", fillcolor="#cccccc"];
Slice_94 [label="Slice(., [-2], [-1])", fillcolor="#cccccc"];
Slice_95 [label="Slice\n(., [-9223372036854775808], [-2])", fillcolor="#cccccc"];
Concat_96 [label="Concat([-1], ., ., axis=0)", fillcolor="#cccccc"];
Reshape_97 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_98 [label="Transpose(., perm=[0, 2, 1])", fillcolor="#ee99ee"];
Concat_99 [label="Concat(., ., ., axis=0)", fillcolor="#cccccc"];
Reshape_100 [label="Reshape(., .)", fillcolor="#eeeeee"];
Mul_101 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
Mul_102 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
Where_103 [label="Where(., 0.0, -inf)", fillcolor="#cccccc"];
MatMul_104 [label="MatMul(., .)", fillcolor="#ee9999"];
Add_105 [label="Add(., .)", fillcolor="#cccccc"];
Softmax_106 [label="Softmax(., axis=-1)", fillcolor="#cccccc"];
IsNaN_107 [label="IsNaN(.)", fillcolor="#cccccc"];
Where_108 [label="Where(., 0.0, .)", fillcolor="#cccccc"];
MatMul_109 [label="MatMul(., .)", fillcolor="#ee9999"];
Transpose_110 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
Concat_111 [label="Concat(., ., [-1], axis=0)", fillcolor="#cccccc"];
Reshape_112 [label="Reshape(., .)", fillcolor="#eeeeee"];
MatMul_113 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_114 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_115 [label="MatMul(., .)", fillcolor="#ee9999"];
Sigmoid_116 [label="Sigmoid(.)", fillcolor="#cccccc"];
Mul_117 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_118 [label="MatMul(., .)", fillcolor="#ee9999"];
Mul_119 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_120 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_121 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_122 [label="MatMul(., .)", fillcolor="#ee9999"];
I_0 -> Shape_16 [label="INT64(batch,seq_length)"];
Shape_16 -> Squeeze_17 [label="INT64(1)"];
I_2 -> Shape_18 [label="INT64(batch,seq_length)"];
Shape_18 -> Squeeze_19 [label="INT64(1)"];
I_2 -> Shape_20 [label="INT64(batch,seq_length)"];
Shape_20 -> Squeeze_21 [label="INT64(1)"];
I_3 -> Shape_22 [label="FLOAT(batch,1,cache_length,96)"];
Shape_22 -> Squeeze_23 [label="INT64(1)"];
I_4 -> Shape_24 [label="FLOAT(batch,1,cache_length,96)"];
Shape_24 -> Squeeze_25 [label="INT64(1)"];
Squeeze_23 -> Add_26 [label="INT64()"];
Squeeze_17 -> Add_26 [label="INT64()"];
Squeeze_17 -> Max_27 [label="INT64()"];
Squeeze_21 -> Max_27 [label="INT64()"];
Squeeze_23 -> Add_28 [label="INT64()"];
Max_27 -> Add_28 [label="INT64()"];
Squeeze_25 -> Add_29 [label="INT64()"];
Squeeze_17 -> Add_29 [label="INT64()"];
i_7 -> Gather_30 [label="FLOAT(32000, 192)"];
I_0 -> Gather_30 [label="INT64(batch,seq_length)"];
Squeeze_17 -> Range_31 [label="INT64()"];
Range_31 -> Add_32 [label="INT64(seq_length)"];
Squeeze_23 -> Add_32 [label="INT64()"];
I_1 -> Cast_33 [label="INT64(batch,cache+seq)"];
Squeeze_19 -> Range_34 [label="INT64()"];
Add_26 -> Range_35 [label="INT64()"];
Range_34 -> Unsqueeze_36 [label="INT64(batch)"];
Unsqueeze_36 -> Unsqueeze_37 [label="INT64(batch,1,1)"];
Add_32 -> Unsqueeze_38 [label="INT64(seq_length)"];
Unsqueeze_38 -> Unsqueeze_39 [label="INT64(1,1,seq_length)"];
Range_35 -> Unsqueeze_40 [label="INT64(cache_length + seq_length)"];
Unsqueeze_40 -> Unsqueeze_41 [label="INT64(1,1,cache_length + seq_length)"];
Unsqueeze_41 -> LessOrEqual_42 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_39 -> LessOrEqual_42 [label="INT64(1,1,seq_length,1)"];
LessOrEqual_42 -> And_43 [label="BOOL(1,\n1,\nseq_length,\ncache_length + seq_length)"];
Unsqueeze_37 -> Max_44 [label="INT64(batch,1,1,1)"];
Unsqueeze_41 -> Max_44 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Max_44 -> Shape_45 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_37 -> Expand_46 [label="INT64(batch,1,1,1)"];
Shape_45 -> Expand_46 [label="INT64(4)"];
Expand_46 -> Unsqueeze_47 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_41 -> Expand_48 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Shape_45 -> Expand_48 [label="INT64(4)"];
Expand_48 -> Unsqueeze_49 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
Unsqueeze_47 -> Concat_50 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n1)"];
Unsqueeze_49 -> Concat_50 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n1)"];
Cast_33 -> GatherND_51 [label="BOOL(batch,cache+seq)"];
Concat_50 -> GatherND_51 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n2)"];
And_43 -> And_52 [label="BOOL(1,\n1,\nseq_length,\ncache_length + seq_length)"];
GatherND_51 -> And_52 [label="BOOL(batch,\n1,\n1,\ncache_length + seq_length)"];
Add_26 -> Reshape_53 [label="INT64()"];
Shape_18 -> Concat_54 [label="INT64(1)"];
Shape_16 -> Concat_54 [label="INT64(1)"];
Reshape_53 -> Concat_54 [label="INT64(1)"];
And_52 -> Expand_55 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Concat_54 -> Expand_55 [label="INT64(4)"];
Gather_30 -> SimplifiedLayerNormalization_56 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SimplifiedLayerNormalization_56 [label="FLOAT(192)"];
SimplifiedLayerNormalization_56 -> MatMul_57 [label="FLOAT(batch,seq_length,192)"];
i_8 -> MatMul_57 [label="FLOAT(192, 192)"];
Shape_18 -> Concat_58 [label="INT64(1)"];
Shape_16 -> Concat_58 [label="INT64(1)"];
MatMul_57 -> Reshape_59 [label="FLOAT(batch,seq_length,192)"];
Concat_58 -> Reshape_59 [label="INT64(4)"];
Reshape_59 -> Transpose_60 [label="FLOAT(batch,seq_length,2,96)"];
SimplifiedLayerNormalization_56 -> MatMul_61 [label="FLOAT(batch,seq_length,192)"];
i_9 -> MatMul_61 [label="FLOAT(192, 96)"];
MatMul_61 -> Reshape_62 [label="FLOAT(batch,seq_length,96)"];
Concat_58 -> Reshape_62 [label="INT64(4)"];
Reshape_62 -> Transpose_63 [label="FLOAT(batch,seq_length,1,96)"];
SimplifiedLayerNormalization_56 -> MatMul_64 [label="FLOAT(batch,seq_length,192)"];
i_10 -> MatMul_64 [label="FLOAT(192, 96)"];
MatMul_64 -> Reshape_65 [label="FLOAT(batch,seq_length,96)"];
Concat_58 -> Reshape_65 [label="INT64(4)"];
Reshape_65 -> Transpose_66 [label="FLOAT(batch,seq_length,1,96)"];
I_2 -> ReduceMax_67 [label="INT64(batch,seq_length)"];
ReduceMax_67 -> Add_68 [label="INT64()"];
Add_68 -> Range_69 [label="INT64()"];
Range_69 -> Reshape_70 [label="INT64(?)"];
Reshape_70 -> Cast_71 [label="INT64(?,1)"];
Cast_71 -> MatMul_72 [label="FLOAT(?,1)"];
i_6 -> MatMul_72 [label="FLOAT(1, 48)"];
MatMul_72 -> Cos_73 [label="FLOAT(?,48)"];
MatMul_72 -> Sin_74 [label="FLOAT(?,48)"];
Transpose_60 -> RotaryEmbedding_75 [label="FLOAT(batch,2,seq_length,96)"];
I_2 -> RotaryEmbedding_75 [label="INT64(batch,seq_length)"];
Cos_73 -> RotaryEmbedding_75 [label="FLOAT(?,48)"];
Sin_74 -> RotaryEmbedding_75 [label="FLOAT(?,48)"];
Transpose_63 -> RotaryEmbedding_76 [label="FLOAT(batch,1,seq_length,96)"];
I_2 -> RotaryEmbedding_76 [label="INT64(batch,seq_length)"];
Cos_73 -> RotaryEmbedding_76 [label="FLOAT(?,48)"];
Sin_74 -> RotaryEmbedding_76 [label="FLOAT(?,48)"];
I_3 -> Concat_77 [label="FLOAT(batch,1,cache_length,96)"];
RotaryEmbedding_76 -> Concat_77 [label="FLOAT(batch,\n1,\nMax(seq_length, seq_length),\n96)"];
I_4 -> Concat_78 [label="FLOAT(batch,1,cache_length,96)"];
Transpose_66 -> Concat_78 [label="FLOAT(batch,1,seq_length,96)"];
Concat_77 -> Unsqueeze_79;
Add_28 -> Reshape_80 [label="INT64()"];
Shape_18 -> Concat_81 [label="INT64(1)"];
Reshape_80 -> Concat_81 [label="INT64(1)"];
Unsqueeze_79 -> Expand_82 [label="FLOAT(batch,\n1,\n1,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_81 -> Expand_82 [label="INT64(5)"];
Shape_18 -> Concat_83 [label="INT64(1)"];
Reshape_80 -> Concat_83 [label="INT64(1)"];
Expand_82 -> Reshape_84 [label="FLOAT(batch,\n1,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_83 -> Reshape_84 [label="INT64(4)"];
Concat_78 -> Unsqueeze_85;
Add_29 -> Reshape_86 [label="INT64()"];
Shape_18 -> Concat_87 [label="INT64(1)"];
Reshape_86 -> Concat_87 [label="INT64(1)"];
Unsqueeze_85 -> Expand_88 [label="FLOAT(batch,\n1,\n1,\ncache_length + seq_length,\n96)"];
Concat_87 -> Expand_88 [label="INT64(5)"];
Shape_18 -> Concat_89 [label="INT64(1)"];
Reshape_86 -> Concat_89 [label="INT64(1)"];
Expand_88 -> Reshape_90 [label="FLOAT(batch,\n1,\n2,\ncache_length + seq_length,\n96)"];
Concat_89 -> Reshape_90 [label="INT64(4)"];
Expand_55 -> Slice_91 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Reshape_80 -> Slice_91 [label="INT64(1)"];
Reshape_84 -> Shape_92 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Shape_92 -> Slice_93 [label="INT64(4)"];
Shape_92 -> Slice_94 [label="INT64(4)"];
Shape_92 -> Slice_95 [label="INT64(4)"];
Slice_94 -> Concat_96 [label="INT64(1)"];
Slice_93 -> Concat_96 [label="INT64(1)"];
Reshape_84 -> Reshape_97 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_96 -> Reshape_97 [label="INT64(3)"];
Reshape_97 -> Transpose_98 [label="FLOAT(?,\ncache_length + Max(seq_length, seq_length),\n96)"];
Slice_95 -> Concat_99 [label="INT64(2)"];
Slice_93 -> Concat_99 [label="INT64(1)"];
Slice_94 -> Concat_99 [label="INT64(1)"];
Transpose_98 -> Reshape_100 [label="FLOAT(?,\n96,\ncache_length + Max(seq_length, seq_length))"];
Concat_99 -> Reshape_100 [label="INT64(4)"];
RotaryEmbedding_75 -> Mul_101 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Reshape_100 -> Mul_102 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
Slice_91 -> Where_103 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
Mul_101 -> MatMul_104 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Mul_102 -> MatMul_104 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
MatMul_104 -> Add_105 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\ncache_length + Max(seq_length, seq_length))"];
Where_103 -> Add_105 [label="FLOAT(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
Add_105 -> Softmax_106 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Softmax_106 -> IsNaN_107 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
IsNaN_107 -> Where_108 [label="BOOL(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Softmax_106 -> Where_108 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Where_108 -> MatMul_109 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Reshape_90 -> MatMul_109 [label="FLOAT(batch,\n2,\ncache_length + seq_length,\n96)"];
MatMul_109 -> Transpose_110 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Shape_18 -> Concat_111 [label="INT64(1)"];
Shape_16 -> Concat_111 [label="INT64(1)"];
Transpose_110 -> Reshape_112 [label="FLOAT(batch,\nMax(seq_length, seq_length),\n2,\n96)"];
Concat_111 -> Reshape_112 [label="INT64(3)"];
Reshape_112 -> MatMul_113 [label="FLOAT(batch,\nseq_length,\n((192*Max(seq_length, seq_length))//seq_length))"];
i_11 -> MatMul_113 [label="FLOAT(192, 192)"];
MatMul_113 -> SkipSimplifiedLayerNormalization_114 [label="FLOAT(batch,seq_length,192)"];
Gather_30 -> SkipSimplifiedLayerNormalization_114 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SkipSimplifiedLayerNormalization_114 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_114 -> MatMul_115 [label="FLOAT(batch,seq_length,192)"];
i_12 -> MatMul_115 [label="FLOAT(192, 1024)"];
MatMul_115 -> Sigmoid_116 [label="FLOAT(batch,seq_length,1024)"];
MatMul_115 -> Mul_117 [label="FLOAT(batch,seq_length,1024)"];
Sigmoid_116 -> Mul_117 [label="FLOAT(batch,seq_length,1024)"];
SkipSimplifiedLayerNormalization_114 -> MatMul_118 [label="FLOAT(batch,seq_length,192)"];
i_13 -> MatMul_118 [label="FLOAT(192, 1024)"];
Mul_117 -> Mul_119 [label="FLOAT(batch,seq_length,1024)"];
MatMul_118 -> Mul_119 [label="FLOAT(batch,seq_length,1024)"];
Mul_119 -> MatMul_120 [label="FLOAT(batch,seq_length,1024)"];
i_14 -> MatMul_120 [label="FLOAT(1024, 192)"];
MatMul_120 -> SkipSimplifiedLayerNormalization_121 [label="FLOAT(batch,seq_length,192)"];
SkipSimplifiedLayerNormalization_114 -> SkipSimplifiedLayerNormalization_121 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SkipSimplifiedLayerNormalization_121 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_121 -> MatMul_122 [label="FLOAT(batch,seq_length,192)"];
i_15 -> MatMul_122 [label="FLOAT(192, 32000)"];
O_123 [label="linear_7\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
MatMul_122 -> O_123;
O_124 [label="cat_7\nFLOAT(batch,1,cache_length + Max(seq_length, seq_length),96)", fillcolor="#aaaaee"];
Concat_77 -> O_124;
O_125 [label="cat_8\nFLOAT(batch,1,cache_length + seq_length,96)", fillcolor="#aaaaee"];
Concat_78 -> O_125;
}](../../_images/graphviz-5174bd96791259860a6e2b567c1a110d150babee.png)