onnx_diagnostic.helpers.dot_helper¶
- onnx_diagnostic.helpers.dot_helper.to_dot(model: ModelProto) → str [source]
Converts a model into a dot graph. Here is an example:
![digraph {
graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
i_5 [label="p_model_layers_0_self_attn_q_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_6 [label="p_model_layers_0_self_attn_k_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_7 [label="p_model_layers_0_self_attn_v_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_8 [label="p_model_layers_0_self_attn_o_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_9 [label="p_model_layers_0_mlp_gate_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_10 [label="p_model_layers_0_mlp_up_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_11 [label="p_model_layers_0_mlp_down_proj_weight::T10\nFLOAT(1024, 192)", fillcolor="#cccc00"];
i_12 [label="p_lm_head_weight::T10\nFLOAT(192, 32000)", fillcolor="#cccc00"];
i_13 [label="to_422\nFLOAT(1, 1, 48)", fillcolor="#cccc00"];
i_14 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
i_15 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_16 [label="model.layers.0.post_attention_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_17 [label="model.norm.weight\nFLOAT(192)", fillcolor="#cccc00"];
Shape_18 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Shape_19 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
Shape_20 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
ReduceMax_21 [label="ReduceMax(.)", fillcolor="#cccccc"];
Add_22 [label="Add(., 1)", fillcolor="#cccccc"];
Range_23 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Unsqueeze_24 [label="Unsqueeze(., [0])", fillcolor="#eeeeee"];
Add_25 [label="Add(., .)", fillcolor="#cccccc"];
CausalMask_26 [label="intermediate.\nCausalMask(., .)", fillcolor="#cccccc"];
Gather_27 [label="Gather(., .)", fillcolor="#cccccc"];
SimplifiedLayerNormalization_28 [label="SimplifiedLayerNormalization\n(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
Cast_29 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
Shape_30 [label="Shape(., start=-1)", fillcolor="#d2a81f"];
CausalMaskMulAdd_31 [label="intermediate.\nCausalMaskMulAdd\n(., ., .)", fillcolor="#cccccc"];
Concat_32 [label="Concat([1], [1], ., [1], axis=0)", fillcolor="#cccccc"];
Expand_33 [label="Expand(., .)", fillcolor="#cccccc"];
Shape_34 [label="Shape(.)", fillcolor="#d2a81f"];
Reshape_35 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Reshape_36 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Gather_37 [label="Gather(., .)", fillcolor="#cccccc"];
Reshape_38 [label="Reshape(., .)", fillcolor="#eeeeee"];
And_39 [label="And(., .)", fillcolor="#cccccc"];
CosSinCache_p1_40 [label="intermediate.\nCosSinCache_p1(., .)", fillcolor="#cccccc"];
Squeeze_41 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
Squeeze_42 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
MatMul_43 [label="MatMul(., .)", fillcolor="#ee9999"];
RotaryEmbedding_44 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
Reshape_45 [label="Reshape(., [0, 0, 2, 96])", fillcolor="#eeeeee"];
Transpose_46 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_47 [label="MatMul(., .)", fillcolor="#ee9999"];
Unsqueeze_48 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
RotaryEmbedding_49 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
MatMul_50 [label="MatMul(., .)", fillcolor="#ee9999"];
Unsqueeze_51 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
Concat_52 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Concat_53 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Unsqueeze_54 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Expand_55 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
Squeeze_56 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
Unsqueeze_57 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Expand_58 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
Squeeze_59 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
LocalAttention_to1_60 [label="intermediate.\nLocalAttention_to1\n(., ., ., ., [0.31947157])", fillcolor="#cccccc"];
Transpose_61 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
Reshape_62 [label="Reshape(., [0, 0, 192])", fillcolor="#eeeeee"];
MatMul_63 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_64 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_65 [label="MatMul(., .)", fillcolor="#ee9999"];
QuickGelu_66 [label="com.microsoft.\nQuickGelu(.)", fillcolor="#cccccc"];
MatMul_67 [label="MatMul(., .)", fillcolor="#ee9999"];
Mul_68 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_69 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_70 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_71 [label="MatMul(., .)", fillcolor="#ee9999"];
I_0 -> Shape_18 [label="INT64(batch,seq_length)"];
I_2 -> Shape_19 [label="INT64(batch,seq_length)"];
I_3 -> Shape_20 [label="FLOAT(batch,1,cache_length,96)"];
I_2 -> ReduceMax_21 [label="INT64(batch,seq_length)"];
ReduceMax_21 -> Add_22 [label="INT64()"];
Add_22 -> Range_23 [label="INT64()"];
Range_23 -> Unsqueeze_24 [label="INT64(NEWDIM_range1)"];
Shape_18 -> Add_25 [label="INT64(1)"];
Shape_20 -> Add_25 [label="INT64(1)"];
Shape_20 -> CausalMask_26 [label="INT64(1)"];
Add_25 -> CausalMask_26 [label="INT64(1)"];
i_14 -> Gather_27 [label="FLOAT(32000, 192)"];
I_0 -> Gather_27 [label="INT64(batch,seq_length)"];
Gather_27 -> SimplifiedLayerNormalization_28 [label="FLOAT(batch,seq_length,192)"];
i_15 -> SimplifiedLayerNormalization_28 [label="FLOAT(192)"];
I_1 -> Cast_29 [label="INT64(batch,cache+seq)"];
Cast_29 -> Shape_30 [label="BOOL(batch,cache+seq)"];
Add_25 -> CausalMaskMulAdd_31 [label="INT64(1)"];
Shape_19 -> CausalMaskMulAdd_31 [label="INT64(1)"];
Shape_30 -> CausalMaskMulAdd_31 [label="INT64(1)"];
Shape_18 -> Concat_32 [label="INT64(1)"];
CausalMaskMulAdd_31 -> Expand_33 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
Concat_32 -> Expand_33 [label="INT64(4)"];
Expand_33 -> Shape_34 [label="INT64(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
Cast_29 -> Reshape_35 [label="BOOL(batch,cache+seq)"];
Expand_33 -> Reshape_36 [label="INT64(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
Reshape_35 -> Gather_37 [label="BOOL(batch*(cache+seq))"];
Reshape_36 -> Gather_37 [label="INT64(batch*seq_length*(cache_length+seq_length))"];
Gather_37 -> Reshape_38 [label="BOOL(batch*seq_length*(cache_length+seq_length))"];
Shape_34 -> Reshape_38 [label="INT64(4)"];
CausalMask_26 -> And_39 [label="BOOL(1,\n1,\nseq_length,\ncache_length+seq_length)"];
Reshape_38 -> And_39 [label="BOOL(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
Unsqueeze_24 -> CosSinCache_p1_40 [label="INT64(1,NEWDIM_range1)"];
i_13 -> CosSinCache_p1_40 [label="FLOAT(1, 1, 48)"];
CosSinCache_p1_40 -> Squeeze_41 [label="FLOAT(1,NEWDIM_range1,48)"];
CosSinCache_p1_40 -> Squeeze_42 [label="FLOAT(1,NEWDIM_range1,48)"];
SimplifiedLayerNormalization_28 -> MatMul_43 [label="FLOAT(batch,seq_length,192)"];
i_5 -> MatMul_43 [label="FLOAT(192, 192)"];
MatMul_43 -> RotaryEmbedding_44 [label="FLOAT(batch,seq_length,192)"];
I_2 -> RotaryEmbedding_44 [label="INT64(batch,seq_length)"];
Squeeze_42 -> RotaryEmbedding_44 [label="FLOAT(NEWDIM_range1,48)"];
Squeeze_41 -> RotaryEmbedding_44 [label="FLOAT(NEWDIM_range1,48)"];
RotaryEmbedding_44 -> Reshape_45 [label="FLOAT(batch,seq_length,192)"];
Reshape_45 -> Transpose_46 [label="FLOAT(batch,seq_length,2,96)"];
SimplifiedLayerNormalization_28 -> MatMul_47 [label="FLOAT(batch,seq_length,192)"];
i_6 -> MatMul_47 [label="FLOAT(192, 96)"];
MatMul_47 -> Unsqueeze_48 [label="FLOAT(batch,seq_length,96)"];
Unsqueeze_48 -> RotaryEmbedding_49 [label="FLOAT(batch,1,seq_length,96)"];
I_2 -> RotaryEmbedding_49 [label="INT64(batch,seq_length)"];
Squeeze_42 -> RotaryEmbedding_49 [label="FLOAT(NEWDIM_range1,48)"];
Squeeze_41 -> RotaryEmbedding_49 [label="FLOAT(NEWDIM_range1,48)"];
SimplifiedLayerNormalization_28 -> MatMul_50 [label="FLOAT(batch,seq_length,192)"];
i_7 -> MatMul_50 [label="FLOAT(192, 96)"];
MatMul_50 -> Unsqueeze_51 [label="FLOAT(batch,seq_length,96)"];
I_3 -> Concat_52 [label="FLOAT(batch,1,cache_length,96)"];
RotaryEmbedding_49 -> Concat_52 [label="FLOAT(batch,1,seq_length,96)"];
I_4 -> Concat_53 [label="FLOAT(batch,1,cache_length,96)"];
Unsqueeze_51 -> Concat_53 [label="FLOAT(batch,1,seq_length,96)"];
Concat_52 -> Unsqueeze_54;
Unsqueeze_54 -> Expand_55 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
Expand_55 -> Squeeze_56 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
Concat_53 -> Unsqueeze_57;
Unsqueeze_57 -> Expand_58 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
Expand_58 -> Squeeze_59 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
Transpose_46 -> LocalAttention_to1_60 [label="FLOAT(batch,2,seq_length,96)"];
Squeeze_56 -> LocalAttention_to1_60 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
Squeeze_59 -> LocalAttention_to1_60 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
And_39 -> LocalAttention_to1_60 [label="BOOL(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
LocalAttention_to1_60 -> Transpose_61 [label="FLOAT(batch,2,seq_length,96)"];
Transpose_61 -> Reshape_62 [label="FLOAT(batch,seq_length,2,96)"];
Reshape_62 -> MatMul_63 [label="FLOAT(batch,seq_length,192)"];
i_8 -> MatMul_63 [label="FLOAT(192, 192)"];
Gather_27 -> SkipSimplifiedLayerNormalization_64 [label="FLOAT(batch,seq_length,192)"];
MatMul_63 -> SkipSimplifiedLayerNormalization_64 [label="FLOAT(batch,seq_length,192)"];
i_16 -> SkipSimplifiedLayerNormalization_64 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_64 -> MatMul_65 [label="FLOAT(batch,seq_length,192)"];
i_9 -> MatMul_65 [label="FLOAT(192, 1024)"];
MatMul_65 -> QuickGelu_66 [label="FLOAT(batch,seq_length,1024)"];
SkipSimplifiedLayerNormalization_64 -> MatMul_67 [label="FLOAT(batch,seq_length,192)"];
i_10 -> MatMul_67 [label="FLOAT(192, 1024)"];
QuickGelu_66 -> Mul_68 [label="FLOAT(batch,seq_length,1024)"];
MatMul_67 -> Mul_68 [label="FLOAT(batch,seq_length,1024)"];
Mul_68 -> MatMul_69 [label="FLOAT(batch,seq_length,1024)"];
i_11 -> MatMul_69 [label="FLOAT(1024, 192)"];
SkipSimplifiedLayerNormalization_64 -> SkipSimplifiedLayerNormalization_70 [label="FLOAT(batch,seq_length,192)"];
MatMul_69 -> SkipSimplifiedLayerNormalization_70 [label="FLOAT(batch,seq_length,192)"];
i_17 -> SkipSimplifiedLayerNormalization_70 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_70 -> MatMul_71 [label="FLOAT(batch,seq_length,192)"];
i_12 -> MatMul_71 [label="FLOAT(192, 32000)"];
O_72 [label="output_0\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
MatMul_71 -> O_72;
O_73 [label="present_key_values_key_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
Concat_52 -> O_73;
O_74 [label="present_key_values_value_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
Concat_53 -> O_74;
}](../../_images/graphviz-8d629524be3f32957edb5449edad700102f3efc1.png)
Or this one, obtained with `torch.onnx.export()`:
![digraph {
graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
i_5 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_6 [label="val_341\nFLOAT(1, 48)", fillcolor="#cccc00"];
i_7 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
i_8 [label="val_119\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_9 [label="val_126\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_10 [label="val_133\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_11 [label="val_288\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_12 [label="val_293\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_13 [label="val_295\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_14 [label="val_296\nFLOAT(1024, 192)", fillcolor="#cccc00"];
i_15 [label="val_321\nFLOAT(192, 32000)", fillcolor="#cccc00"];
Shape_16 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_17 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_18 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
Squeeze_19 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_20 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_21 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_22 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Squeeze_23 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_24 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Squeeze_25 [label="Squeeze(.)", fillcolor="#eeeeee"];
Add_26 [label="Add(., .)", fillcolor="#cccccc"];
Max_27 [label="Max(., .)", fillcolor="#cccccc"];
Add_28 [label="Add(., .)", fillcolor="#cccccc"];
Add_29 [label="Add(., .)", fillcolor="#cccccc"];
Gather_30 [label="Gather(., ., axis=0)", fillcolor="#cccccc"];
Range_31 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Add_32 [label="Add(., .)", fillcolor="#cccccc"];
Cast_33 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
Range_34 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Range_35 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Reshape_36 [label="Reshape(., [-1, 1, 1, 1])", fillcolor="#eeeeee"];
Reshape_37 [label="Reshape(., [1, 1, -1, 1])", fillcolor="#eeeeee"];
Reshape_38 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_39 [label="Concat(., [1], ., ., axis=0)", fillcolor="#cccccc"];
Expand_40 [label="Expand(., .)", fillcolor="#cccccc"];
Expand_41 [label="Expand(., .)", fillcolor="#cccccc"];
Reshape_42 [label="Reshape(., [1, 1, 1, -1])", fillcolor="#eeeeee"];
Expand_43 [label="Expand(., .)", fillcolor="#cccccc"];
LessOrEqual_44 [label="LessOrEqual(., .)", fillcolor="#cccccc"];
And_45 [label="And(True, .)", fillcolor="#cccccc"];
Unsqueeze_46 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
Unsqueeze_47 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
Concat_48 [label="Concat(., ., axis=-1)", fillcolor="#cccccc"];
GatherND_49 [label="GatherND(., .)", fillcolor="#cccccc"];
And_50 [label="And(., .)", fillcolor="#cccccc"];
SimplifiedLayerNormalization_51 [label="SimplifiedLayerNormalization\n(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
MatMul_52 [label="MatMul(., .)", fillcolor="#ee9999"];
Concat_53 [label="Concat(., ., [-1], [96], axis=0)", fillcolor="#cccccc"];
Reshape_54 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_55 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_56 [label="MatMul(., .)", fillcolor="#ee9999"];
Reshape_57 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_58 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_59 [label="MatMul(., .)", fillcolor="#ee9999"];
Reshape_60 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_61 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
ReduceMax_62 [label="ReduceMax(.)", fillcolor="#cccccc"];
Add_63 [label="Add(., 1)", fillcolor="#cccccc"];
Range_64 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Reshape_65 [label="Reshape(., [-1, 1])", fillcolor="#eeeeee"];
Cast_66 [label="Cast(., to=FLOAT)", fillcolor="#cccccc"];
MatMul_67 [label="MatMul(., .)", fillcolor="#ee9999"];
Cos_68 [label="Cos(.)", fillcolor="#cccccc"];
Sin_69 [label="Sin(.)", fillcolor="#cccccc"];
RotaryEmbedding_70 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
RotaryEmbedding_71 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
Concat_72 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Concat_73 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Unsqueeze_74 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Reshape_75 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_76 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
Expand_77 [label="Expand(., .)", fillcolor="#cccccc"];
Concat_78 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
Reshape_79 [label="Reshape(., .)", fillcolor="#eeeeee"];
Unsqueeze_80 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Reshape_81 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_82 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
Expand_83 [label="Expand(., .)", fillcolor="#cccccc"];
Concat_84 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
Reshape_85 [label="Reshape(., .)", fillcolor="#eeeeee"];
Slice_86 [label="Slice(., [0], ., [3], [1])", fillcolor="#cccccc"];
Shape_87 [label="Shape(., start=0)", fillcolor="#d2a81f"];
Slice_88 [label="Slice(., [-1], [9223372036854775807])", fillcolor="#cccccc"];
Slice_89 [label="Slice(., [-2], [-1])", fillcolor="#cccccc"];
Slice_90 [label="Slice\n(., [-9223372036854775808], [-2])", fillcolor="#cccccc"];
Concat_91 [label="Concat([-1], ., ., axis=0)", fillcolor="#cccccc"];
Reshape_92 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_93 [label="Transpose(., perm=[0, 2, 1])", fillcolor="#ee99ee"];
Concat_94 [label="Concat(., ., ., axis=0)", fillcolor="#cccccc"];
Reshape_95 [label="Reshape(., .)", fillcolor="#eeeeee"];
Mul_96 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
Mul_97 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
Where_98 [label="Where(., 0.0, -3.4028235e+38)", fillcolor="#cccccc"];
MatMul_99 [label="MatMul(., .)", fillcolor="#ee9999"];
Add_100 [label="Add(., .)", fillcolor="#cccccc"];
Softmax_101 [label="Softmax(., axis=-1)", fillcolor="#cccccc"];
IsNaN_102 [label="IsNaN(.)", fillcolor="#cccccc"];
Where_103 [label="Where(., 0.0, .)", fillcolor="#cccccc"];
MatMul_104 [label="MatMul(., .)", fillcolor="#ee9999"];
Transpose_105 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
Concat_106 [label="Concat(., ., [-1], axis=0)", fillcolor="#cccccc"];
Reshape_107 [label="Reshape(., .)", fillcolor="#eeeeee"];
MatMul_108 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_109 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_110 [label="MatMul(., .)", fillcolor="#ee9999"];
Sigmoid_111 [label="Sigmoid(.)", fillcolor="#cccccc"];
Mul_112 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_113 [label="MatMul(., .)", fillcolor="#ee9999"];
Mul_114 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_115 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_116 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_117 [label="MatMul(., .)", fillcolor="#ee9999"];
I_0 -> Shape_16 [label="INT64(batch,seq_length)"];
Shape_16 -> Squeeze_17 [label="INT64(1)"];
I_2 -> Shape_18 [label="INT64(batch,seq_length)"];
Shape_18 -> Squeeze_19 [label="INT64(1)"];
I_2 -> Shape_20 [label="INT64(batch,seq_length)"];
Shape_20 -> Squeeze_21 [label="INT64(1)"];
I_3 -> Shape_22 [label="FLOAT(batch,1,cache_length,96)"];
Shape_22 -> Squeeze_23 [label="INT64(1)"];
I_4 -> Shape_24 [label="FLOAT(batch,1,cache_length,96)"];
Shape_24 -> Squeeze_25 [label="INT64(1)"];
Squeeze_23 -> Add_26 [label="INT64()"];
Squeeze_17 -> Add_26 [label="INT64()"];
Squeeze_17 -> Max_27 [label="INT64()"];
Squeeze_21 -> Max_27 [label="INT64()"];
Squeeze_23 -> Add_28 [label="INT64()"];
Max_27 -> Add_28 [label="INT64()"];
Squeeze_25 -> Add_29 [label="INT64()"];
Squeeze_17 -> Add_29 [label="INT64()"];
i_7 -> Gather_30 [label="FLOAT(32000, 192)"];
I_0 -> Gather_30 [label="INT64(batch,seq_length)"];
Squeeze_17 -> Range_31 [label="INT64()"];
Range_31 -> Add_32 [label="INT64(seq_length)"];
Squeeze_23 -> Add_32 [label="INT64()"];
I_1 -> Cast_33 [label="INT64(batch,cache+seq)"];
Add_26 -> Range_34 [label="INT64()"];
Squeeze_19 -> Range_35 [label="INT64()"];
Range_35 -> Reshape_36 [label="INT64(batch)"];
Add_32 -> Reshape_37 [label="INT64(seq_length)"];
Add_26 -> Reshape_38 [label="INT64()"];
Shape_18 -> Concat_39 [label="INT64(1)"];
Shape_16 -> Concat_39 [label="INT64(1)"];
Reshape_38 -> Concat_39 [label="INT64(1)"];
Reshape_36 -> Expand_40 [label="INT64(batch,1,1,1)"];
Concat_39 -> Expand_40 [label="INT64(4)"];
Reshape_37 -> Expand_41 [label="INT64(1,1,seq_length,1)"];
Concat_39 -> Expand_41 [label="INT64(4)"];
Range_34 -> Reshape_42 [label="INT64(cache_length + seq_length)"];
Reshape_42 -> Expand_43 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Concat_39 -> Expand_43 [label="INT64(4)"];
Expand_43 -> LessOrEqual_44 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Expand_41 -> LessOrEqual_44 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
LessOrEqual_44 -> And_45 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Expand_40 -> Unsqueeze_46 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Expand_43 -> Unsqueeze_47 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Unsqueeze_46 -> Concat_48 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length,\n1)"];
Unsqueeze_47 -> Concat_48 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length,\n1)"];
Cast_33 -> GatherND_49 [label="BOOL(batch,cache+seq)"];
Concat_48 -> GatherND_49 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length,\n2)"];
And_45 -> And_50 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
GatherND_49 -> And_50 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Gather_30 -> SimplifiedLayerNormalization_51 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SimplifiedLayerNormalization_51 [label="FLOAT(192)"];
SimplifiedLayerNormalization_51 -> MatMul_52 [label="FLOAT(batch,seq_length,192)"];
i_8 -> MatMul_52 [label="FLOAT(192, 192)"];
Shape_18 -> Concat_53 [label="INT64(1)"];
Shape_16 -> Concat_53 [label="INT64(1)"];
MatMul_52 -> Reshape_54 [label="FLOAT(batch,seq_length,192)"];
Concat_53 -> Reshape_54 [label="INT64(4)"];
Reshape_54 -> Transpose_55 [label="FLOAT(batch,seq_length,2,96)"];
SimplifiedLayerNormalization_51 -> MatMul_56 [label="FLOAT(batch,seq_length,192)"];
i_9 -> MatMul_56 [label="FLOAT(192, 96)"];
MatMul_56 -> Reshape_57 [label="FLOAT(batch,seq_length,96)"];
Concat_53 -> Reshape_57 [label="INT64(4)"];
Reshape_57 -> Transpose_58 [label="FLOAT(batch,seq_length,1,96)"];
SimplifiedLayerNormalization_51 -> MatMul_59 [label="FLOAT(batch,seq_length,192)"];
i_10 -> MatMul_59 [label="FLOAT(192, 96)"];
MatMul_59 -> Reshape_60 [label="FLOAT(batch,seq_length,96)"];
Concat_53 -> Reshape_60 [label="INT64(4)"];
Reshape_60 -> Transpose_61 [label="FLOAT(batch,seq_length,1,96)"];
I_2 -> ReduceMax_62 [label="INT64(batch,seq_length)"];
ReduceMax_62 -> Add_63 [label="INT64()"];
Add_63 -> Range_64 [label="INT64()"];
Range_64 -> Reshape_65 [label="INT64(?)"];
Reshape_65 -> Cast_66 [label="INT64(?,1)"];
Cast_66 -> MatMul_67 [label="FLOAT(?,1)"];
i_6 -> MatMul_67 [label="FLOAT(1, 48)"];
MatMul_67 -> Cos_68 [label="FLOAT(?,48)"];
MatMul_67 -> Sin_69 [label="FLOAT(?,48)"];
Transpose_55 -> RotaryEmbedding_70 [label="FLOAT(batch,2,seq_length,96)"];
I_2 -> RotaryEmbedding_70 [label="INT64(batch,seq_length)"];
Cos_68 -> RotaryEmbedding_70 [label="FLOAT(?,48)"];
Sin_69 -> RotaryEmbedding_70 [label="FLOAT(?,48)"];
Transpose_58 -> RotaryEmbedding_71 [label="FLOAT(batch,1,seq_length,96)"];
I_2 -> RotaryEmbedding_71 [label="INT64(batch,seq_length)"];
Cos_68 -> RotaryEmbedding_71 [label="FLOAT(?,48)"];
Sin_69 -> RotaryEmbedding_71 [label="FLOAT(?,48)"];
I_3 -> Concat_72 [label="FLOAT(batch,1,cache_length,96)"];
RotaryEmbedding_71 -> Concat_72 [label="FLOAT(batch,\n1,\nMax(seq_length, seq_length),\n96)"];
I_4 -> Concat_73 [label="FLOAT(batch,1,cache_length,96)"];
Transpose_61 -> Concat_73 [label="FLOAT(batch,1,seq_length,96)"];
Concat_72 -> Unsqueeze_74;
Add_28 -> Reshape_75 [label="INT64()"];
Shape_18 -> Concat_76 [label="INT64(1)"];
Reshape_75 -> Concat_76 [label="INT64(1)"];
Unsqueeze_74 -> Expand_77 [label="FLOAT(batch,\n1,\n1,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_76 -> Expand_77 [label="INT64(5)"];
Shape_18 -> Concat_78 [label="INT64(1)"];
Reshape_75 -> Concat_78 [label="INT64(1)"];
Expand_77 -> Reshape_79 [label="FLOAT(batch,\n1,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_78 -> Reshape_79 [label="INT64(4)"];
Concat_73 -> Unsqueeze_80;
Add_29 -> Reshape_81 [label="INT64()"];
Shape_18 -> Concat_82 [label="INT64(1)"];
Reshape_81 -> Concat_82 [label="INT64(1)"];
Unsqueeze_80 -> Expand_83 [label="FLOAT(batch,\n1,\n1,\ncache_length + seq_length,\n96)"];
Concat_82 -> Expand_83 [label="INT64(5)"];
Shape_18 -> Concat_84 [label="INT64(1)"];
Reshape_81 -> Concat_84 [label="INT64(1)"];
Expand_83 -> Reshape_85 [label="FLOAT(batch,\n1,\n2,\ncache_length + seq_length,\n96)"];
Concat_84 -> Reshape_85 [label="INT64(4)"];
And_50 -> Slice_86 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Reshape_75 -> Slice_86 [label="INT64(1)"];
Reshape_79 -> Shape_87 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Shape_87 -> Slice_88 [label="INT64(4)"];
Shape_87 -> Slice_89 [label="INT64(4)"];
Shape_87 -> Slice_90 [label="INT64(4)"];
Slice_89 -> Concat_91 [label="INT64(1)"];
Slice_88 -> Concat_91 [label="INT64(1)"];
Reshape_79 -> Reshape_92 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_91 -> Reshape_92 [label="INT64(3)"];
Reshape_92 -> Transpose_93 [label="FLOAT(?,\ncache_length + Max(seq_length, seq_length),\n96)"];
Slice_90 -> Concat_94 [label="INT64(2)"];
Slice_88 -> Concat_94 [label="INT64(1)"];
Slice_89 -> Concat_94 [label="INT64(1)"];
Transpose_93 -> Reshape_95 [label="FLOAT(?,\n96,\ncache_length + Max(seq_length, seq_length))"];
Concat_94 -> Reshape_95 [label="INT64(4)"];
RotaryEmbedding_70 -> Mul_96 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Reshape_95 -> Mul_97 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
Slice_86 -> Where_98 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
Mul_96 -> MatMul_99 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Mul_97 -> MatMul_99 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
MatMul_99 -> Add_100 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\ncache_length + Max(seq_length, seq_length))"];
Where_98 -> Add_100 [label="FLOAT(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
Add_100 -> Softmax_101 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Softmax_101 -> IsNaN_102 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
IsNaN_102 -> Where_103 [label="BOOL(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Softmax_101 -> Where_103 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Where_103 -> MatMul_104 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Reshape_85 -> MatMul_104 [label="FLOAT(batch,\n2,\ncache_length + seq_length,\n96)"];
MatMul_104 -> Transpose_105 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Shape_18 -> Concat_106 [label="INT64(1)"];
Shape_16 -> Concat_106 [label="INT64(1)"];
Transpose_105 -> Reshape_107 [label="FLOAT(batch,\nMax(seq_length, seq_length),\n2,\n96)"];
Concat_106 -> Reshape_107 [label="INT64(3)"];
Reshape_107 -> MatMul_108 [label="FLOAT(batch,\nseq_length,\n((192*Max(seq_length, seq_length))//seq_length))"];
i_11 -> MatMul_108 [label="FLOAT(192, 192)"];
MatMul_108 -> SkipSimplifiedLayerNormalization_109 [label="FLOAT(batch,seq_length,192)"];
Gather_30 -> SkipSimplifiedLayerNormalization_109 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SkipSimplifiedLayerNormalization_109 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_109 -> MatMul_110 [label="FLOAT(batch,seq_length,192)"];
i_12 -> MatMul_110 [label="FLOAT(192, 1024)"];
MatMul_110 -> Sigmoid_111 [label="FLOAT(batch,seq_length,1024)"];
MatMul_110 -> Mul_112 [label="FLOAT(batch,seq_length,1024)"];
Sigmoid_111 -> Mul_112 [label="FLOAT(batch,seq_length,1024)"];
SkipSimplifiedLayerNormalization_109 -> MatMul_113 [label="FLOAT(batch,seq_length,192)"];
i_13 -> MatMul_113 [label="FLOAT(192, 1024)"];
Mul_112 -> Mul_114 [label="FLOAT(batch,seq_length,1024)"];
MatMul_113 -> Mul_114 [label="FLOAT(batch,seq_length,1024)"];
Mul_114 -> MatMul_115 [label="FLOAT(batch,seq_length,1024)"];
i_14 -> MatMul_115 [label="FLOAT(1024, 192)"];
MatMul_115 -> SkipSimplifiedLayerNormalization_116 [label="FLOAT(batch,seq_length,192)"];
SkipSimplifiedLayerNormalization_109 -> SkipSimplifiedLayerNormalization_116 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SkipSimplifiedLayerNormalization_116 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_116 -> MatMul_117 [label="FLOAT(batch,seq_length,192)"];
i_15 -> MatMul_117 [label="FLOAT(192, 32000)"];
O_118 [label="linear_7\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
MatMul_117 -> O_118;
O_119 [label="cat_7\nFLOAT(batch,1,cache_length + Max(seq_length, seq_length),96)", fillcolor="#aaaaee"];
Concat_72 -> O_119;
O_120 [label="cat_8\nFLOAT(batch,1,cache_length + seq_length,96)", fillcolor="#aaaaee"];
Concat_73 -> O_120;
}](../../_images/graphviz-ad14503f6f622a494f32930f8e7c087db4209039.png)
Converts a model into a dot graph. Here is an example:
![digraph {
graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
i_5 [label="p_model_layers_0_self_attn_q_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_6 [label="p_model_layers_0_self_attn_k_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_7 [label="p_model_layers_0_self_attn_v_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_8 [label="p_model_layers_0_self_attn_o_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_9 [label="p_model_layers_0_mlp_gate_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_10 [label="p_model_layers_0_mlp_up_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_11 [label="p_model_layers_0_mlp_down_proj_weight::T10\nFLOAT(1024, 192)", fillcolor="#cccc00"];
i_12 [label="p_lm_head_weight::T10\nFLOAT(192, 32000)", fillcolor="#cccc00"];
i_13 [label="to_422\nFLOAT(1, 1, 48)", fillcolor="#cccc00"];
i_14 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
i_15 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_16 [label="model.layers.0.post_attention_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_17 [label="model.norm.weight\nFLOAT(192)", fillcolor="#cccc00"];
Shape_18 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Shape_19 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
Shape_20 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
ReduceMax_21 [label="ReduceMax(.)", fillcolor="#cccccc"];
Add_22 [label="Add(., 1)", fillcolor="#cccccc"];
Range_23 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Unsqueeze_24 [label="Unsqueeze(., [0])", fillcolor="#eeeeee"];
Add_25 [label="Add(., .)", fillcolor="#cccccc"];
CausalMask_26 [label="intermediate.\nCausalMask(., .)", fillcolor="#cccccc"];
Gather_27 [label="Gather(., .)", fillcolor="#cccccc"];
SimplifiedLayerNormalization_28 [label="SimplifiedLayerNormalization\n(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
Cast_29 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
Shape_30 [label="Shape(., start=-1)", fillcolor="#d2a81f"];
CausalMaskMulAdd_31 [label="intermediate.\nCausalMaskMulAdd\n(., ., .)", fillcolor="#cccccc"];
Concat_32 [label="Concat([1], [1], ., [1], axis=0)", fillcolor="#cccccc"];
Expand_33 [label="Expand(., .)", fillcolor="#cccccc"];
Shape_34 [label="Shape(.)", fillcolor="#d2a81f"];
Reshape_35 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Reshape_36 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Gather_37 [label="Gather(., .)", fillcolor="#cccccc"];
Reshape_38 [label="Reshape(., .)", fillcolor="#eeeeee"];
And_39 [label="And(., .)", fillcolor="#cccccc"];
CosSinCache_p1_40 [label="intermediate.\nCosSinCache_p1(., .)", fillcolor="#cccccc"];
Squeeze_41 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
Squeeze_42 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
MatMul_43 [label="MatMul(., .)", fillcolor="#ee9999"];
RotaryEmbedding_44 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
Reshape_45 [label="Reshape(., [0, 0, 2, 96])", fillcolor="#eeeeee"];
Transpose_46 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_47 [label="MatMul(., .)", fillcolor="#ee9999"];
Unsqueeze_48 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
RotaryEmbedding_49 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
MatMul_50 [label="MatMul(., .)", fillcolor="#ee9999"];
Unsqueeze_51 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
Concat_52 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Concat_53 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Unsqueeze_54 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Expand_55 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
Squeeze_56 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
Unsqueeze_57 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Expand_58 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
Squeeze_59 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
LocalAttention_to1_60 [label="intermediate.\nLocalAttention_to1\n(., ., ., ., [0.31947157])", fillcolor="#cccccc"];
Transpose_61 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
Reshape_62 [label="Reshape(., [0, 0, 192])", fillcolor="#eeeeee"];
MatMul_63 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_64 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_65 [label="MatMul(., .)", fillcolor="#ee9999"];
QuickGelu_66 [label="com.microsoft.\nQuickGelu(.)", fillcolor="#cccccc"];
MatMul_67 [label="MatMul(., .)", fillcolor="#ee9999"];
Mul_68 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_69 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_70 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_71 [label="MatMul(., .)", fillcolor="#ee9999"];
I_0 -> Shape_18 [label="INT64(batch,seq_length)"];
I_2 -> Shape_19 [label="INT64(batch,seq_length)"];
I_3 -> Shape_20 [label="FLOAT(batch,1,cache_length,96)"];
I_2 -> ReduceMax_21 [label="INT64(batch,seq_length)"];
ReduceMax_21 -> Add_22 [label="INT64()"];
Add_22 -> Range_23 [label="INT64()"];
Range_23 -> Unsqueeze_24 [label="INT64(NEWDIM_range1)"];
Shape_18 -> Add_25 [label="INT64(1)"];
Shape_20 -> Add_25 [label="INT64(1)"];
Shape_20 -> CausalMask_26 [label="INT64(1)"];
Add_25 -> CausalMask_26 [label="INT64(1)"];
i_14 -> Gather_27 [label="FLOAT(32000, 192)"];
I_0 -> Gather_27 [label="INT64(batch,seq_length)"];
Gather_27 -> SimplifiedLayerNormalization_28 [label="FLOAT(batch,seq_length,192)"];
i_15 -> SimplifiedLayerNormalization_28 [label="FLOAT(192)"];
I_1 -> Cast_29 [label="INT64(batch,cache+seq)"];
Cast_29 -> Shape_30 [label="BOOL(batch,cache+seq)"];
Add_25 -> CausalMaskMulAdd_31 [label="INT64(1)"];
Shape_19 -> CausalMaskMulAdd_31 [label="INT64(1)"];
Shape_30 -> CausalMaskMulAdd_31 [label="INT64(1)"];
Shape_18 -> Concat_32 [label="INT64(1)"];
CausalMaskMulAdd_31 -> Expand_33 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
Concat_32 -> Expand_33 [label="INT64(4)"];
Expand_33 -> Shape_34 [label="INT64(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
Cast_29 -> Reshape_35 [label="BOOL(batch,cache+seq)"];
Expand_33 -> Reshape_36 [label="INT64(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
Reshape_35 -> Gather_37 [label="BOOL(batch*(cache+seq))"];
Reshape_36 -> Gather_37 [label="INT64(batch*seq_length*(cache_length+seq_length))"];
Gather_37 -> Reshape_38 [label="BOOL(batch*seq_length*(cache_length+seq_length))"];
Shape_34 -> Reshape_38 [label="INT64(4)"];
CausalMask_26 -> And_39 [label="BOOL(1,\n1,\nseq_length,\ncache_length+seq_length)"];
Reshape_38 -> And_39 [label="BOOL(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
Unsqueeze_24 -> CosSinCache_p1_40 [label="INT64(1,NEWDIM_range1)"];
i_13 -> CosSinCache_p1_40 [label="FLOAT(1, 1, 48)"];
CosSinCache_p1_40 -> Squeeze_41 [label="FLOAT(1,NEWDIM_range1,48)"];
CosSinCache_p1_40 -> Squeeze_42 [label="FLOAT(1,NEWDIM_range1,48)"];
SimplifiedLayerNormalization_28 -> MatMul_43 [label="FLOAT(batch,seq_length,192)"];
i_5 -> MatMul_43 [label="FLOAT(192, 192)"];
MatMul_43 -> RotaryEmbedding_44 [label="FLOAT(batch,seq_length,192)"];
I_2 -> RotaryEmbedding_44 [label="INT64(batch,seq_length)"];
Squeeze_42 -> RotaryEmbedding_44 [label="FLOAT(NEWDIM_range1,48)"];
Squeeze_41 -> RotaryEmbedding_44 [label="FLOAT(NEWDIM_range1,48)"];
RotaryEmbedding_44 -> Reshape_45 [label="FLOAT(batch,seq_length,192)"];
Reshape_45 -> Transpose_46 [label="FLOAT(batch,seq_length,2,96)"];
SimplifiedLayerNormalization_28 -> MatMul_47 [label="FLOAT(batch,seq_length,192)"];
i_6 -> MatMul_47 [label="FLOAT(192, 96)"];
MatMul_47 -> Unsqueeze_48 [label="FLOAT(batch,seq_length,96)"];
Unsqueeze_48 -> RotaryEmbedding_49 [label="FLOAT(batch,1,seq_length,96)"];
I_2 -> RotaryEmbedding_49 [label="INT64(batch,seq_length)"];
Squeeze_42 -> RotaryEmbedding_49 [label="FLOAT(NEWDIM_range1,48)"];
Squeeze_41 -> RotaryEmbedding_49 [label="FLOAT(NEWDIM_range1,48)"];
SimplifiedLayerNormalization_28 -> MatMul_50 [label="FLOAT(batch,seq_length,192)"];
i_7 -> MatMul_50 [label="FLOAT(192, 96)"];
MatMul_50 -> Unsqueeze_51 [label="FLOAT(batch,seq_length,96)"];
I_3 -> Concat_52 [label="FLOAT(batch,1,cache_length,96)"];
RotaryEmbedding_49 -> Concat_52 [label="FLOAT(batch,1,seq_length,96)"];
I_4 -> Concat_53 [label="FLOAT(batch,1,cache_length,96)"];
Unsqueeze_51 -> Concat_53 [label="FLOAT(batch,1,seq_length,96)"];
Concat_52 -> Unsqueeze_54;
Unsqueeze_54 -> Expand_55 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
Expand_55 -> Squeeze_56 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
Concat_53 -> Unsqueeze_57;
Unsqueeze_57 -> Expand_58 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
Expand_58 -> Squeeze_59 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
Transpose_46 -> LocalAttention_to1_60 [label="FLOAT(batch,2,seq_length,96)"];
Squeeze_56 -> LocalAttention_to1_60 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
Squeeze_59 -> LocalAttention_to1_60 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
And_39 -> LocalAttention_to1_60 [label="BOOL(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
LocalAttention_to1_60 -> Transpose_61 [label="FLOAT(batch,2,seq_length,96)"];
Transpose_61 -> Reshape_62 [label="FLOAT(batch,seq_length,2,96)"];
Reshape_62 -> MatMul_63 [label="FLOAT(batch,seq_length,192)"];
i_8 -> MatMul_63 [label="FLOAT(192, 192)"];
Gather_27 -> SkipSimplifiedLayerNormalization_64 [label="FLOAT(batch,seq_length,192)"];
MatMul_63 -> SkipSimplifiedLayerNormalization_64 [label="FLOAT(batch,seq_length,192)"];
i_16 -> SkipSimplifiedLayerNormalization_64 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_64 -> MatMul_65 [label="FLOAT(batch,seq_length,192)"];
i_9 -> MatMul_65 [label="FLOAT(192, 1024)"];
MatMul_65 -> QuickGelu_66 [label="FLOAT(batch,seq_length,1024)"];
SkipSimplifiedLayerNormalization_64 -> MatMul_67 [label="FLOAT(batch,seq_length,192)"];
i_10 -> MatMul_67 [label="FLOAT(192, 1024)"];
QuickGelu_66 -> Mul_68 [label="FLOAT(batch,seq_length,1024)"];
MatMul_67 -> Mul_68 [label="FLOAT(batch,seq_length,1024)"];
Mul_68 -> MatMul_69 [label="FLOAT(batch,seq_length,1024)"];
i_11 -> MatMul_69 [label="FLOAT(1024, 192)"];
SkipSimplifiedLayerNormalization_64 -> SkipSimplifiedLayerNormalization_70 [label="FLOAT(batch,seq_length,192)"];
MatMul_69 -> SkipSimplifiedLayerNormalization_70 [label="FLOAT(batch,seq_length,192)"];
i_17 -> SkipSimplifiedLayerNormalization_70 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_70 -> MatMul_71 [label="FLOAT(batch,seq_length,192)"];
i_12 -> MatMul_71 [label="FLOAT(192, 32000)"];
O_72 [label="output_0\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
MatMul_71 -> O_72;
O_73 [label="present_key_values_key_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
Concat_52 -> O_73;
O_74 [label="present_key_values_value_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
Concat_53 -> O_74;
}](../../_images/graphviz-8d629524be3f32957edb5449edad700102f3efc1.png)
Here is a second example, this one for a model obtained with `torch.onnx.export()`:
![digraph {
graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
i_5 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
i_6 [label="val_341\nFLOAT(1, 48)", fillcolor="#cccc00"];
i_7 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
i_8 [label="val_119\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_9 [label="val_126\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_10 [label="val_133\nFLOAT(192, 96)", fillcolor="#cccc00"];
i_11 [label="val_288\nFLOAT(192, 192)", fillcolor="#cccc00"];
i_12 [label="val_293\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_13 [label="val_295\nFLOAT(192, 1024)", fillcolor="#cccc00"];
i_14 [label="val_296\nFLOAT(1024, 192)", fillcolor="#cccc00"];
i_15 [label="val_321\nFLOAT(192, 32000)", fillcolor="#cccc00"];
Shape_16 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_17 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_18 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
Squeeze_19 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_20 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
Squeeze_21 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_22 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Squeeze_23 [label="Squeeze(.)", fillcolor="#eeeeee"];
Shape_24 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
Squeeze_25 [label="Squeeze(.)", fillcolor="#eeeeee"];
Add_26 [label="Add(., .)", fillcolor="#cccccc"];
Max_27 [label="Max(., .)", fillcolor="#cccccc"];
Add_28 [label="Add(., .)", fillcolor="#cccccc"];
Add_29 [label="Add(., .)", fillcolor="#cccccc"];
Gather_30 [label="Gather(., ., axis=0)", fillcolor="#cccccc"];
Range_31 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Add_32 [label="Add(., .)", fillcolor="#cccccc"];
Cast_33 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
Range_34 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Range_35 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Reshape_36 [label="Reshape(., [-1, 1, 1, 1])", fillcolor="#eeeeee"];
Reshape_37 [label="Reshape(., [1, 1, -1, 1])", fillcolor="#eeeeee"];
Reshape_38 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_39 [label="Concat(., [1], ., ., axis=0)", fillcolor="#cccccc"];
Expand_40 [label="Expand(., .)", fillcolor="#cccccc"];
Expand_41 [label="Expand(., .)", fillcolor="#cccccc"];
Reshape_42 [label="Reshape(., [1, 1, 1, -1])", fillcolor="#eeeeee"];
Expand_43 [label="Expand(., .)", fillcolor="#cccccc"];
LessOrEqual_44 [label="LessOrEqual(., .)", fillcolor="#cccccc"];
And_45 [label="And(True, .)", fillcolor="#cccccc"];
Unsqueeze_46 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
Unsqueeze_47 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
Concat_48 [label="Concat(., ., axis=-1)", fillcolor="#cccccc"];
GatherND_49 [label="GatherND(., .)", fillcolor="#cccccc"];
And_50 [label="And(., .)", fillcolor="#cccccc"];
SimplifiedLayerNormalization_51 [label="SimplifiedLayerNormalization\n(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
MatMul_52 [label="MatMul(., .)", fillcolor="#ee9999"];
Concat_53 [label="Concat(., ., [-1], [96], axis=0)", fillcolor="#cccccc"];
Reshape_54 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_55 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_56 [label="MatMul(., .)", fillcolor="#ee9999"];
Reshape_57 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_58 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
MatMul_59 [label="MatMul(., .)", fillcolor="#ee9999"];
Reshape_60 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_61 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
ReduceMax_62 [label="ReduceMax(.)", fillcolor="#cccccc"];
Add_63 [label="Add(., 1)", fillcolor="#cccccc"];
Range_64 [label="Range(0, ., 1)", fillcolor="#cccccc"];
Reshape_65 [label="Reshape(., [-1, 1])", fillcolor="#eeeeee"];
Cast_66 [label="Cast(., to=FLOAT)", fillcolor="#cccccc"];
MatMul_67 [label="MatMul(., .)", fillcolor="#ee9999"];
Cos_68 [label="Cos(.)", fillcolor="#cccccc"];
Sin_69 [label="Sin(.)", fillcolor="#cccccc"];
RotaryEmbedding_70 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
RotaryEmbedding_71 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
Concat_72 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Concat_73 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
Unsqueeze_74 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Reshape_75 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_76 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
Expand_77 [label="Expand(., .)", fillcolor="#cccccc"];
Concat_78 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
Reshape_79 [label="Reshape(., .)", fillcolor="#eeeeee"];
Unsqueeze_80 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
Reshape_81 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
Concat_82 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
Expand_83 [label="Expand(., .)", fillcolor="#cccccc"];
Concat_84 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
Reshape_85 [label="Reshape(., .)", fillcolor="#eeeeee"];
Slice_86 [label="Slice(., [0], ., [3], [1])", fillcolor="#cccccc"];
Shape_87 [label="Shape(., start=0)", fillcolor="#d2a81f"];
Slice_88 [label="Slice(., [-1], [9223372036854775807])", fillcolor="#cccccc"];
Slice_89 [label="Slice(., [-2], [-1])", fillcolor="#cccccc"];
Slice_90 [label="Slice\n(., [-9223372036854775808], [-2])", fillcolor="#cccccc"];
Concat_91 [label="Concat([-1], ., ., axis=0)", fillcolor="#cccccc"];
Reshape_92 [label="Reshape(., .)", fillcolor="#eeeeee"];
Transpose_93 [label="Transpose(., perm=[0, 2, 1])", fillcolor="#ee99ee"];
Concat_94 [label="Concat(., ., ., axis=0)", fillcolor="#cccccc"];
Reshape_95 [label="Reshape(., .)", fillcolor="#eeeeee"];
Mul_96 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
Mul_97 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
Where_98 [label="Where(., 0.0, -3.4028235e+38)", fillcolor="#cccccc"];
MatMul_99 [label="MatMul(., .)", fillcolor="#ee9999"];
Add_100 [label="Add(., .)", fillcolor="#cccccc"];
Softmax_101 [label="Softmax(., axis=-1)", fillcolor="#cccccc"];
IsNaN_102 [label="IsNaN(.)", fillcolor="#cccccc"];
Where_103 [label="Where(., 0.0, .)", fillcolor="#cccccc"];
MatMul_104 [label="MatMul(., .)", fillcolor="#ee9999"];
Transpose_105 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
Concat_106 [label="Concat(., ., [-1], axis=0)", fillcolor="#cccccc"];
Reshape_107 [label="Reshape(., .)", fillcolor="#eeeeee"];
MatMul_108 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_109 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_110 [label="MatMul(., .)", fillcolor="#ee9999"];
Sigmoid_111 [label="Sigmoid(.)", fillcolor="#cccccc"];
Mul_112 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_113 [label="MatMul(., .)", fillcolor="#ee9999"];
Mul_114 [label="Mul(., .)", fillcolor="#cccccc"];
MatMul_115 [label="MatMul(., .)", fillcolor="#ee9999"];
SkipSimplifiedLayerNormalization_116 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
MatMul_117 [label="MatMul(., .)", fillcolor="#ee9999"];
I_0 -> Shape_16 [label="INT64(batch,seq_length)"];
Shape_16 -> Squeeze_17 [label="INT64(1)"];
I_2 -> Shape_18 [label="INT64(batch,seq_length)"];
Shape_18 -> Squeeze_19 [label="INT64(1)"];
I_2 -> Shape_20 [label="INT64(batch,seq_length)"];
Shape_20 -> Squeeze_21 [label="INT64(1)"];
I_3 -> Shape_22 [label="FLOAT(batch,1,cache_length,96)"];
Shape_22 -> Squeeze_23 [label="INT64(1)"];
I_4 -> Shape_24 [label="FLOAT(batch,1,cache_length,96)"];
Shape_24 -> Squeeze_25 [label="INT64(1)"];
Squeeze_23 -> Add_26 [label="INT64()"];
Squeeze_17 -> Add_26 [label="INT64()"];
Squeeze_17 -> Max_27 [label="INT64()"];
Squeeze_21 -> Max_27 [label="INT64()"];
Squeeze_23 -> Add_28 [label="INT64()"];
Max_27 -> Add_28 [label="INT64()"];
Squeeze_25 -> Add_29 [label="INT64()"];
Squeeze_17 -> Add_29 [label="INT64()"];
i_7 -> Gather_30 [label="FLOAT(32000, 192)"];
I_0 -> Gather_30 [label="INT64(batch,seq_length)"];
Squeeze_17 -> Range_31 [label="INT64()"];
Range_31 -> Add_32 [label="INT64(seq_length)"];
Squeeze_23 -> Add_32 [label="INT64()"];
I_1 -> Cast_33 [label="INT64(batch,cache+seq)"];
Add_26 -> Range_34 [label="INT64()"];
Squeeze_19 -> Range_35 [label="INT64()"];
Range_35 -> Reshape_36 [label="INT64(batch)"];
Add_32 -> Reshape_37 [label="INT64(seq_length)"];
Add_26 -> Reshape_38 [label="INT64()"];
Shape_18 -> Concat_39 [label="INT64(1)"];
Shape_16 -> Concat_39 [label="INT64(1)"];
Reshape_38 -> Concat_39 [label="INT64(1)"];
Reshape_36 -> Expand_40 [label="INT64(batch,1,1,1)"];
Concat_39 -> Expand_40 [label="INT64(4)"];
Reshape_37 -> Expand_41 [label="INT64(1,1,seq_length,1)"];
Concat_39 -> Expand_41 [label="INT64(4)"];
Range_34 -> Reshape_42 [label="INT64(cache_length + seq_length)"];
Reshape_42 -> Expand_43 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
Concat_39 -> Expand_43 [label="INT64(4)"];
Expand_43 -> LessOrEqual_44 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Expand_41 -> LessOrEqual_44 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
LessOrEqual_44 -> And_45 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Expand_40 -> Unsqueeze_46 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Expand_43 -> Unsqueeze_47 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Unsqueeze_46 -> Concat_48 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length,\n1)"];
Unsqueeze_47 -> Concat_48 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length,\n1)"];
Cast_33 -> GatherND_49 [label="BOOL(batch,cache+seq)"];
Concat_48 -> GatherND_49 [label="INT64(batch,\n1,\nseq_length,\ncache_length + seq_length,\n2)"];
And_45 -> And_50 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
GatherND_49 -> And_50 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Gather_30 -> SimplifiedLayerNormalization_51 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SimplifiedLayerNormalization_51 [label="FLOAT(192)"];
SimplifiedLayerNormalization_51 -> MatMul_52 [label="FLOAT(batch,seq_length,192)"];
i_8 -> MatMul_52 [label="FLOAT(192, 192)"];
Shape_18 -> Concat_53 [label="INT64(1)"];
Shape_16 -> Concat_53 [label="INT64(1)"];
MatMul_52 -> Reshape_54 [label="FLOAT(batch,seq_length,192)"];
Concat_53 -> Reshape_54 [label="INT64(4)"];
Reshape_54 -> Transpose_55 [label="FLOAT(batch,seq_length,2,96)"];
SimplifiedLayerNormalization_51 -> MatMul_56 [label="FLOAT(batch,seq_length,192)"];
i_9 -> MatMul_56 [label="FLOAT(192, 96)"];
MatMul_56 -> Reshape_57 [label="FLOAT(batch,seq_length,96)"];
Concat_53 -> Reshape_57 [label="INT64(4)"];
Reshape_57 -> Transpose_58 [label="FLOAT(batch,seq_length,1,96)"];
SimplifiedLayerNormalization_51 -> MatMul_59 [label="FLOAT(batch,seq_length,192)"];
i_10 -> MatMul_59 [label="FLOAT(192, 96)"];
MatMul_59 -> Reshape_60 [label="FLOAT(batch,seq_length,96)"];
Concat_53 -> Reshape_60 [label="INT64(4)"];
Reshape_60 -> Transpose_61 [label="FLOAT(batch,seq_length,1,96)"];
I_2 -> ReduceMax_62 [label="INT64(batch,seq_length)"];
ReduceMax_62 -> Add_63 [label="INT64()"];
Add_63 -> Range_64 [label="INT64()"];
Range_64 -> Reshape_65 [label="INT64(?)"];
Reshape_65 -> Cast_66 [label="INT64(?,1)"];
Cast_66 -> MatMul_67 [label="FLOAT(?,1)"];
i_6 -> MatMul_67 [label="FLOAT(1, 48)"];
MatMul_67 -> Cos_68 [label="FLOAT(?,48)"];
MatMul_67 -> Sin_69 [label="FLOAT(?,48)"];
Transpose_55 -> RotaryEmbedding_70 [label="FLOAT(batch,2,seq_length,96)"];
I_2 -> RotaryEmbedding_70 [label="INT64(batch,seq_length)"];
Cos_68 -> RotaryEmbedding_70 [label="FLOAT(?,48)"];
Sin_69 -> RotaryEmbedding_70 [label="FLOAT(?,48)"];
Transpose_58 -> RotaryEmbedding_71 [label="FLOAT(batch,1,seq_length,96)"];
I_2 -> RotaryEmbedding_71 [label="INT64(batch,seq_length)"];
Cos_68 -> RotaryEmbedding_71 [label="FLOAT(?,48)"];
Sin_69 -> RotaryEmbedding_71 [label="FLOAT(?,48)"];
I_3 -> Concat_72 [label="FLOAT(batch,1,cache_length,96)"];
RotaryEmbedding_71 -> Concat_72 [label="FLOAT(batch,\n1,\nMax(seq_length, seq_length),\n96)"];
I_4 -> Concat_73 [label="FLOAT(batch,1,cache_length,96)"];
Transpose_61 -> Concat_73 [label="FLOAT(batch,1,seq_length,96)"];
Concat_72 -> Unsqueeze_74;
Add_28 -> Reshape_75 [label="INT64()"];
Shape_18 -> Concat_76 [label="INT64(1)"];
Reshape_75 -> Concat_76 [label="INT64(1)"];
Unsqueeze_74 -> Expand_77 [label="FLOAT(batch,\n1,\n1,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_76 -> Expand_77 [label="INT64(5)"];
Shape_18 -> Concat_78 [label="INT64(1)"];
Reshape_75 -> Concat_78 [label="INT64(1)"];
Expand_77 -> Reshape_79 [label="FLOAT(batch,\n1,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_78 -> Reshape_79 [label="INT64(4)"];
Concat_73 -> Unsqueeze_80;
Add_29 -> Reshape_81 [label="INT64()"];
Shape_18 -> Concat_82 [label="INT64(1)"];
Reshape_81 -> Concat_82 [label="INT64(1)"];
Unsqueeze_80 -> Expand_83 [label="FLOAT(batch,\n1,\n1,\ncache_length + seq_length,\n96)"];
Concat_82 -> Expand_83 [label="INT64(5)"];
Shape_18 -> Concat_84 [label="INT64(1)"];
Reshape_81 -> Concat_84 [label="INT64(1)"];
Expand_83 -> Reshape_85 [label="FLOAT(batch,\n1,\n2,\ncache_length + seq_length,\n96)"];
Concat_84 -> Reshape_85 [label="INT64(4)"];
And_50 -> Slice_86 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
Reshape_75 -> Slice_86 [label="INT64(1)"];
Reshape_79 -> Shape_87 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Shape_87 -> Slice_88 [label="INT64(4)"];
Shape_87 -> Slice_89 [label="INT64(4)"];
Shape_87 -> Slice_90 [label="INT64(4)"];
Slice_89 -> Concat_91 [label="INT64(1)"];
Slice_88 -> Concat_91 [label="INT64(1)"];
Reshape_79 -> Reshape_92 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
Concat_91 -> Reshape_92 [label="INT64(3)"];
Reshape_92 -> Transpose_93 [label="FLOAT(?,\ncache_length + Max(seq_length, seq_length),\n96)"];
Slice_90 -> Concat_94 [label="INT64(2)"];
Slice_88 -> Concat_94 [label="INT64(1)"];
Slice_89 -> Concat_94 [label="INT64(1)"];
Transpose_93 -> Reshape_95 [label="FLOAT(?,\n96,\ncache_length + Max(seq_length, seq_length))"];
Concat_94 -> Reshape_95 [label="INT64(4)"];
RotaryEmbedding_70 -> Mul_96 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Reshape_95 -> Mul_97 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
Slice_86 -> Where_98 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
Mul_96 -> MatMul_99 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Mul_97 -> MatMul_99 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
MatMul_99 -> Add_100 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\ncache_length + Max(seq_length, seq_length))"];
Where_98 -> Add_100 [label="FLOAT(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
Add_100 -> Softmax_101 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Softmax_101 -> IsNaN_102 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
IsNaN_102 -> Where_103 [label="BOOL(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Softmax_101 -> Where_103 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Where_103 -> MatMul_104 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
Reshape_85 -> MatMul_104 [label="FLOAT(batch,\n2,\ncache_length + seq_length,\n96)"];
MatMul_104 -> Transpose_105 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
Shape_18 -> Concat_106 [label="INT64(1)"];
Shape_16 -> Concat_106 [label="INT64(1)"];
Transpose_105 -> Reshape_107 [label="FLOAT(batch,\nMax(seq_length, seq_length),\n2,\n96)"];
Concat_106 -> Reshape_107 [label="INT64(3)"];
Reshape_107 -> MatMul_108 [label="FLOAT(batch,\nseq_length,\n((192*Max(seq_length, seq_length))//seq_length))"];
i_11 -> MatMul_108 [label="FLOAT(192, 192)"];
MatMul_108 -> SkipSimplifiedLayerNormalization_109 [label="FLOAT(batch,seq_length,192)"];
Gather_30 -> SkipSimplifiedLayerNormalization_109 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SkipSimplifiedLayerNormalization_109 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_109 -> MatMul_110 [label="FLOAT(batch,seq_length,192)"];
i_12 -> MatMul_110 [label="FLOAT(192, 1024)"];
MatMul_110 -> Sigmoid_111 [label="FLOAT(batch,seq_length,1024)"];
MatMul_110 -> Mul_112 [label="FLOAT(batch,seq_length,1024)"];
Sigmoid_111 -> Mul_112 [label="FLOAT(batch,seq_length,1024)"];
SkipSimplifiedLayerNormalization_109 -> MatMul_113 [label="FLOAT(batch,seq_length,192)"];
i_13 -> MatMul_113 [label="FLOAT(192, 1024)"];
Mul_112 -> Mul_114 [label="FLOAT(batch,seq_length,1024)"];
MatMul_113 -> Mul_114 [label="FLOAT(batch,seq_length,1024)"];
Mul_114 -> MatMul_115 [label="FLOAT(batch,seq_length,1024)"];
i_14 -> MatMul_115 [label="FLOAT(1024, 192)"];
MatMul_115 -> SkipSimplifiedLayerNormalization_116 [label="FLOAT(batch,seq_length,192)"];
SkipSimplifiedLayerNormalization_109 -> SkipSimplifiedLayerNormalization_116 [label="FLOAT(batch,seq_length,192)"];
i_5 -> SkipSimplifiedLayerNormalization_116 [label="FLOAT(192)"];
SkipSimplifiedLayerNormalization_116 -> MatMul_117 [label="FLOAT(batch,seq_length,192)"];
i_15 -> MatMul_117 [label="FLOAT(192, 32000)"];
O_118 [label="linear_7\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
MatMul_117 -> O_118;
O_119 [label="cat_7\nFLOAT(batch,1,cache_length + Max(seq_length, seq_length),96)", fillcolor="#aaaaee"];
Concat_72 -> O_119;
O_120 [label="cat_8\nFLOAT(batch,1,cache_length + seq_length,96)", fillcolor="#aaaaee"];
Concat_73 -> O_120;
}](../../_images/graphviz-ad14503f6f622a494f32930f8e7c087db4209039.png)