Contents Menu Expand Light mode Dark mode Auto light/dark, in light mode Auto light/dark, in dark mode Skip to content
onnx-diagnostic 0.8.6 documentation
Logo
onnx-diagnostic 0.8.6 documentation

Contents

  • Patches Explained
    • Exporter Status
      • Exported Programs with Dynamic Shapes
      • Exported ONNX with Dynamic Shapes
      • Coverage of the Patches
      • Patches Diff
  • API of onnx_diagnostic
    • onnx_diagnostic.ci_models
      • onnx_diagnostic.ci_models.ci_helpers
      • onnx_diagnostic.ci_models.export_qwen25_vl
    • onnx_diagnostic.export
      • onnx_diagnostic.export.api
      • onnx_diagnostic.export.control_flow_onnx
      • onnx_diagnostic.export.dynamic_shapes
      • onnx_diagnostic.export.onnx_plug
      • onnx_diagnostic.export.shape_helper
      • onnx_diagnostic.export.validate
      • onnx_diagnostic.export.cf_simple_loop_for
    • onnx_diagnostic.helpers
      • onnx_diagnostic.helpers.args_helper
      • onnx_diagnostic.helpers.bench_run
      • onnx_diagnostic.helpers.cache_helper
      • onnx_diagnostic.helpers.config_helper
      • onnx_diagnostic.helpers.doc_helper
      • onnx_diagnostic.helpers.dot_helper
      • onnx_diagnostic.helpers.fake_tensor_helper
      • onnx_diagnostic.helpers.graph_helper
      • onnx_diagnostic.helpers.helper
      • onnx_diagnostic.helpers._log_helper
      • onnx_diagnostic.helpers.log_helper
      • onnx_diagnostic.helpers.memory_peak
      • onnx_diagnostic.helpers.mini_onnx_builder
      • onnx_diagnostic.helpers.model_builder_helper
      • onnx_diagnostic.helpers.onnx_helper
      • onnx_diagnostic.helpers.ort_session
      • onnx_diagnostic.helpers.rt_helper
      • onnx_diagnostic.helpers.torch_fx_graph_helper
      • onnx_diagnostic.helpers.torch_helper
    • onnx_diagnostic.reference
      • onnx_diagnostic.reference.ops
        • onnx_diagnostic.reference.ops.op_add_add_mul_mul
        • onnx_diagnostic.reference.ops.op_average_pool_grad
        • onnx_diagnostic.reference.ops.op_cast_like
        • onnx_diagnostic.reference.ops.op_complex
        • onnx_diagnostic.reference.ops.op_concat
        • onnx_diagnostic.reference.ops.op_constant_of_shape
        • onnx_diagnostic.reference.ops.op_fused_matmul
        • onnx_diagnostic.reference.ops.op_gather_grad
        • onnx_diagnostic.reference.ops.op_memcpy_host
        • onnx_diagnostic.reference.ops.op_mul_sigmoid
        • onnx_diagnostic.reference.ops.op_negxplus1
        • onnx_diagnostic.reference.ops.op_quick_gelu
        • onnx_diagnostic.reference.ops.op_replace_zero
        • onnx_diagnostic.reference.ops.op_rotary
        • onnx_diagnostic.reference.ops.op_qlinear_average_pool
        • onnx_diagnostic.reference.ops.op_qlinear_conv
        • onnx_diagnostic.reference.ops.op_scatter_elements
        • onnx_diagnostic.reference.ops.op_scatternd_of_shape
        • onnx_diagnostic.reference.ops.op_simplified_layer_normalization
        • onnx_diagnostic.reference.ops.op_skip_layer_normalization
        • onnx_diagnostic.reference.ops.op_slice
        • onnx_diagnostic.reference.ops.op_transpose_cast
        • onnx_diagnostic.reference.ops.op_tri_matrix
      • onnx_diagnostic.reference.torch_ops
        • onnx_diagnostic.reference.torch_ops.access_ops
        • onnx_diagnostic.reference.torch_ops.binary_ops
        • onnx_diagnostic.reference.torch_ops.controlflow_ops
        • onnx_diagnostic.reference.torch_ops.generator_ops
        • onnx_diagnostic.reference.torch_ops.nn_ops
        • onnx_diagnostic.reference.torch_ops.other_ops
        • onnx_diagnostic.reference.torch_ops.reduce_ops
        • onnx_diagnostic.reference.torch_ops.sequence_ops
        • onnx_diagnostic.reference.torch_ops.shape_ops
        • onnx_diagnostic.reference.torch_ops.unary_ops
      • onnx_diagnostic.reference.evaluator
      • onnx_diagnostic.reference.quantized_tensor
      • onnx_diagnostic.reference.ort_evaluator
      • onnx_diagnostic.reference.report_results_comparison
      • onnx_diagnostic.reference.torch_evaluator
    • onnx_diagnostic.tasks
      • onnx_diagnostic.tasks.automatic_speech_recognition
      • onnx_diagnostic.tasks.fill_mask
      • onnx_diagnostic.tasks.feature_extraction
      • onnx_diagnostic.tasks.image_classification
      • onnx_diagnostic.tasks.image_text_to_text
      • onnx_diagnostic.tasks.mixture_of_expert
      • onnx_diagnostic.tasks.object_detection
      • onnx_diagnostic.tasks.sentence_similarity
      • onnx_diagnostic.tasks.summarization
      • onnx_diagnostic.tasks.text_classification
      • onnx_diagnostic.tasks.text_generation
      • onnx_diagnostic.tasks.text_to_image
      • onnx_diagnostic.tasks.text2text_generation
      • onnx_diagnostic.tasks.zero_shot_image_classification
    • onnx_diagnostic.torch_export_patches
      • onnx_diagnostic.torch_export_patches.eval
        • onnx_diagnostic.torch_export_patches.eval.model_cases
      • onnx_diagnostic.torch_export_patches.onnx_export_errors
      • onnx_diagnostic.torch_export_patches.onnx_export_serialization
      • onnx_diagnostic.torch_export_patches.patches
        • onnx_diagnostic.torch_export_patches.patches.patch_torch
        • onnx_diagnostic.torch_export_patches.patches.patch_transformers
      • onnx_diagnostic.torch_export_patches.patch_details
      • onnx_diagnostic.torch_export_patches.patch_expressions
      • onnx_diagnostic.torch_export_patches.patch_inputs
      • onnx_diagnostic.torch_export_patches.patch_module
      • onnx_diagnostic.torch_export_patches.patch_module_helper
      • onnx_diagnostic.torch_export_patches.serialization
        • onnx_diagnostic.torch_export_patches.serialization.diffusers_impl
        • onnx_diagnostic.torch_export_patches.serialization.transformers_impl
    • onnx_diagnostic.torch_models
      • onnx_diagnostic.torch_models.code_sample
      • onnx_diagnostic.torch_models.hghub
        • onnx_diagnostic.torch_models.hghub.hub_api
        • onnx_diagnostic.torch_models.hghub.hub_data
        • onnx_diagnostic.torch_models.hghub.model_inputs
      • onnx_diagnostic.torch_models.llms
      • onnx_diagnostic.torch_models.validate
    • onnx_diagnostic.torch_onnx
      • onnx_diagnostic.torch_onnx.compare
      • onnx_diagnostic.torch_onnx.runtime_info
      • onnx_diagnostic.torch_onnx.sbs
      • onnx_diagnostic.torch_onnx.sbs_dataclasses
    • onnx_diagnostic.api
    • onnx_diagnostic.ext_test_case
  • Command Lines
    • -m onnx_diagnostic compare … compares two models
    • -m onnx_diagnostic config … prints the config for a model id
    • -m onnx_diagnostic sbs … runs a side-by-side torch/onnx
    • -m onnx_diagnostic validate … validates a model id
  • Examples Gallery
    • Dumps intermediate results of a torch model
    • Dynamic Shapes for *args, **kwargs
    • Export Tiny-LLM with patches
    • Export microsoft/phi-2
    • Export with DynamicCache and guessed dynamic shapes
    • Export with dynamic dimensions in {0,1}
    • Export with dynamic dimensions in {0,1} into ONNX
    • Export with dynamic dimensions in {0,1} into ONNX (custom)
    • Find and fix an export issue due to dynamic shapes
    • Find where a model is failing by running submodels
    • Intermediate results with (ONNX) ReferenceEvaluator
    • Intermediate results with onnxruntime
    • Steal method forward to guess inputs and dynamic shapes (with Tiny-LLM)
    • Test the export on untrained models
  • Common Export Issues
    • 0, 1, 2 for a Dynamic Dimension in the dummy example to export a model
    • Builds dynamic shapes from any input
    • Cannot export torch.sym_max(x.shape[0], y.shape[0])
    • Do not use python int with dynamic shapes
    • Export a model with a control flow (If)
    • Half certain nonzero
    • JSON returns list when the original dynamic shapes are list or tuple
    • Use DYNAMIC or AUTO when exporting if the dynamic shapes have constraints
  • Technical Details
    • Dynamic Shapes and Broadcasting
    • From an LLM to processing a prompt
    • Gemm or Matmul + Add
    • LayerNormalization implementation cannot be exchanged
    • Reproducible Parallelized Reduction is difficult

More

  • Change Logs
  • License
Back to top
View this page

onnx_diagnostic.helpers.dot_helper¶

onnx_diagnostic.helpers.dot_helper.to_dot(model: ModelProto) → str[source]¶

Converts a model into a dot graph. Here is an example:

digraph {
  graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
  node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
  edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
  I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
  I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
  I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
  I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
  I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
  i_5 [label="p_model_layers_0_self_attn_q_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
  i_6 [label="p_model_layers_0_self_attn_k_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
  i_7 [label="p_model_layers_0_self_attn_v_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
  i_8 [label="p_model_layers_0_self_attn_o_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
  i_9 [label="p_model_layers_0_mlp_gate_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
  i_10 [label="p_model_layers_0_mlp_up_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
  i_11 [label="p_model_layers_0_mlp_down_proj_weight::T10\nFLOAT(1024, 192)", fillcolor="#cccc00"];
  i_12 [label="p_lm_head_weight::T10\nFLOAT(192, 32000)", fillcolor="#cccc00"];
  i_13 [label="to_422\nFLOAT(1, 1, 48)", fillcolor="#cccc00"];
  i_14 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
  i_15 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
  i_16 [label="model.layers.0.post_attention_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
  i_17 [label="model.norm.weight\nFLOAT(192)", fillcolor="#cccc00"];
  ReduceMax_18 [label="ReduceMax(.)", fillcolor="#cccccc"];
  Add_19 [label="Add(., 1)", fillcolor="#cccccc"];
  Range_20 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Unsqueeze_21 [label="Unsqueeze(., [0])", fillcolor="#eeeeee"];
  Shape_22 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
  Squeeze_23 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Shape_24 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
  Squeeze_25 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Shape_26 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
  Add_27 [label="Add(., .)", fillcolor="#cccccc"];
  Squeeze_28 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Gather_29 [label="Gather(., .)", fillcolor="#cccccc"];
  SimplifiedLayerNormalization_30 [label="SimplifiedLayerNormalization\n(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
  Range_31 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Add_32 [label="Add(., .)", fillcolor="#cccccc"];
  Cast_33 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
  Range_34 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Range_35 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Unsqueeze_36 [label="Unsqueeze(., [1, 2, 3])", fillcolor="#eeeeee"];
  Unsqueeze_37 [label="Unsqueeze(., [0, 1, 3])", fillcolor="#eeeeee"];
  Unsqueeze_38 [label="Unsqueeze(., [0, 1, 2])", fillcolor="#eeeeee"];
  LessOrEqual_39 [label="LessOrEqual(., .)", fillcolor="#cccccc"];
  Shape_40 [label="Shape(., start=-1)", fillcolor="#d2a81f"];
  Mul_41 [label="Mul(., .)", fillcolor="#cccccc"];
  Add_42 [label="Add(., .)", fillcolor="#cccccc"];
  Reshape_43 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
  Reshape_44 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
  Gather_45 [label="Gather(., .)", fillcolor="#cccccc"];
  Shape_46 [label="Shape(.)", fillcolor="#d2a81f"];
  Reshape_47 [label="Reshape(., .)", fillcolor="#eeeeee"];
  And_48 [label="And(., .)", fillcolor="#cccccc"];
  CosSinCache_p1_49 [label="intermediate.\nCosSinCache_p1(., .)", fillcolor="#cccccc"];
  Squeeze_50 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
  Squeeze_51 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
  MatMul_52 [label="MatMul(., .)", fillcolor="#ee9999"];
  RotaryEmbedding_53 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
  Reshape_54 [label="Reshape(., [0, 0, 2, 96])", fillcolor="#eeeeee"];
  Transpose_55 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
  MatMul_56 [label="MatMul(., .)", fillcolor="#ee9999"];
  Unsqueeze_57 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
  RotaryEmbedding_58 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
  MatMul_59 [label="MatMul(., .)", fillcolor="#ee9999"];
  Unsqueeze_60 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
  Concat_61 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
  Concat_62 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
  Unsqueeze_63 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
  Expand_64 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
  Squeeze_65 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
  Unsqueeze_66 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
  Expand_67 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
  Squeeze_68 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
  LocalAttention_to1_69 [label="intermediate.\nLocalAttention_to1\n(., ., ., ., [0.31947157])", fillcolor="#cccccc"];
  Transpose_70 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
  Reshape_71 [label="Reshape(., [0, 0, 192])", fillcolor="#eeeeee"];
  MatMul_72 [label="MatMul(., .)", fillcolor="#ee9999"];
  SkipSimplifiedLayerNormalization_73 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
  MatMul_74 [label="MatMul(., .)", fillcolor="#ee9999"];
  QuickGelu_75 [label="com.microsoft.\nQuickGelu(.)", fillcolor="#cccccc"];
  MatMul_76 [label="MatMul(., .)", fillcolor="#ee9999"];
  Mul_77 [label="Mul(., .)", fillcolor="#cccccc"];
  MatMul_78 [label="MatMul(., .)", fillcolor="#ee9999"];
  SkipSimplifiedLayerNormalization_79 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
  MatMul_80 [label="MatMul(., .)", fillcolor="#ee9999"];
  I_2 -> ReduceMax_18 [label="INT64(batch,seq_length)"];
  ReduceMax_18 -> Add_19 [label="INT64()"];
  Add_19 -> Range_20 [label="INT64()"];
  Range_20 -> Unsqueeze_21 [label="INT64(NEWDIM_range1)"];
  I_0 -> Shape_22 [label="INT64(batch,seq_length)"];
  Shape_22 -> Squeeze_23 [label="INT64(1)"];
  I_2 -> Shape_24 [label="INT64(batch,seq_length)"];
  Shape_24 -> Squeeze_25 [label="INT64(1)"];
  I_3 -> Shape_26 [label="FLOAT(batch,1,cache_length,96)"];
  Shape_26 -> Add_27 [label="INT64(1)"];
  Shape_22 -> Add_27 [label="INT64(1)"];
  Add_27 -> Squeeze_28 [label="INT64(1)"];
  i_14 -> Gather_29 [label="FLOAT(32000, 192)"];
  I_0 -> Gather_29 [label="INT64(batch,seq_length)"];
  Gather_29 -> SimplifiedLayerNormalization_30 [label="FLOAT(batch,seq_length,192)"];
  i_15 -> SimplifiedLayerNormalization_30 [label="FLOAT(192)"];
  Squeeze_23 -> Range_31 [label="INT64()"];
  Range_31 -> Add_32 [label="INT64(seq_length)"];
  Shape_26 -> Add_32 [label="INT64(1)"];
  I_1 -> Cast_33 [label="INT64(batch,cache+seq)"];
  Squeeze_25 -> Range_34 [label="INT64()"];
  Squeeze_28 -> Range_35 [label="INT64()"];
  Range_34 -> Unsqueeze_36 [label="INT64(batch)"];
  Add_32 -> Unsqueeze_37 [label="INT64(seq_length)"];
  Range_35 -> Unsqueeze_38 [label="INT64(cache_length+seq_length)"];
  Unsqueeze_38 -> LessOrEqual_39 [label="INT64(1,1,1,cache_length+seq_length)"];
  Unsqueeze_37 -> LessOrEqual_39 [label="INT64(1,1,seq_length,1)"];
  Cast_33 -> Shape_40 [label="BOOL(batch,cache+seq)"];
  Unsqueeze_36 -> Mul_41 [label="INT64(batch,1,1,1)"];
  Shape_40 -> Mul_41 [label="INT64(1)"];
  Unsqueeze_38 -> Add_42 [label="INT64(1,1,1,cache_length+seq_length)"];
  Mul_41 -> Add_42 [label="INT64(batch,1,1,1)"];
  Cast_33 -> Reshape_43 [label="BOOL(batch,cache+seq)"];
  Add_42 -> Reshape_44 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
  Reshape_43 -> Gather_45 [label="BOOL(batch*(cache+seq))"];
  Reshape_44 -> Gather_45 [label="INT64(batch*(cache_length+seq_length))"];
  Add_42 -> Shape_46 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
  Gather_45 -> Reshape_47 [label="BOOL(batch*(cache_length+seq_length))"];
  Shape_46 -> Reshape_47 [label="INT64(4)"];
  LessOrEqual_39 -> And_48 [label="BOOL(1,\n1,\nseq_length,\ncache_length+seq_length)"];
  Reshape_47 -> And_48 [label="BOOL(batch,\n1,\n1,\ncache_length+seq_length)"];
  Unsqueeze_21 -> CosSinCache_p1_49 [label="INT64(1,NEWDIM_range1)"];
  i_13 -> CosSinCache_p1_49 [label="FLOAT(1, 1, 48)"];
  CosSinCache_p1_49 -> Squeeze_50 [label="FLOAT(1,NEWDIM_range1,48)"];
  CosSinCache_p1_49 -> Squeeze_51 [label="FLOAT(1,NEWDIM_range1,48)"];
  SimplifiedLayerNormalization_30 -> MatMul_52 [label="FLOAT(batch,seq_length,192)"];
  i_5 -> MatMul_52 [label="FLOAT(192, 192)"];
  MatMul_52 -> RotaryEmbedding_53 [label="FLOAT(batch,seq_length,192)"];
  I_2 -> RotaryEmbedding_53 [label="INT64(batch,seq_length)"];
  Squeeze_51 -> RotaryEmbedding_53 [label="FLOAT(NEWDIM_range1,48)"];
  Squeeze_50 -> RotaryEmbedding_53 [label="FLOAT(NEWDIM_range1,48)"];
  RotaryEmbedding_53 -> Reshape_54 [label="FLOAT(batch,seq_length,192)"];
  Reshape_54 -> Transpose_55 [label="FLOAT(batch,seq_length,2,96)"];
  SimplifiedLayerNormalization_30 -> MatMul_56 [label="FLOAT(batch,seq_length,192)"];
  i_6 -> MatMul_56 [label="FLOAT(192, 96)"];
  MatMul_56 -> Unsqueeze_57 [label="FLOAT(batch,seq_length,96)"];
  Unsqueeze_57 -> RotaryEmbedding_58 [label="FLOAT(batch,1,seq_length,96)"];
  I_2 -> RotaryEmbedding_58 [label="INT64(batch,seq_length)"];
  Squeeze_51 -> RotaryEmbedding_58 [label="FLOAT(NEWDIM_range1,48)"];
  Squeeze_50 -> RotaryEmbedding_58 [label="FLOAT(NEWDIM_range1,48)"];
  SimplifiedLayerNormalization_30 -> MatMul_59 [label="FLOAT(batch,seq_length,192)"];
  i_7 -> MatMul_59 [label="FLOAT(192, 96)"];
  MatMul_59 -> Unsqueeze_60 [label="FLOAT(batch,seq_length,96)"];
  I_3 -> Concat_61 [label="FLOAT(batch,1,cache_length,96)"];
  RotaryEmbedding_58 -> Concat_61 [label="FLOAT(batch,1,seq_length,96)"];
  I_4 -> Concat_62 [label="FLOAT(batch,1,cache_length,96)"];
  Unsqueeze_60 -> Concat_62 [label="FLOAT(batch,1,seq_length,96)"];
  Concat_61 -> Unsqueeze_63;
  Unsqueeze_63 -> Expand_64 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
  Expand_64 -> Squeeze_65 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
  Concat_62 -> Unsqueeze_66;
  Unsqueeze_66 -> Expand_67 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
  Expand_67 -> Squeeze_68 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
  Transpose_55 -> LocalAttention_to1_69 [label="FLOAT(batch,2,seq_length,96)"];
  Squeeze_65 -> LocalAttention_to1_69 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
  Squeeze_68 -> LocalAttention_to1_69 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
  And_48 -> LocalAttention_to1_69 [label="BOOL(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
  LocalAttention_to1_69 -> Transpose_70 [label="FLOAT(batch,2,seq_length,96)"];
  Transpose_70 -> Reshape_71 [label="FLOAT(batch,seq_length,2,96)"];
  Reshape_71 -> MatMul_72 [label="FLOAT(batch,seq_length,192)"];
  i_8 -> MatMul_72 [label="FLOAT(192, 192)"];
  Gather_29 -> SkipSimplifiedLayerNormalization_73 [label="FLOAT(batch,seq_length,192)"];
  MatMul_72 -> SkipSimplifiedLayerNormalization_73 [label="FLOAT(batch,seq_length,192)"];
  i_16 -> SkipSimplifiedLayerNormalization_73 [label="FLOAT(192)"];
  SkipSimplifiedLayerNormalization_73 -> MatMul_74 [label="FLOAT(batch,seq_length,192)"];
  i_9 -> MatMul_74 [label="FLOAT(192, 1024)"];
  MatMul_74 -> QuickGelu_75 [label="FLOAT(batch,seq_length,1024)"];
  SkipSimplifiedLayerNormalization_73 -> MatMul_76 [label="FLOAT(batch,seq_length,192)"];
  i_10 -> MatMul_76 [label="FLOAT(192, 1024)"];
  QuickGelu_75 -> Mul_77 [label="FLOAT(batch,seq_length,1024)"];
  MatMul_76 -> Mul_77 [label="FLOAT(batch,seq_length,1024)"];
  Mul_77 -> MatMul_78 [label="FLOAT(batch,seq_length,1024)"];
  i_11 -> MatMul_78 [label="FLOAT(1024, 192)"];
  SkipSimplifiedLayerNormalization_73 -> SkipSimplifiedLayerNormalization_79 [label="FLOAT(batch,seq_length,192)"];
  MatMul_78 -> SkipSimplifiedLayerNormalization_79 [label="FLOAT(batch,seq_length,192)"];
  i_17 -> SkipSimplifiedLayerNormalization_79 [label="FLOAT(192)"];
  SkipSimplifiedLayerNormalization_79 -> MatMul_80 [label="FLOAT(batch,seq_length,192)"];
  i_12 -> MatMul_80 [label="FLOAT(192, 32000)"];
  O_81 [label="output_0\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
  MatMul_80 -> O_81;
  O_82 [label="present_key_values_key_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
  Concat_61 -> O_82;
  O_83 [label="present_key_values_value_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
  Concat_62 -> O_83;
}

Here is another example, obtained from a model exported with torch.onnx.export():

digraph {
  graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
  node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
  edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
  I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
  I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
  I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
  I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
  I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
  i_5 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
  i_6 [label="val_344\nFLOAT(1, 48)", fillcolor="#cccc00"];
  i_7 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
  i_8 [label="val_120\nFLOAT(192, 192)", fillcolor="#cccc00"];
  i_9 [label="val_127\nFLOAT(192, 96)", fillcolor="#cccc00"];
  i_10 [label="val_134\nFLOAT(192, 96)", fillcolor="#cccc00"];
  i_11 [label="val_288\nFLOAT(192, 192)", fillcolor="#cccc00"];
  i_12 [label="val_293\nFLOAT(192, 1024)", fillcolor="#cccc00"];
  i_13 [label="val_295\nFLOAT(192, 1024)", fillcolor="#cccc00"];
  i_14 [label="val_296\nFLOAT(1024, 192)", fillcolor="#cccc00"];
  i_15 [label="val_321\nFLOAT(192, 32000)", fillcolor="#cccc00"];
  Shape_16 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
  Squeeze_17 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Shape_18 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
  Squeeze_19 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Shape_20 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
  Squeeze_21 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Shape_22 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
  Squeeze_23 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Shape_24 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
  Squeeze_25 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Add_26 [label="Add(., .)", fillcolor="#cccccc"];
  Max_27 [label="Max(., .)", fillcolor="#cccccc"];
  Add_28 [label="Add(., .)", fillcolor="#cccccc"];
  Add_29 [label="Add(., .)", fillcolor="#cccccc"];
  Gather_30 [label="Gather(., ., axis=0)", fillcolor="#cccccc"];
  Range_31 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Add_32 [label="Add(., .)", fillcolor="#cccccc"];
  Cast_33 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
  Range_34 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Range_35 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Unsqueeze_36 [label="Unsqueeze(., [1, 2])", fillcolor="#eeeeee"];
  Unsqueeze_37 [label="Unsqueeze(., [3])", fillcolor="#eeeeee"];
  Unsqueeze_38 [label="Unsqueeze(., [0, 1])", fillcolor="#eeeeee"];
  Unsqueeze_39 [label="Unsqueeze(., [3])", fillcolor="#eeeeee"];
  Unsqueeze_40 [label="Unsqueeze(., [0, 1])", fillcolor="#eeeeee"];
  Unsqueeze_41 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
  LessOrEqual_42 [label="LessOrEqual(., .)", fillcolor="#cccccc"];
  And_43 [label="And(True, .)", fillcolor="#cccccc"];
  Max_44 [label="Max(., .)", fillcolor="#cccccc"];
  Shape_45 [label="Shape(., start=0)", fillcolor="#d2a81f"];
  Expand_46 [label="Expand(., .)", fillcolor="#cccccc"];
  Unsqueeze_47 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
  Expand_48 [label="Expand(., .)", fillcolor="#cccccc"];
  Unsqueeze_49 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
  Concat_50 [label="Concat(., ., axis=-1)", fillcolor="#cccccc"];
  GatherND_51 [label="GatherND(., .)", fillcolor="#cccccc"];
  And_52 [label="And(., .)", fillcolor="#cccccc"];
  Reshape_53 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
  Concat_54 [label="Concat(., [1], ., ., axis=0)", fillcolor="#cccccc"];
  Expand_55 [label="Expand(., .)", fillcolor="#cccccc"];
  SimplifiedLayerNormalization_56 [label="SimplifiedLayerNormalization\n(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
  MatMul_57 [label="MatMul(., .)", fillcolor="#ee9999"];
  Concat_58 [label="Concat(., ., [-1], [96], axis=0)", fillcolor="#cccccc"];
  Reshape_59 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Transpose_60 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
  MatMul_61 [label="MatMul(., .)", fillcolor="#ee9999"];
  Reshape_62 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Transpose_63 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
  MatMul_64 [label="MatMul(., .)", fillcolor="#ee9999"];
  Reshape_65 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Transpose_66 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
  ReduceMax_67 [label="ReduceMax(.)", fillcolor="#cccccc"];
  Add_68 [label="Add(., 1)", fillcolor="#cccccc"];
  Range_69 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Reshape_70 [label="Reshape(., [-1, 1])", fillcolor="#eeeeee"];
  Cast_71 [label="Cast(., to=FLOAT)", fillcolor="#cccccc"];
  MatMul_72 [label="MatMul(., .)", fillcolor="#ee9999"];
  Cos_73 [label="Cos(.)", fillcolor="#cccccc"];
  Sin_74 [label="Sin(.)", fillcolor="#cccccc"];
  RotaryEmbedding_75 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
  RotaryEmbedding_76 [label="com.microsoft.\nRotaryEmbedding\n(., ., ., .)", fillcolor="#cccccc"];
  Concat_77 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
  Concat_78 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
  Unsqueeze_79 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
  Reshape_80 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
  Concat_81 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
  Expand_82 [label="Expand(., .)", fillcolor="#cccccc"];
  Concat_83 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
  Reshape_84 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Unsqueeze_85 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
  Reshape_86 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
  Concat_87 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
  Expand_88 [label="Expand(., .)", fillcolor="#cccccc"];
  Concat_89 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
  Reshape_90 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Slice_91 [label="Slice(., [0], ., [3], [1])", fillcolor="#cccccc"];
  Shape_92 [label="Shape(., start=0)", fillcolor="#d2a81f"];
  Slice_93 [label="Slice(., [-1], [9223372036854775807])", fillcolor="#cccccc"];
  Slice_94 [label="Slice(., [-2], [-1])", fillcolor="#cccccc"];
  Slice_95 [label="Slice\n(., [-9223372036854775808], [-2])", fillcolor="#cccccc"];
  Concat_96 [label="Concat([-1], ., ., axis=0)", fillcolor="#cccccc"];
  Reshape_97 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Transpose_98 [label="Transpose(., perm=[0, 2, 1])", fillcolor="#ee99ee"];
  Concat_99 [label="Concat(., ., ., axis=0)", fillcolor="#cccccc"];
  Reshape_100 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Mul_101 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
  Mul_102 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
  Where_103 [label="Where(., 0.0, -inf)", fillcolor="#cccccc"];
  MatMul_104 [label="MatMul(., .)", fillcolor="#ee9999"];
  Add_105 [label="Add(., .)", fillcolor="#cccccc"];
  Softmax_106 [label="Softmax(., axis=-1)", fillcolor="#cccccc"];
  IsNaN_107 [label="IsNaN(.)", fillcolor="#cccccc"];
  Where_108 [label="Where(., 0.0, .)", fillcolor="#cccccc"];
  MatMul_109 [label="MatMul(., .)", fillcolor="#ee9999"];
  Transpose_110 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
  Concat_111 [label="Concat(., ., [-1], axis=0)", fillcolor="#cccccc"];
  Reshape_112 [label="Reshape(., .)", fillcolor="#eeeeee"];
  MatMul_113 [label="MatMul(., .)", fillcolor="#ee9999"];
  SkipSimplifiedLayerNormalization_114 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
  MatMul_115 [label="MatMul(., .)", fillcolor="#ee9999"];
  Sigmoid_116 [label="Sigmoid(.)", fillcolor="#cccccc"];
  Mul_117 [label="Mul(., .)", fillcolor="#cccccc"];
  MatMul_118 [label="MatMul(., .)", fillcolor="#ee9999"];
  Mul_119 [label="Mul(., .)", fillcolor="#cccccc"];
  MatMul_120 [label="MatMul(., .)", fillcolor="#ee9999"];
  SkipSimplifiedLayerNormalization_121 [label="com.microsoft.\nSkipSimplifiedLayerNormalization\n(., ., .)", fillcolor="#cccccc"];
  MatMul_122 [label="MatMul(., .)", fillcolor="#ee9999"];
  I_0 -> Shape_16 [label="INT64(batch,seq_length)"];
  Shape_16 -> Squeeze_17 [label="INT64(1)"];
  I_2 -> Shape_18 [label="INT64(batch,seq_length)"];
  Shape_18 -> Squeeze_19 [label="INT64(1)"];
  I_2 -> Shape_20 [label="INT64(batch,seq_length)"];
  Shape_20 -> Squeeze_21 [label="INT64(1)"];
  I_3 -> Shape_22 [label="FLOAT(batch,1,cache_length,96)"];
  Shape_22 -> Squeeze_23 [label="INT64(1)"];
  I_4 -> Shape_24 [label="FLOAT(batch,1,cache_length,96)"];
  Shape_24 -> Squeeze_25 [label="INT64(1)"];
  Squeeze_23 -> Add_26 [label="INT64()"];
  Squeeze_17 -> Add_26 [label="INT64()"];
  Squeeze_17 -> Max_27 [label="INT64()"];
  Squeeze_21 -> Max_27 [label="INT64()"];
  Squeeze_23 -> Add_28 [label="INT64()"];
  Max_27 -> Add_28 [label="INT64()"];
  Squeeze_25 -> Add_29 [label="INT64()"];
  Squeeze_17 -> Add_29 [label="INT64()"];
  i_7 -> Gather_30 [label="FLOAT(32000, 192)"];
  I_0 -> Gather_30 [label="INT64(batch,seq_length)"];
  Squeeze_17 -> Range_31 [label="INT64()"];
  Range_31 -> Add_32 [label="INT64(seq_length)"];
  Squeeze_23 -> Add_32 [label="INT64()"];
  I_1 -> Cast_33 [label="INT64(batch,cache+seq)"];
  Squeeze_19 -> Range_34 [label="INT64()"];
  Add_26 -> Range_35 [label="INT64()"];
  Range_34 -> Unsqueeze_36 [label="INT64(batch)"];
  Unsqueeze_36 -> Unsqueeze_37 [label="INT64(batch,1,1)"];
  Add_32 -> Unsqueeze_38 [label="INT64(seq_length)"];
  Unsqueeze_38 -> Unsqueeze_39 [label="INT64(1,1,seq_length)"];
  Range_35 -> Unsqueeze_40 [label="INT64(cache_length + seq_length)"];
  Unsqueeze_40 -> Unsqueeze_41 [label="INT64(1,1,cache_length + seq_length)"];
  Unsqueeze_41 -> LessOrEqual_42 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
  Unsqueeze_39 -> LessOrEqual_42 [label="INT64(1,1,seq_length,1)"];
  LessOrEqual_42 -> And_43 [label="BOOL(1,\n1,\nseq_length,\ncache_length + seq_length)"];
  Unsqueeze_37 -> Max_44 [label="INT64(batch,1,1,1)"];
  Unsqueeze_41 -> Max_44 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
  Max_44 -> Shape_45 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
  Unsqueeze_37 -> Expand_46 [label="INT64(batch,1,1,1)"];
  Shape_45 -> Expand_46 [label="INT64(4)"];
  Expand_46 -> Unsqueeze_47 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
  Unsqueeze_41 -> Expand_48 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
  Shape_45 -> Expand_48 [label="INT64(4)"];
  Expand_48 -> Unsqueeze_49 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
  Unsqueeze_47 -> Concat_50 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n1)"];
  Unsqueeze_49 -> Concat_50 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n1)"];
  Cast_33 -> GatherND_51 [label="BOOL(batch,cache+seq)"];
  Concat_50 -> GatherND_51 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n2)"];
  And_43 -> And_52 [label="BOOL(1,\n1,\nseq_length,\ncache_length + seq_length)"];
  GatherND_51 -> And_52 [label="BOOL(batch,\n1,\n1,\ncache_length + seq_length)"];
  Add_26 -> Reshape_53 [label="INT64()"];
  Shape_18 -> Concat_54 [label="INT64(1)"];
  Shape_16 -> Concat_54 [label="INT64(1)"];
  Reshape_53 -> Concat_54 [label="INT64(1)"];
  And_52 -> Expand_55 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
  Concat_54 -> Expand_55 [label="INT64(4)"];
  Gather_30 -> SimplifiedLayerNormalization_56 [label="FLOAT(batch,seq_length,192)"];
  i_5 -> SimplifiedLayerNormalization_56 [label="FLOAT(192)"];
  SimplifiedLayerNormalization_56 -> MatMul_57 [label="FLOAT(batch,seq_length,192)"];
  i_8 -> MatMul_57 [label="FLOAT(192, 192)"];
  Shape_18 -> Concat_58 [label="INT64(1)"];
  Shape_16 -> Concat_58 [label="INT64(1)"];
  MatMul_57 -> Reshape_59 [label="FLOAT(batch,seq_length,192)"];
  Concat_58 -> Reshape_59 [label="INT64(4)"];
  Reshape_59 -> Transpose_60 [label="FLOAT(batch,seq_length,2,96)"];
  SimplifiedLayerNormalization_56 -> MatMul_61 [label="FLOAT(batch,seq_length,192)"];
  i_9 -> MatMul_61 [label="FLOAT(192, 96)"];
  MatMul_61 -> Reshape_62 [label="FLOAT(batch,seq_length,96)"];
  Concat_58 -> Reshape_62 [label="INT64(4)"];
  Reshape_62 -> Transpose_63 [label="FLOAT(batch,seq_length,1,96)"];
  SimplifiedLayerNormalization_56 -> MatMul_64 [label="FLOAT(batch,seq_length,192)"];
  i_10 -> MatMul_64 [label="FLOAT(192, 96)"];
  MatMul_64 -> Reshape_65 [label="FLOAT(batch,seq_length,96)"];
  Concat_58 -> Reshape_65 [label="INT64(4)"];
  Reshape_65 -> Transpose_66 [label="FLOAT(batch,seq_length,1,96)"];
  I_2 -> ReduceMax_67 [label="INT64(batch,seq_length)"];
  ReduceMax_67 -> Add_68 [label="INT64()"];
  Add_68 -> Range_69 [label="INT64()"];
  Range_69 -> Reshape_70 [label="INT64(?)"];
  Reshape_70 -> Cast_71 [label="INT64(?,1)"];
  Cast_71 -> MatMul_72 [label="FLOAT(?,1)"];
  i_6 -> MatMul_72 [label="FLOAT(1, 48)"];
  MatMul_72 -> Cos_73 [label="FLOAT(?,48)"];
  MatMul_72 -> Sin_74 [label="FLOAT(?,48)"];
  Transpose_60 -> RotaryEmbedding_75 [label="FLOAT(batch,2,seq_length,96)"];
  I_2 -> RotaryEmbedding_75 [label="INT64(batch,seq_length)"];
  Cos_73 -> RotaryEmbedding_75 [label="FLOAT(?,48)"];
  Sin_74 -> RotaryEmbedding_75 [label="FLOAT(?,48)"];
  Transpose_63 -> RotaryEmbedding_76 [label="FLOAT(batch,1,seq_length,96)"];
  I_2 -> RotaryEmbedding_76 [label="INT64(batch,seq_length)"];
  Cos_73 -> RotaryEmbedding_76 [label="FLOAT(?,48)"];
  Sin_74 -> RotaryEmbedding_76 [label="FLOAT(?,48)"];
  I_3 -> Concat_77 [label="FLOAT(batch,1,cache_length,96)"];
  RotaryEmbedding_76 -> Concat_77 [label="FLOAT(batch,\n1,\nMax(seq_length, seq_length),\n96)"];
  I_4 -> Concat_78 [label="FLOAT(batch,1,cache_length,96)"];
  Transpose_66 -> Concat_78 [label="FLOAT(batch,1,seq_length,96)"];
  Concat_77 -> Unsqueeze_79;
  Add_28 -> Reshape_80 [label="INT64()"];
  Shape_18 -> Concat_81 [label="INT64(1)"];
  Reshape_80 -> Concat_81 [label="INT64(1)"];
  Unsqueeze_79 -> Expand_82 [label="FLOAT(batch,\n1,\n1,\ncache_length + Max(seq_length, seq_length),\n96)"];
  Concat_81 -> Expand_82 [label="INT64(5)"];
  Shape_18 -> Concat_83 [label="INT64(1)"];
  Reshape_80 -> Concat_83 [label="INT64(1)"];
  Expand_82 -> Reshape_84 [label="FLOAT(batch,\n1,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
  Concat_83 -> Reshape_84 [label="INT64(4)"];
  Concat_78 -> Unsqueeze_85;
  Add_29 -> Reshape_86 [label="INT64()"];
  Shape_18 -> Concat_87 [label="INT64(1)"];
  Reshape_86 -> Concat_87 [label="INT64(1)"];
  Unsqueeze_85 -> Expand_88 [label="FLOAT(batch,\n1,\n1,\ncache_length + seq_length,\n96)"];
  Concat_87 -> Expand_88 [label="INT64(5)"];
  Shape_18 -> Concat_89 [label="INT64(1)"];
  Reshape_86 -> Concat_89 [label="INT64(1)"];
  Expand_88 -> Reshape_90 [label="FLOAT(batch,\n1,\n2,\ncache_length + seq_length,\n96)"];
  Concat_89 -> Reshape_90 [label="INT64(4)"];
  Expand_55 -> Slice_91 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
  Reshape_80 -> Slice_91 [label="INT64(1)"];
  Reshape_84 -> Shape_92 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
  Shape_92 -> Slice_93 [label="INT64(4)"];
  Shape_92 -> Slice_94 [label="INT64(4)"];
  Shape_92 -> Slice_95 [label="INT64(4)"];
  Slice_94 -> Concat_96 [label="INT64(1)"];
  Slice_93 -> Concat_96 [label="INT64(1)"];
  Reshape_84 -> Reshape_97 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
  Concat_96 -> Reshape_97 [label="INT64(3)"];
  Reshape_97 -> Transpose_98 [label="FLOAT(?,\ncache_length + Max(seq_length, seq_length),\n96)"];
  Slice_95 -> Concat_99 [label="INT64(2)"];
  Slice_93 -> Concat_99 [label="INT64(1)"];
  Slice_94 -> Concat_99 [label="INT64(1)"];
  Transpose_98 -> Reshape_100 [label="FLOAT(?,\n96,\ncache_length + Max(seq_length, seq_length))"];
  Concat_99 -> Reshape_100 [label="INT64(4)"];
  RotaryEmbedding_75 -> Mul_101 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
  Reshape_100 -> Mul_102 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
  Slice_91 -> Where_103 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
  Mul_101 -> MatMul_104 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
  Mul_102 -> MatMul_104 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
  MatMul_104 -> Add_105 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\ncache_length + Max(seq_length, seq_length))"];
  Where_103 -> Add_105 [label="FLOAT(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
  Add_105 -> Softmax_106 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
  Softmax_106 -> IsNaN_107 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
  IsNaN_107 -> Where_108 [label="BOOL(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
  Softmax_106 -> Where_108 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
  Where_108 -> MatMul_109 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
  Reshape_90 -> MatMul_109 [label="FLOAT(batch,\n2,\ncache_length + seq_length,\n96)"];
  MatMul_109 -> Transpose_110 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
  Shape_18 -> Concat_111 [label="INT64(1)"];
  Shape_16 -> Concat_111 [label="INT64(1)"];
  Transpose_110 -> Reshape_112 [label="FLOAT(batch,\nMax(seq_length, seq_length),\n2,\n96)"];
  Concat_111 -> Reshape_112 [label="INT64(3)"];
  Reshape_112 -> MatMul_113 [label="FLOAT(batch,\nseq_length,\n((192*Max(seq_length, seq_length))//seq_length))"];
  i_11 -> MatMul_113 [label="FLOAT(192, 192)"];
  MatMul_113 -> SkipSimplifiedLayerNormalization_114 [label="FLOAT(batch,seq_length,192)"];
  Gather_30 -> SkipSimplifiedLayerNormalization_114 [label="FLOAT(batch,seq_length,192)"];
  i_5 -> SkipSimplifiedLayerNormalization_114 [label="FLOAT(192)"];
  SkipSimplifiedLayerNormalization_114 -> MatMul_115 [label="FLOAT(batch,seq_length,192)"];
  i_12 -> MatMul_115 [label="FLOAT(192, 1024)"];
  MatMul_115 -> Sigmoid_116 [label="FLOAT(batch,seq_length,1024)"];
  MatMul_115 -> Mul_117 [label="FLOAT(batch,seq_length,1024)"];
  Sigmoid_116 -> Mul_117 [label="FLOAT(batch,seq_length,1024)"];
  SkipSimplifiedLayerNormalization_114 -> MatMul_118 [label="FLOAT(batch,seq_length,192)"];
  i_13 -> MatMul_118 [label="FLOAT(192, 1024)"];
  Mul_117 -> Mul_119 [label="FLOAT(batch,seq_length,1024)"];
  MatMul_118 -> Mul_119 [label="FLOAT(batch,seq_length,1024)"];
  Mul_119 -> MatMul_120 [label="FLOAT(batch,seq_length,1024)"];
  i_14 -> MatMul_120 [label="FLOAT(1024, 192)"];
  MatMul_120 -> SkipSimplifiedLayerNormalization_121 [label="FLOAT(batch,seq_length,192)"];
  SkipSimplifiedLayerNormalization_114 -> SkipSimplifiedLayerNormalization_121 [label="FLOAT(batch,seq_length,192)"];
  i_5 -> SkipSimplifiedLayerNormalization_121 [label="FLOAT(192)"];
  SkipSimplifiedLayerNormalization_121 -> MatMul_122 [label="FLOAT(batch,seq_length,192)"];
  i_15 -> MatMul_122 [label="FLOAT(192, 32000)"];
  O_123 [label="linear_7\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
  MatMul_122 -> O_123;
  O_124 [label="cat_7\nFLOAT(batch,1,cache_length + Max(seq_length, seq_length),96)", fillcolor="#aaaaee"];
  Concat_77 -> O_124;
  O_125 [label="cat_8\nFLOAT(batch,1,cache_length + seq_length,96)", fillcolor="#aaaaee"];
  Concat_78 -> O_125;
}
Next
onnx_diagnostic.helpers.fake_tensor_helper
Previous
onnx_diagnostic.helpers.doc_helper
Copyright © 2025
Made with Sphinx and @pradyunsg's Furo
On this page
  • onnx_diagnostic.helpers.dot_helper
    • to_dot()