Contents Menu Expand Light mode Dark mode Auto light/dark, in light mode Auto light/dark, in dark mode Skip to content
onnx-diagnostic 0.8.3 documentation
Logo
onnx-diagnostic 0.8.3 documentation

Contents

  • Patches Explained
    • Exporter Status
      • Exported Programs with Dynamic Shapes
      • Exported ONNX with Dynamic Shapes
      • Coverage of the Patches
      • Patches Diff
  • API of onnx_diagnostic
    • onnx_diagnostic.export
      • onnx_diagnostic.export.api
      • onnx_diagnostic.export.control_flow_onnx
      • onnx_diagnostic.export.dynamic_shapes
      • onnx_diagnostic.export.onnx_plug
      • onnx_diagnostic.export.shape_helper
      • onnx_diagnostic.export.validate
    • onnx_diagnostic.helpers
      • onnx_diagnostic.helpers.args_helper
      • onnx_diagnostic.helpers.bench_run
      • onnx_diagnostic.helpers.cache_helper
      • onnx_diagnostic.helpers.config_helper
      • onnx_diagnostic.helpers.doc_helper
      • onnx_diagnostic.helpers.dot_helper
      • onnx_diagnostic.helpers.fake_tensor_helper
      • onnx_diagnostic.helpers.graph_helper
      • onnx_diagnostic.helpers.helper
      • onnx_diagnostic.helpers._log_helper
      • onnx_diagnostic.helpers.log_helper
      • onnx_diagnostic.helpers.memory_peak
      • onnx_diagnostic.helpers.mini_onnx_builder
      • onnx_diagnostic.helpers.model_builder_helper
      • onnx_diagnostic.helpers.onnx_helper
      • onnx_diagnostic.helpers.ort_session
      • onnx_diagnostic.helpers.rt_helper
      • onnx_diagnostic.helpers.torch_fx_graph_helper
      • onnx_diagnostic.helpers.torch_helper
    • onnx_diagnostic.reference
      • onnx_diagnostic.reference.ops
        • onnx_diagnostic.reference.ops.op_add_add_mul_mul
        • onnx_diagnostic.reference.ops.op_average_pool_grad
        • onnx_diagnostic.reference.ops.op_cast_like
        • onnx_diagnostic.reference.ops.op_complex
        • onnx_diagnostic.reference.ops.op_concat
        • onnx_diagnostic.reference.ops.op_constant_of_shape
        • onnx_diagnostic.reference.ops.op_fused_matmul
        • onnx_diagnostic.reference.ops.op_gather_grad
        • onnx_diagnostic.reference.ops.op_memcpy_host
        • onnx_diagnostic.reference.ops.op_mul_sigmoid
        • onnx_diagnostic.reference.ops.op_negxplus1
        • onnx_diagnostic.reference.ops.op_quick_gelu
        • onnx_diagnostic.reference.ops.op_replace_zero
        • onnx_diagnostic.reference.ops.op_rotary
        • onnx_diagnostic.reference.ops.op_qlinear_average_pool
        • onnx_diagnostic.reference.ops.op_qlinear_conv
        • onnx_diagnostic.reference.ops.op_scatter_elements
        • onnx_diagnostic.reference.ops.op_scatternd_of_shape
        • onnx_diagnostic.reference.ops.op_simplified_layer_normalization
        • onnx_diagnostic.reference.ops.op_skip_layer_normalization
        • onnx_diagnostic.reference.ops.op_slice
        • onnx_diagnostic.reference.ops.op_transpose_cast
        • onnx_diagnostic.reference.ops.op_tri_matrix
      • onnx_diagnostic.reference.torch_ops
        • onnx_diagnostic.reference.torch_ops.access_ops
        • onnx_diagnostic.reference.torch_ops.binary_ops
        • onnx_diagnostic.reference.torch_ops.controlflow_ops
        • onnx_diagnostic.reference.torch_ops.generator_ops
        • onnx_diagnostic.reference.torch_ops.nn_ops
        • onnx_diagnostic.reference.torch_ops.other_ops
        • onnx_diagnostic.reference.torch_ops.reduce_ops
        • onnx_diagnostic.reference.torch_ops.sequence_ops
        • onnx_diagnostic.reference.torch_ops.shape_ops
        • onnx_diagnostic.reference.torch_ops.unary_ops
      • onnx_diagnostic.reference.evaluator
      • onnx_diagnostic.reference.quantized_tensor
      • onnx_diagnostic.reference.ort_evaluator
      • onnx_diagnostic.reference.report_results_comparison
      • onnx_diagnostic.reference.torch_evaluator
    • onnx_diagnostic.tasks
      • onnx_diagnostic.tasks.automatic_speech_recognition
      • onnx_diagnostic.tasks.fill_mask
      • onnx_diagnostic.tasks.feature_extraction
      • onnx_diagnostic.tasks.image_classification
      • onnx_diagnostic.tasks.image_text_to_text
      • onnx_diagnostic.tasks.mixture_of_expert
      • onnx_diagnostic.tasks.object_detection
      • onnx_diagnostic.tasks.sentence_similarity
      • onnx_diagnostic.tasks.summarization
      • onnx_diagnostic.tasks.text_classification
      • onnx_diagnostic.tasks.text_generation
      • onnx_diagnostic.tasks.text_to_image
      • onnx_diagnostic.tasks.text2text_generation
      • onnx_diagnostic.tasks.zero_shot_image_classification
    • onnx_diagnostic.torch_export_patches
      • onnx_diagnostic.torch_export_patches.eval
        • onnx_diagnostic.torch_export_patches.eval.model_cases
      • onnx_diagnostic.torch_export_patches.onnx_export_errors
      • onnx_diagnostic.torch_export_patches.onnx_export_serialization
      • onnx_diagnostic.torch_export_patches.patches
        • onnx_diagnostic.torch_export_patches.patches.patch_torch
        • onnx_diagnostic.torch_export_patches.patches.patch_transformers
      • onnx_diagnostic.torch_export_patches.patch_details
      • onnx_diagnostic.torch_export_patches.patch_expressions
      • onnx_diagnostic.torch_export_patches.patch_inputs
      • onnx_diagnostic.torch_export_patches.patch_module
      • onnx_diagnostic.torch_export_patches.patch_module_helper
      • onnx_diagnostic.torch_export_patches.serialization
        • onnx_diagnostic.torch_export_patches.serialization.diffusers_impl
        • onnx_diagnostic.torch_export_patches.serialization.transformers_impl
    • onnx_diagnostic.torch_models
      • onnx_diagnostic.torch_models.code_sample
      • onnx_diagnostic.torch_models.hghub
        • onnx_diagnostic.torch_models.hghub.hub_api
        • onnx_diagnostic.torch_models.hghub.hub_data
        • onnx_diagnostic.torch_models.hghub.model_inputs
      • onnx_diagnostic.torch_models.llms
      • onnx_diagnostic.torch_models.validate
    • onnx_diagnostic.torch_onnx
      • onnx_diagnostic.torch_onnx.runtime_info
      • onnx_diagnostic.torch_onnx.sbs
      • onnx_diagnostic.torch_onnx.sbs_dataclasses
    • onnx_diagnostic.api
    • onnx_diagnostic.ext_test_case
  • Command Lines
    • -m onnx_diagnostic config … prints the config for a model id
    • -m onnx_diagnostic sbs … runs a side-by-side torch/onnx comparison
    • -m onnx_diagnostic validate … validates a model id
  • Examples Gallery
    • Dumps intermediate results of a torch model
    • Dynamic Shapes for *args, **kwargs
    • Export Tiny-LLM with patches
    • Export microsoft/phi-2
    • Export with DynamicCache and guessed dynamic shapes
    • Export with dynamic dimensions in {0,1}
    • Export with dynamic dimensions in {0,1} into ONNX
    • Export with dynamic dimensions in {0,1} into ONNX (custom)
    • Find and fix an export issue due to dynamic shapes
    • Find where a model is failing by running submodels
    • Intermediate results with (ONNX) ReferenceEvaluator
    • Intermediate results with onnxruntime
    • Steal method forward to guess inputs and dynamic shapes (with Tiny-LLM)
    • Test the export on untrained models
  • Common Export Issues
    • 0, 1, 2 for a Dynamic Dimension in the dummy example to export a model
    • Builds dynamic shapes from any input
    • Cannot export torch.sym_max(x.shape[0], y.shape[0])
    • Do not use python int with dynamic shapes
    • Export a model with a control flow (If)
    • Half certain nonzero
    • JSON returns list when the original dynamic shapes are list or tuple
    • Use DYNAMIC or AUTO when exporting if the dynamic shapes have constraints
  • Technical Details
    • Dynamic Shapes and Broadcasting
    • From an LLM to processing a prompt
    • Gemm or Matmul + Add
    • LayerNormalization implementation cannot be exchanged
    • Reproducible Parallelized Reduction is difficult

More

  • Change Logs
  • License
Back to top
View this page

onnx_diagnostic.helpers.dot_helper¶

onnx_diagnostic.helpers.dot_helper.to_dot(model: ModelProto) → str[source]¶

Converts a model into a dot graph. Here is an example:

digraph {
  graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
  node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
  edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
  I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
  I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
  I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
  I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
  I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
  i_5 [label="p_model_layers_0_self_attn_q_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
  i_6 [label="p_model_layers_0_self_attn_k_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
  i_7 [label="p_model_layers_0_self_attn_v_proj_weight::T10\nFLOAT(192, 96)", fillcolor="#cccc00"];
  i_8 [label="p_model_layers_0_self_attn_o_proj_weight::T10\nFLOAT(192, 192)", fillcolor="#cccc00"];
  i_9 [label="p_model_layers_0_mlp_gate_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
  i_10 [label="p_model_layers_0_mlp_up_proj_weight::T10\nFLOAT(192, 1024)", fillcolor="#cccc00"];
  i_11 [label="p_model_layers_0_mlp_down_proj_weight::T10\nFLOAT(1024, 192)", fillcolor="#cccc00"];
  i_12 [label="p_lm_head_weight::T10\nFLOAT(192, 32000)", fillcolor="#cccc00"];
  i_13 [label="to_422\nFLOAT(1, 1, 48)", fillcolor="#cccc00"];
  i_14 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
  i_15 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
  i_16 [label="model.layers.0.post_attention_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
  i_17 [label="model.norm.weight\nFLOAT(192)", fillcolor="#cccc00"];
  Cast_18 [label="Cast(., to=FLOAT)", fillcolor="#cccccc"];
  Unsqueeze_19 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
  Shape_20 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
  Squeeze_21 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Range_22 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Shape_23 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
  Concat_24 [label="Concat(., [1], axis=0)", fillcolor="#cccccc"];
  Expand_25 [label="Expand(., .)", fillcolor="#cccccc"];
  Squeeze_26 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Shape_27 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
  Add_28 [label="Add(., .)", fillcolor="#cccccc"];
  Squeeze_29 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Range_30 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Unsqueeze_31 [label="Unsqueeze(., [0, 1, 2])", fillcolor="#eeeeee"];
  CausalMask_32 [label="intermediate.\nCausalMask(., .)", fillcolor="#cccccc"];
  Gather_33 [label="Gather(., .)", fillcolor="#cccccc"];
  SimplifiedLayerNormalization_34 [label="SimplifiedLayerNormalization(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
  Cast_35 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
  Range_36 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Shape_37 [label="Shape(., start=-1)", fillcolor="#d2a81f"];
  Mul_38 [label="Mul(., .)", fillcolor="#cccccc"];
  Unsqueeze_39 [label="Unsqueeze(., [1, 2, 3])", fillcolor="#eeeeee"];
  Add_40 [label="Add(., .)", fillcolor="#cccccc"];
  Reshape_41 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
  Reshape_42 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
  Gather_43 [label="Gather(., .)", fillcolor="#cccccc"];
  Shape_44 [label="Shape(.)", fillcolor="#d2a81f"];
  Reshape_45 [label="Reshape(., .)", fillcolor="#eeeeee"];
  And_46 [label="And(., .)", fillcolor="#cccccc"];
  Constant_47 [label="Constant() -> to_5-ZEROS2", fillcolor="#cccccc"];
  Reshape_48 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Mul_49 [label="Mul(., .)", fillcolor="#cccccc"];
  Sin_50 [label="Sin(.)", fillcolor="#cccccc"];
  Squeeze_51 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
  Cos_52 [label="Cos(.)", fillcolor="#cccccc"];
  Squeeze_53 [label="Squeeze(., [0])", fillcolor="#eeeeee"];
  MatMul_54 [label="MatMul(., .)", fillcolor="#ee9999"];
  RotaryEmbedding_55 [label="com.microsoft.\nRotaryEmbedding(., ., ., .)", fillcolor="#cccccc"];
  Reshape_56 [label="Reshape(., [0, 0, 2, 96])", fillcolor="#eeeeee"];
  Transpose_57 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
  MatMul_58 [label="MatMul(., .)", fillcolor="#ee9999"];
  Unsqueeze_59 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
  RotaryEmbedding_60 [label="com.microsoft.\nRotaryEmbedding(., ., ., .)", fillcolor="#cccccc"];
  MatMul_61 [label="MatMul(., .)", fillcolor="#ee9999"];
  Unsqueeze_62 [label="Unsqueeze(., [1])", fillcolor="#eeeeee"];
  Concat_63 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
  Concat_64 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
  Unsqueeze_65 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
  Expand_66 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
  Squeeze_67 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
  Unsqueeze_68 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
  Expand_69 [label="Expand(., [1, 1, 2, 1, 1])", fillcolor="#cccccc"];
  Squeeze_70 [label="Squeeze(., [1])", fillcolor="#eeeeee"];
  LocalAttention_to1_71 [label="intermediate.\nLocalAttention_to1(., ., ., ., [0.31947157])", fillcolor="#cccccc"];
  Transpose_72 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
  Reshape_73 [label="Reshape(., [0, 0, 192])", fillcolor="#eeeeee"];
  MatMul_74 [label="MatMul(., .)", fillcolor="#ee9999"];
  SkipSimplifiedLayerNormalization_75 [label="com.microsoft.\nSkipSimplifiedLayerNormalization(., ., .)", fillcolor="#cccccc"];
  MatMul_76 [label="MatMul(., .)", fillcolor="#ee9999"];
  QuickGelu_77 [label="com.microsoft.\nQuickGelu(.)", fillcolor="#cccccc"];
  MatMul_78 [label="MatMul(., .)", fillcolor="#ee9999"];
  Mul_79 [label="Mul(., .)", fillcolor="#cccccc"];
  MatMul_80 [label="MatMul(., .)", fillcolor="#ee9999"];
  SkipSimplifiedLayerNormalization_81 [label="com.microsoft.\nSkipSimplifiedLayerNormalization(., ., .)", fillcolor="#cccccc"];
  MatMul_82 [label="MatMul(., .)", fillcolor="#ee9999"];
  I_2 -> Cast_18 [label="INT64(batch,seq_length)"];
  Cast_18 -> Unsqueeze_19 [label="FLOAT(batch,seq_length)"];
  I_0 -> Shape_20 [label="INT64(batch,seq_length)"];
  Shape_20 -> Squeeze_21 [label="INT64(1)"];
  Squeeze_21 -> Range_22 [label="INT64()"];
  I_2 -> Shape_23 [label="INT64(batch,seq_length)"];
  Shape_23 -> Concat_24 [label="INT64(1)"];
  Range_22 -> Expand_25 [label="INT64(seq_length)"];
  Concat_24 -> Expand_25 [label="INT64(2)"];
  Shape_23 -> Squeeze_26 [label="INT64(1)"];
  I_3 -> Shape_27 [label="FLOAT(batch,1,cache_length,96)"];
  Shape_27 -> Add_28 [label="INT64(1)"];
  Shape_20 -> Add_28 [label="INT64(1)"];
  Add_28 -> Squeeze_29 [label="INT64(1)"];
  Squeeze_29 -> Range_30 [label="INT64()"];
  Range_30 -> Unsqueeze_31 [label="INT64(cache_length+seq_length)"];
  Shape_27 -> CausalMask_32 [label="INT64(1)"];
  Add_28 -> CausalMask_32 [label="INT64(1)"];
  i_14 -> Gather_33 [label="FLOAT(32000, 192)"];
  I_0 -> Gather_33 [label="INT64(batch,seq_length)"];
  Gather_33 -> SimplifiedLayerNormalization_34 [label="FLOAT(batch,seq_length,192)"];
  i_15 -> SimplifiedLayerNormalization_34 [label="FLOAT(192)"];
  I_1 -> Cast_35 [label="INT64(batch,cache+seq)"];
  Squeeze_26 -> Range_36 [label="INT64()"];
  Cast_35 -> Shape_37 [label="BOOL(batch,cache+seq)"];
  Range_36 -> Mul_38 [label="INT64(batch)"];
  Shape_37 -> Mul_38 [label="INT64(1)"];
  Mul_38 -> Unsqueeze_39 [label="INT64(batch)"];
  Unsqueeze_31 -> Add_40 [label="INT64(1,1,1,cache_length+seq_length)"];
  Unsqueeze_39 -> Add_40 [label="INT64(batch,1,1,1)"];
  Cast_35 -> Reshape_41 [label="BOOL(batch,cache+seq)"];
  Add_40 -> Reshape_42 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
  Reshape_41 -> Gather_43 [label="BOOL(batch*(cache+seq))"];
  Reshape_42 -> Gather_43 [label="INT64(batch*(cache_length+seq_length))"];
  Add_40 -> Shape_44 [label="INT64(batch,\n1,\n1,\ncache_length+seq_length)"];
  Gather_43 -> Reshape_45 [label="BOOL(batch*(cache_length+seq_length))"];
  Shape_44 -> Reshape_45 [label="INT64(4)"];
  CausalMask_32 -> And_46 [label="BOOL(1,\n1,\nseq_length,\ncache_length+seq_length)"];
  Reshape_45 -> And_46 [label="BOOL(batch,\n1,\n1,\ncache_length+seq_length)"];
  Unsqueeze_19 -> Reshape_48 [label="FLOAT(batch,1,seq_length)"];
  Constant_47 -> Reshape_48 [label="INT64(3)"];
  i_13 -> Mul_49 [label="FLOAT(1, 1, 48)"];
  Reshape_48 -> Mul_49 [label="FLOAT(batch,seq_length,1)"];
  Mul_49 -> Sin_50 [label="FLOAT(batch,seq_length,48)"];
  Sin_50 -> Squeeze_51 [label="FLOAT(batch,seq_length,48)"];
  Mul_49 -> Cos_52 [label="FLOAT(batch,seq_length,48)"];
  Cos_52 -> Squeeze_53 [label="FLOAT(batch,seq_length,48)"];
  SimplifiedLayerNormalization_34 -> MatMul_54 [label="FLOAT(batch,seq_length,192)"];
  i_5 -> MatMul_54 [label="FLOAT(192, 192)"];
  MatMul_54 -> RotaryEmbedding_55 [label="FLOAT(batch,seq_length,192)"];
  Expand_25 -> RotaryEmbedding_55 [label="INT64(batch,seq_length)"];
  Squeeze_53 -> RotaryEmbedding_55 [label="FLOAT(seq_length,48)"];
  Squeeze_51 -> RotaryEmbedding_55 [label="FLOAT(seq_length,48)"];
  RotaryEmbedding_55 -> Reshape_56 [label="FLOAT(batch,seq_length,192)"];
  Reshape_56 -> Transpose_57 [label="FLOAT(batch,seq_length,2,96)"];
  SimplifiedLayerNormalization_34 -> MatMul_58 [label="FLOAT(batch,seq_length,192)"];
  i_6 -> MatMul_58 [label="FLOAT(192, 96)"];
  MatMul_58 -> Unsqueeze_59 [label="FLOAT(batch,seq_length,96)"];
  Unsqueeze_59 -> RotaryEmbedding_60 [label="FLOAT(batch,1,seq_length,96)"];
  Expand_25 -> RotaryEmbedding_60 [label="INT64(batch,seq_length)"];
  Squeeze_53 -> RotaryEmbedding_60 [label="FLOAT(seq_length,48)"];
  Squeeze_51 -> RotaryEmbedding_60 [label="FLOAT(seq_length,48)"];
  SimplifiedLayerNormalization_34 -> MatMul_61 [label="FLOAT(batch,seq_length,192)"];
  i_7 -> MatMul_61 [label="FLOAT(192, 96)"];
  MatMul_61 -> Unsqueeze_62 [label="FLOAT(batch,seq_length,96)"];
  I_3 -> Concat_63 [label="FLOAT(batch,1,cache_length,96)"];
  RotaryEmbedding_60 -> Concat_63 [label="FLOAT(batch,1,seq_length,96)"];
  I_4 -> Concat_64 [label="FLOAT(batch,1,cache_length,96)"];
  Unsqueeze_62 -> Concat_64 [label="FLOAT(batch,1,seq_length,96)"];
  Concat_63 -> Unsqueeze_65;
  Unsqueeze_65 -> Expand_66 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
  Expand_66 -> Squeeze_67 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
  Concat_64 -> Unsqueeze_68;
  Unsqueeze_68 -> Expand_69 [label="FLOAT(batch,\n1,\n1,\ncache_length+seq_length,\n96)"];
  Expand_69 -> Squeeze_70 [label="FLOAT(batch,\n1,\n2,\ncache_length+seq_length,\n96)"];
  Transpose_57 -> LocalAttention_to1_71 [label="FLOAT(batch,2,seq_length,96)"];
  Squeeze_67 -> LocalAttention_to1_71 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
  Squeeze_70 -> LocalAttention_to1_71 [label="FLOAT(batch,\n2,\ncache_length+seq_length,\n96)"];
  And_46 -> LocalAttention_to1_71 [label="BOOL(batch,\n1,\nseq_length,\ncache_length+seq_length)"];
  LocalAttention_to1_71 -> Transpose_72 [label="FLOAT(batch,2,seq_length,96)"];
  Transpose_72 -> Reshape_73 [label="FLOAT(batch,seq_length,2,96)"];
  Reshape_73 -> MatMul_74 [label="FLOAT(batch,seq_length,192)"];
  i_8 -> MatMul_74 [label="FLOAT(192, 192)"];
  Gather_33 -> SkipSimplifiedLayerNormalization_75 [label="FLOAT(batch,seq_length,192)"];
  MatMul_74 -> SkipSimplifiedLayerNormalization_75 [label="FLOAT(batch,seq_length,192)"];
  i_16 -> SkipSimplifiedLayerNormalization_75 [label="FLOAT(192)"];
  SkipSimplifiedLayerNormalization_75 -> MatMul_76 [label="FLOAT(batch,seq_length,192)"];
  i_9 -> MatMul_76 [label="FLOAT(192, 1024)"];
  MatMul_76 -> QuickGelu_77 [label="FLOAT(batch,seq_length,1024)"];
  SkipSimplifiedLayerNormalization_75 -> MatMul_78 [label="FLOAT(batch,seq_length,192)"];
  i_10 -> MatMul_78 [label="FLOAT(192, 1024)"];
  QuickGelu_77 -> Mul_79 [label="FLOAT(batch,seq_length,1024)"];
  MatMul_78 -> Mul_79 [label="FLOAT(batch,seq_length,1024)"];
  Mul_79 -> MatMul_80 [label="FLOAT(batch,seq_length,1024)"];
  i_11 -> MatMul_80 [label="FLOAT(1024, 192)"];
  SkipSimplifiedLayerNormalization_75 -> SkipSimplifiedLayerNormalization_81 [label="FLOAT(batch,seq_length,192)"];
  MatMul_80 -> SkipSimplifiedLayerNormalization_81 [label="FLOAT(batch,seq_length,192)"];
  i_17 -> SkipSimplifiedLayerNormalization_81 [label="FLOAT(192)"];
  SkipSimplifiedLayerNormalization_81 -> MatMul_82 [label="FLOAT(batch,seq_length,192)"];
  i_12 -> MatMul_82 [label="FLOAT(192, 32000)"];
  O_83 [label="output_0\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
  MatMul_82 -> O_83;
  O_84 [label="present_key_values_key_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
  Concat_63 -> O_84;
  O_85 [label="present_key_values_value_0\nFLOAT(batch,1,cache_length+seq_length,96)", fillcolor="#aaaaee"];
  Concat_64 -> O_85;
}

Here is another example, this one obtained with torch.onnx.export():

digraph {
  graph [rankdir=TB, splines=true, overlap=false, nodesep=0.2, ranksep=0.2, fontsize=8];
  node [style="rounded,filled", color="#888888", fontcolor="#222222", shape=box];
  edge [arrowhead=vee, fontsize=7, labeldistance=-5, labelangle=0];
  I_0 [label="input_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
  I_1 [label="attention_mask\nINT64(batch,cache+seq)", fillcolor="#aaeeaa"];
  I_2 [label="position_ids\nINT64(batch,seq_length)", fillcolor="#aaeeaa"];
  I_3 [label="past_key_values_key_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
  I_4 [label="past_key_values_value_0\nFLOAT(batch,1,cache_length,96)", fillcolor="#aaeeaa"];
  i_5 [label="model.layers.0.input_layernorm.weight\nFLOAT(192)", fillcolor="#cccc00"];
  i_6 [label="val_343\nFLOAT(1, 48)", fillcolor="#cccc00"];
  i_7 [label="model.embed_tokens.weight\nFLOAT(32000, 192)", fillcolor="#cccc00"];
  i_8 [label="val_119\nFLOAT(192, 192)", fillcolor="#cccc00"];
  i_9 [label="val_126\nFLOAT(192, 96)", fillcolor="#cccc00"];
  i_10 [label="val_133\nFLOAT(192, 96)", fillcolor="#cccc00"];
  i_11 [label="val_287\nFLOAT(192, 192)", fillcolor="#cccc00"];
  i_12 [label="val_292\nFLOAT(192, 1024)", fillcolor="#cccc00"];
  i_13 [label="val_294\nFLOAT(192, 1024)", fillcolor="#cccc00"];
  i_14 [label="val_295\nFLOAT(1024, 192)", fillcolor="#cccc00"];
  i_15 [label="val_320\nFLOAT(192, 32000)", fillcolor="#cccc00"];
  Shape_16 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
  Squeeze_17 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Shape_18 [label="Shape(., end=1, start=0)", fillcolor="#d2a81f"];
  Squeeze_19 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Shape_20 [label="Shape(., end=2, start=1)", fillcolor="#d2a81f"];
  Squeeze_21 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Shape_22 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
  Squeeze_23 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Shape_24 [label="Shape(., end=3, start=2)", fillcolor="#d2a81f"];
  Squeeze_25 [label="Squeeze(.)", fillcolor="#eeeeee"];
  Add_26 [label="Add(., .)", fillcolor="#cccccc"];
  Max_27 [label="Max(., .)", fillcolor="#cccccc"];
  Add_28 [label="Add(., .)", fillcolor="#cccccc"];
  Add_29 [label="Add(., .)", fillcolor="#cccccc"];
  Gather_30 [label="Gather(., ., axis=0)", fillcolor="#cccccc"];
  Range_31 [label="Range(., ., 1)", fillcolor="#cccccc"];
  Cast_32 [label="Cast(., to=BOOL)", fillcolor="#cccccc"];
  Range_33 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Range_34 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Unsqueeze_35 [label="Unsqueeze(., [1, 2])", fillcolor="#eeeeee"];
  Unsqueeze_36 [label="Unsqueeze(., [3])", fillcolor="#eeeeee"];
  Unsqueeze_37 [label="Unsqueeze(., [0, 1])", fillcolor="#eeeeee"];
  Unsqueeze_38 [label="Unsqueeze(., [3])", fillcolor="#eeeeee"];
  Unsqueeze_39 [label="Unsqueeze(., [0, 1])", fillcolor="#eeeeee"];
  Unsqueeze_40 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
  LessOrEqual_41 [label="LessOrEqual(., .)", fillcolor="#cccccc"];
  And_42 [label="And(True, .)", fillcolor="#cccccc"];
  Max_43 [label="Max(., .)", fillcolor="#cccccc"];
  Shape_44 [label="Shape(., start=0)", fillcolor="#d2a81f"];
  Expand_45 [label="Expand(., .)", fillcolor="#cccccc"];
  Unsqueeze_46 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
  Expand_47 [label="Expand(., .)", fillcolor="#cccccc"];
  Unsqueeze_48 [label="Unsqueeze(., [-1])", fillcolor="#eeeeee"];
  Concat_49 [label="Concat(., ., axis=-1)", fillcolor="#cccccc"];
  GatherND_50 [label="GatherND(., .)", fillcolor="#cccccc"];
  And_51 [label="And(., .)", fillcolor="#cccccc"];
  Reshape_52 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
  Concat_53 [label="Concat(., [1], ., ., axis=0)", fillcolor="#cccccc"];
  Expand_54 [label="Expand(., .)", fillcolor="#cccccc"];
  SimplifiedLayerNormalization_55 [label="SimplifiedLayerNormalization(., ., axis=-1, stash_type=1)", fillcolor="#cccccc"];
  MatMul_56 [label="MatMul(., .)", fillcolor="#ee9999"];
  Concat_57 [label="Concat(., ., [-1], [96], axis=0)", fillcolor="#cccccc"];
  Reshape_58 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Transpose_59 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
  MatMul_60 [label="MatMul(., .)", fillcolor="#ee9999"];
  Reshape_61 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Transpose_62 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
  MatMul_63 [label="MatMul(., .)", fillcolor="#ee9999"];
  Reshape_64 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Transpose_65 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
  ReduceMax_66 [label="ReduceMax(.)", fillcolor="#cccccc"];
  Add_67 [label="Add(., 1)", fillcolor="#cccccc"];
  Range_68 [label="Range(0, ., 1)", fillcolor="#cccccc"];
  Reshape_69 [label="Reshape(., [-1, 1])", fillcolor="#eeeeee"];
  Cast_70 [label="Cast(., to=FLOAT)", fillcolor="#cccccc"];
  MatMul_71 [label="MatMul(., .)", fillcolor="#ee9999"];
  Cos_72 [label="Cos(.)", fillcolor="#cccccc"];
  Sin_73 [label="Sin(.)", fillcolor="#cccccc"];
  RotaryEmbedding_74 [label="com.microsoft.\nRotaryEmbedding(., ., ., .)", fillcolor="#cccccc"];
  RotaryEmbedding_75 [label="com.microsoft.\nRotaryEmbedding(., ., ., .)", fillcolor="#cccccc"];
  Concat_76 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
  Concat_77 [label="Concat(., ., axis=-2)", fillcolor="#cccccc"];
  Unsqueeze_78 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
  Reshape_79 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
  Concat_80 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
  Expand_81 [label="Expand(., .)", fillcolor="#cccccc"];
  Concat_82 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
  Reshape_83 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Unsqueeze_84 [label="Unsqueeze(., [2])", fillcolor="#eeeeee"];
  Reshape_85 [label="Reshape(., [-1])", fillcolor="#eeeeee"];
  Concat_86 [label="Concat(., [1], [2], ., [96], axis=0)", fillcolor="#cccccc"];
  Expand_87 [label="Expand(., .)", fillcolor="#cccccc"];
  Concat_88 [label="Concat(., [2], ., [96], axis=0)", fillcolor="#cccccc"];
  Reshape_89 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Slice_90 [label="Slice(., [0], ., [3], [1])", fillcolor="#cccccc"];
  Shape_91 [label="Shape(., start=0)", fillcolor="#d2a81f"];
  Slice_92 [label="Slice(., [-1], [9223372036854775807])", fillcolor="#cccccc"];
  Slice_93 [label="Slice(., [-2], [-1])", fillcolor="#cccccc"];
  Slice_94 [label="Slice(., [-9223372036854775808], [-2])", fillcolor="#cccccc"];
  Concat_95 [label="Concat([-1], ., ., axis=0)", fillcolor="#cccccc"];
  Reshape_96 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Transpose_97 [label="Transpose(., perm=[0, 2, 1])", fillcolor="#ee99ee"];
  Concat_98 [label="Concat(., ., ., axis=0)", fillcolor="#cccccc"];
  Reshape_99 [label="Reshape(., .)", fillcolor="#eeeeee"];
  Mul_100 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
  Mul_101 [label="Mul(., 0.31947157)", fillcolor="#cccccc"];
  Where_102 [label="Where(., 0.0, -inf)", fillcolor="#cccccc"];
  MatMul_103 [label="MatMul(., .)", fillcolor="#ee9999"];
  Add_104 [label="Add(., .)", fillcolor="#cccccc"];
  Softmax_105 [label="Softmax(., axis=-1)", fillcolor="#cccccc"];
  IsNaN_106 [label="IsNaN(.)", fillcolor="#cccccc"];
  Where_107 [label="Where(., 0.0, .)", fillcolor="#cccccc"];
  MatMul_108 [label="MatMul(., .)", fillcolor="#ee9999"];
  Transpose_109 [label="Transpose(., perm=[0, 2, 1, 3])", fillcolor="#ee99ee"];
  Concat_110 [label="Concat(., ., [-1], axis=0)", fillcolor="#cccccc"];
  Reshape_111 [label="Reshape(., .)", fillcolor="#eeeeee"];
  MatMul_112 [label="MatMul(., .)", fillcolor="#ee9999"];
  SkipSimplifiedLayerNormalization_113 [label="com.microsoft.\nSkipSimplifiedLayerNormalization(., ., .)", fillcolor="#cccccc"];
  MatMul_114 [label="MatMul(., .)", fillcolor="#ee9999"];
  Sigmoid_115 [label="Sigmoid(.)", fillcolor="#cccccc"];
  Mul_116 [label="Mul(., .)", fillcolor="#cccccc"];
  MatMul_117 [label="MatMul(., .)", fillcolor="#ee9999"];
  Mul_118 [label="Mul(., .)", fillcolor="#cccccc"];
  MatMul_119 [label="MatMul(., .)", fillcolor="#ee9999"];
  SkipSimplifiedLayerNormalization_120 [label="com.microsoft.\nSkipSimplifiedLayerNormalization(., ., .)", fillcolor="#cccccc"];
  MatMul_121 [label="MatMul(., .)", fillcolor="#ee9999"];
  I_0 -> Shape_16 [label="INT64(batch,seq_length)"];
  Shape_16 -> Squeeze_17 [label="INT64(1)"];
  I_2 -> Shape_18 [label="INT64(batch,seq_length)"];
  Shape_18 -> Squeeze_19 [label="INT64(1)"];
  I_2 -> Shape_20 [label="INT64(batch,seq_length)"];
  Shape_20 -> Squeeze_21 [label="INT64(1)"];
  I_3 -> Shape_22 [label="FLOAT(batch,1,cache_length,96)"];
  Shape_22 -> Squeeze_23 [label="INT64(1)"];
  I_4 -> Shape_24 [label="FLOAT(batch,1,cache_length,96)"];
  Shape_24 -> Squeeze_25 [label="INT64(1)"];
  Squeeze_23 -> Add_26 [label="INT64()"];
  Squeeze_17 -> Add_26 [label="INT64()"];
  Squeeze_17 -> Max_27 [label="INT64()"];
  Squeeze_21 -> Max_27 [label="INT64()"];
  Squeeze_23 -> Add_28 [label="INT64()"];
  Max_27 -> Add_28 [label="INT64()"];
  Squeeze_25 -> Add_29 [label="INT64()"];
  Squeeze_17 -> Add_29 [label="INT64()"];
  i_7 -> Gather_30 [label="FLOAT(32000, 192)"];
  I_0 -> Gather_30 [label="INT64(batch,seq_length)"];
  Squeeze_23 -> Range_31 [label="INT64()"];
  Add_26 -> Range_31 [label="INT64()"];
  I_1 -> Cast_32 [label="INT64(batch,cache+seq)"];
  Squeeze_19 -> Range_33 [label="INT64()"];
  Add_26 -> Range_34 [label="INT64()"];
  Range_33 -> Unsqueeze_35 [label="INT64(batch)"];
  Unsqueeze_35 -> Unsqueeze_36 [label="INT64(batch,1,1)"];
  Range_31 -> Unsqueeze_37 [label="INT64(seq_length)"];
  Unsqueeze_37 -> Unsqueeze_38 [label="INT64(1,1,seq_length)"];
  Range_34 -> Unsqueeze_39 [label="INT64(cache_length + seq_length)"];
  Unsqueeze_39 -> Unsqueeze_40 [label="INT64(1,1,cache_length + seq_length)"];
  Unsqueeze_40 -> LessOrEqual_41 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
  Unsqueeze_38 -> LessOrEqual_41 [label="INT64(1,1,seq_length,1)"];
  LessOrEqual_41 -> And_42 [label="BOOL(1,\n1,\nseq_length,\ncache_length + seq_length)"];
  Unsqueeze_36 -> Max_43 [label="INT64(batch,1,1,1)"];
  Unsqueeze_40 -> Max_43 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
  Max_43 -> Shape_44 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
  Unsqueeze_36 -> Expand_45 [label="INT64(batch,1,1,1)"];
  Shape_44 -> Expand_45 [label="INT64(4)"];
  Expand_45 -> Unsqueeze_46 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
  Unsqueeze_40 -> Expand_47 [label="INT64(1,\n1,\n1,\ncache_length + seq_length)"];
  Shape_44 -> Expand_47 [label="INT64(4)"];
  Expand_47 -> Unsqueeze_48 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length)"];
  Unsqueeze_46 -> Concat_49 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n1)"];
  Unsqueeze_48 -> Concat_49 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n1)"];
  Cast_32 -> GatherND_50 [label="BOOL(batch,cache+seq)"];
  Concat_49 -> GatherND_50 [label="INT64(batch,\n1,\n1,\ncache_length + seq_length,\n2)"];
  And_42 -> And_51 [label="BOOL(1,\n1,\nseq_length,\ncache_length + seq_length)"];
  GatherND_50 -> And_51 [label="BOOL(batch,\n1,\n1,\ncache_length + seq_length)"];
  Add_26 -> Reshape_52 [label="INT64()"];
  Shape_18 -> Concat_53 [label="INT64(1)"];
  Shape_16 -> Concat_53 [label="INT64(1)"];
  Reshape_52 -> Concat_53 [label="INT64(1)"];
  And_51 -> Expand_54 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
  Concat_53 -> Expand_54 [label="INT64(4)"];
  Gather_30 -> SimplifiedLayerNormalization_55 [label="FLOAT(batch,seq_length,192)"];
  i_5 -> SimplifiedLayerNormalization_55 [label="FLOAT(192)"];
  SimplifiedLayerNormalization_55 -> MatMul_56 [label="FLOAT(batch,seq_length,192)"];
  i_8 -> MatMul_56 [label="FLOAT(192, 192)"];
  Shape_18 -> Concat_57 [label="INT64(1)"];
  Shape_16 -> Concat_57 [label="INT64(1)"];
  MatMul_56 -> Reshape_58 [label="FLOAT(batch,seq_length,192)"];
  Concat_57 -> Reshape_58 [label="INT64(4)"];
  Reshape_58 -> Transpose_59 [label="FLOAT(batch,seq_length,2,96)"];
  SimplifiedLayerNormalization_55 -> MatMul_60 [label="FLOAT(batch,seq_length,192)"];
  i_9 -> MatMul_60 [label="FLOAT(192, 96)"];
  MatMul_60 -> Reshape_61 [label="FLOAT(batch,seq_length,96)"];
  Concat_57 -> Reshape_61 [label="INT64(4)"];
  Reshape_61 -> Transpose_62 [label="FLOAT(batch,seq_length,1,96)"];
  SimplifiedLayerNormalization_55 -> MatMul_63 [label="FLOAT(batch,seq_length,192)"];
  i_10 -> MatMul_63 [label="FLOAT(192, 96)"];
  MatMul_63 -> Reshape_64 [label="FLOAT(batch,seq_length,96)"];
  Concat_57 -> Reshape_64 [label="INT64(4)"];
  Reshape_64 -> Transpose_65 [label="FLOAT(batch,seq_length,1,96)"];
  I_2 -> ReduceMax_66 [label="INT64(batch,seq_length)"];
  ReduceMax_66 -> Add_67 [label="INT64()"];
  Add_67 -> Range_68 [label="INT64()"];
  Range_68 -> Reshape_69 [label="INT64(?)"];
  Reshape_69 -> Cast_70 [label="INT64(?,1)"];
  Cast_70 -> MatMul_71 [label="FLOAT(?,1)"];
  i_6 -> MatMul_71 [label="FLOAT(1, 48)"];
  MatMul_71 -> Cos_72 [label="FLOAT(?,48)"];
  MatMul_71 -> Sin_73 [label="FLOAT(?,48)"];
  Transpose_59 -> RotaryEmbedding_74 [label="FLOAT(batch,2,seq_length,96)"];
  I_2 -> RotaryEmbedding_74 [label="INT64(batch,seq_length)"];
  Cos_72 -> RotaryEmbedding_74 [label="FLOAT(?,48)"];
  Sin_73 -> RotaryEmbedding_74 [label="FLOAT(?,48)"];
  Transpose_62 -> RotaryEmbedding_75 [label="FLOAT(batch,1,seq_length,96)"];
  I_2 -> RotaryEmbedding_75 [label="INT64(batch,seq_length)"];
  Cos_72 -> RotaryEmbedding_75 [label="FLOAT(?,48)"];
  Sin_73 -> RotaryEmbedding_75 [label="FLOAT(?,48)"];
  I_3 -> Concat_76 [label="FLOAT(batch,1,cache_length,96)"];
  RotaryEmbedding_75 -> Concat_76 [label="FLOAT(batch,\n1,\nMax(seq_length, seq_length),\n96)"];
  I_4 -> Concat_77 [label="FLOAT(batch,1,cache_length,96)"];
  Transpose_65 -> Concat_77 [label="FLOAT(batch,1,seq_length,96)"];
  Concat_76 -> Unsqueeze_78;
  Add_28 -> Reshape_79 [label="INT64()"];
  Shape_18 -> Concat_80 [label="INT64(1)"];
  Reshape_79 -> Concat_80 [label="INT64(1)"];
  Unsqueeze_78 -> Expand_81 [label="FLOAT(batch,\n1,\n1,\ncache_length + Max(seq_length, seq_length),\n96)"];
  Concat_80 -> Expand_81 [label="INT64(5)"];
  Shape_18 -> Concat_82 [label="INT64(1)"];
  Reshape_79 -> Concat_82 [label="INT64(1)"];
  Expand_81 -> Reshape_83 [label="FLOAT(batch,\n1,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
  Concat_82 -> Reshape_83 [label="INT64(4)"];
  Concat_77 -> Unsqueeze_84;
  Add_29 -> Reshape_85 [label="INT64()"];
  Shape_18 -> Concat_86 [label="INT64(1)"];
  Reshape_85 -> Concat_86 [label="INT64(1)"];
  Unsqueeze_84 -> Expand_87 [label="FLOAT(batch,\n1,\n1,\ncache_length + seq_length,\n96)"];
  Concat_86 -> Expand_87 [label="INT64(5)"];
  Shape_18 -> Concat_88 [label="INT64(1)"];
  Reshape_85 -> Concat_88 [label="INT64(1)"];
  Expand_87 -> Reshape_89 [label="FLOAT(batch,\n1,\n2,\ncache_length + seq_length,\n96)"];
  Concat_88 -> Reshape_89 [label="INT64(4)"];
  Expand_54 -> Slice_90 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + seq_length)"];
  Reshape_79 -> Slice_90 [label="INT64(1)"];
  Reshape_83 -> Shape_91 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
  Shape_91 -> Slice_92 [label="INT64(4)"];
  Shape_91 -> Slice_93 [label="INT64(4)"];
  Shape_91 -> Slice_94 [label="INT64(4)"];
  Slice_93 -> Concat_95 [label="INT64(1)"];
  Slice_92 -> Concat_95 [label="INT64(1)"];
  Reshape_83 -> Reshape_96 [label="FLOAT(batch,\n2,\ncache_length + Max(seq_length, seq_length),\n96)"];
  Concat_95 -> Reshape_96 [label="INT64(3)"];
  Reshape_96 -> Transpose_97 [label="FLOAT(?,\ncache_length + Max(seq_length, seq_length),\n96)"];
  Slice_94 -> Concat_98 [label="INT64(2)"];
  Slice_92 -> Concat_98 [label="INT64(1)"];
  Slice_93 -> Concat_98 [label="INT64(1)"];
  Transpose_97 -> Reshape_99 [label="FLOAT(?,\n96,\ncache_length + Max(seq_length, seq_length))"];
  Concat_98 -> Reshape_99 [label="INT64(4)"];
  RotaryEmbedding_74 -> Mul_100 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
  Reshape_99 -> Mul_101 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
  Slice_90 -> Where_102 [label="BOOL(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
  Mul_100 -> MatMul_103 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
  Mul_101 -> MatMul_103 [label="FLOAT(batch,\n2,\n96,\ncache_length + Max(seq_length, seq_length))"];
  MatMul_103 -> Add_104 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\ncache_length + Max(seq_length, seq_length))"];
  Where_102 -> Add_104 [label="FLOAT(batch,\n1,\nseq_length,\ncache_length + Max(seq_length, seq_length))"];
  Add_104 -> Softmax_105 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
  Softmax_105 -> IsNaN_106 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
  IsNaN_106 -> Where_107 [label="BOOL(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
  Softmax_105 -> Where_107 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
  Where_107 -> MatMul_108 [label="FLOAT(batch,\n2,\n?,\ncache_length + Max(seq_length, seq_length))"];
  Reshape_89 -> MatMul_108 [label="FLOAT(batch,\n2,\ncache_length + seq_length,\n96)"];
  MatMul_108 -> Transpose_109 [label="FLOAT(batch,\n2,\nMax(seq_length, seq_length),\n96)"];
  Shape_18 -> Concat_110 [label="INT64(1)"];
  Shape_16 -> Concat_110 [label="INT64(1)"];
  Transpose_109 -> Reshape_111 [label="FLOAT(batch,\nMax(seq_length, seq_length),\n2,\n96)"];
  Concat_110 -> Reshape_111 [label="INT64(3)"];
  Reshape_111 -> MatMul_112 [label="FLOAT(batch,\nseq_length,\n((192*Max(seq_length, seq_length))//seq_length))"];
  i_11 -> MatMul_112 [label="FLOAT(192, 192)"];
  MatMul_112 -> SkipSimplifiedLayerNormalization_113 [label="FLOAT(batch,seq_length,192)"];
  Gather_30 -> SkipSimplifiedLayerNormalization_113 [label="FLOAT(batch,seq_length,192)"];
  i_5 -> SkipSimplifiedLayerNormalization_113 [label="FLOAT(192)"];
  SkipSimplifiedLayerNormalization_113 -> MatMul_114 [label="FLOAT(batch,seq_length,192)"];
  i_12 -> MatMul_114 [label="FLOAT(192, 1024)"];
  MatMul_114 -> Sigmoid_115 [label="FLOAT(batch,seq_length,1024)"];
  MatMul_114 -> Mul_116 [label="FLOAT(batch,seq_length,1024)"];
  Sigmoid_115 -> Mul_116 [label="FLOAT(batch,seq_length,1024)"];
  SkipSimplifiedLayerNormalization_113 -> MatMul_117 [label="FLOAT(batch,seq_length,192)"];
  i_13 -> MatMul_117 [label="FLOAT(192, 1024)"];
  Mul_116 -> Mul_118 [label="FLOAT(batch,seq_length,1024)"];
  MatMul_117 -> Mul_118 [label="FLOAT(batch,seq_length,1024)"];
  Mul_118 -> MatMul_119 [label="FLOAT(batch,seq_length,1024)"];
  i_14 -> MatMul_119 [label="FLOAT(1024, 192)"];
  MatMul_119 -> SkipSimplifiedLayerNormalization_120 [label="FLOAT(batch,seq_length,192)"];
  SkipSimplifiedLayerNormalization_113 -> SkipSimplifiedLayerNormalization_120 [label="FLOAT(batch,seq_length,192)"];
  i_5 -> SkipSimplifiedLayerNormalization_120 [label="FLOAT(192)"];
  SkipSimplifiedLayerNormalization_120 -> MatMul_121 [label="FLOAT(batch,seq_length,192)"];
  i_15 -> MatMul_121 [label="FLOAT(192, 32000)"];
  O_122 [label="linear_7\nFLOAT(batch,seq_length,32000)", fillcolor="#aaaaee"];
  MatMul_121 -> O_122;
  O_123 [label="cat_7\nFLOAT(batch,1,cache_length + Max(seq_length, seq_length),96)", fillcolor="#aaaaee"];
  Concat_76 -> O_123;
  O_124 [label="cat_8\nFLOAT(batch,1,cache_length + seq_length,96)", fillcolor="#aaaaee"];
  Concat_77 -> O_124;
}
Next
onnx_diagnostic.helpers.fake_tensor_helper
Previous
onnx_diagnostic.helpers.doc_helper
Copyright © 2025
Made with Sphinx and @pradyunsg's Furo
On this page
  • onnx_diagnostic.helpers.dot_helper
    • to_dot()