experimental_experiment.torch_interpreter._aten_functions_attention

See https://pytorch.org/docs/stable/torch.compiler_ir.html for the full list of aten functions.
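
All three converters below lower variants of the same computation. As a point of reference (a minimal sketch of the math, not this module's implementation), scaled dot product attention computes softmax(Q @ K^T * scale [+ mask]) @ V:

import math
import torch

def sdpa_reference(query, key, value, attn_mask=None, is_causal=False, scale=None):
    # Reference semantics: softmax(Q @ K^T * scale [+ mask]) @ V.
    if scale is None:
        scale = 1.0 / math.sqrt(query.shape[-1])
    scores = query @ key.transpose(-2, -1) * scale
    if is_causal:  # is_causal and attn_mask are normally mutually exclusive
        L, S = scores.shape[-2], scores.shape[-1]
        mask = torch.ones(L, S, dtype=torch.bool).tril()
        scores = scores.masked_fill(~mask, float("-inf"))
    elif attn_mask is not None:
        scores = scores + attn_mask  # additive mask, broadcast over batch/heads
    return torch.softmax(scores, dim=-1) @ value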

experimental_experiment.torch_interpreter._aten_functions_attention.aten__scaled_dot_product_efficient_attention(g: GraphBuilder, sts: Dict[str, Any] | None, outputs: List[str], query: str, key: str, value: str, attn_bias: str | None, compute_log_sumexp: bool, dropout_p: float = 0.0, is_causal: bool = False, scale: float | None = None, name: str = '_scaled_dot_product_efficient_attention') → Tuple[str, str, str, str]

_scaled_dot_product_efficient_attention (CUDA)
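
A hedged sketch of the aten op this converter targets, called directly on CUDA. On recent PyTorch versions the op returns four values, matching the converter's Tuple[str, str, str, str]; the exact schema may vary across versions:

import torch

if torch.cuda.is_available():
    # (batch, num_heads, seq_len, head_dim)
    q = torch.randn(2, 4, 8, 16, device="cuda")
    k = torch.randn(2, 4, 8, 16, device="cuda")
    v = torch.randn(2, 4, 8, 16, device="cuda")
    # Returns (output, log_sumexp, philox_seed, philox_offset)
    # on recent PyTorch versions.
    out, logsumexp, seed, offset = torch.ops.aten._scaled_dot_product_efficient_attention(
        q, k, v, None, True  # attn_bias=None, compute_log_sumexp=True
    )
    print(out.shape)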

experimental_experiment.torch_interpreter._aten_functions_attention.aten__scaled_dot_product_flash_attention_for_cpu(g: GraphBuilder, sts: Dict[str, Any] | None, outputs: List[str], query: str, key: str, value: str, dropout_p: float = 0.0, is_causal: bool = False, attn_mask: str | None = None, scale: float | None = None, return_debug_mask: bool = False, name: str = '_scaled_dot_product_flash_attention_for_cpu_default') → Tuple[str, str, str, str, str, str, str, str, str]

_scaled_dot_product_flash_attention (CPU)
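
On CPU, torch.export commonly lowers torch.nn.functional.scaled_dot_product_attention to this aten op. A hedged sketch of how a graph containing it is produced (the exact lowering depends on the PyTorch version and backend):

import torch

class Attn(torch.nn.Module):
    def forward(self, q, k, v):
        return torch.nn.functional.scaled_dot_product_attention(q, k, v, is_causal=True)

q, k, v = (torch.randn(2, 4, 8, 16) for _ in range(3))
ep = torch.export.export(Attn(), (q, k, v))
# The exported graph should contain a call to
# aten._scaled_dot_product_flash_attention_for_cpu (version dependent).
print(ep.graph)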

experimental_experiment.torch_interpreter._aten_functions_attention.aten_scaled_dot_product_attention(g: GraphBuilder, sts: Dict[str, Any] | None, outputs: List[str], query: str, key: str, value: str, attn_mask: str | None = None, dropout_p: float = 0.0, is_causal: bool = False, scale: float | None = None, enable_gqa: bool = False, name: str = 'aten_scaled_dot_product_attention')

scaled_dot_product_attention
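
This converter mirrors the public PyTorch API: its attn_mask, dropout_p, is_causal, scale, and enable_gqa arguments track the parameters of torch.nn.functional.scaled_dot_product_attention. A short usage sketch of that API:

import torch
import torch.nn.functional as F

q = torch.randn(2, 4, 8, 16)  # (batch, num_heads, seq_len, head_dim)
k = torch.randn(2, 4, 8, 16)
v = torch.randn(2, 4, 8, 16)
out = F.scaled_dot_product_attention(q, k, v, attn_mask=None, dropout_p=0.0, is_causal=False)
print(out.shape)  # torch.Size([2, 4, 8, 16])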