Source code for onnx_extended.helper.make_dynamic_quantize_linear

from typing import Dict, Optional
from onnx import AttributeProto, FunctionProto, TensorProto
from onnx.helper import (
    make_function,
    make_node,
    make_opsetid,
    make_tensor,
)



[docs]
def make_dynamic_quantize_linear_function_proto(
    domain: str, opset: int, to: Optional[int] = None
) -> FunctionProto:
    """
    Creates the FunctionProto for function `DynamicQuantizeLinear`
    doing a quantization to float 8.

    :param domain: local domain name
    :param opset: opset to use to define the function
    :param to: if None, the quantized type is read from the function
        attribute *to*, otherwise the given value is hard-coded
        into the nodes
    :return: FunctionProto
    :raises ValueError: if *to* is given and is not one of
        `FLOAT8E4M3FN`, `FLOAT8E4M3FNUZ`, `FLOAT8E5M2`, `FLOAT8E5M2FNUZ`

    The function takes 1 input and returns 3 outputs like
    operator `DynamicQuantizeLinear
    <https://onnx.ai/onnx/operators/onnx__DynamicQuantizeLinear.html>`_.
    It has one attribute *to* which specifies the quantized type.

    The scale is the square root of the sum of the squared non-nan
    values divided by the number of non-nan values, divided again by
    a constant which depends on the quantized type. The zero point
    is zero cast into the quantized type.
    """
    # Constant dividing the root mean square of the input,
    # one entry per supported float 8 type.
    normalization_values: Dict[int, float] = {
        TensorProto.FLOAT8E4M3FN: 100.057724,
        TensorProto.FLOAT8E4M3FNUZ: 54.26635,
        TensorProto.FLOAT8E5M2: 9535.286,
        TensorProto.FLOAT8E5M2FNUZ: 9403.499,
    }

    if to is not None and to not in normalization_values:
        # Without this check, LabelEncoder would not find the type among its
        # keys and the scale would be divided by the encoder's default value.
        raise ValueError(
            f"Unsupported quantized type to={to!r}, "
            f"expected one of {sorted(normalization_values)}."
        )

    def _ref_to(name: str) -> AttributeProto:
        # Builds a node attribute *name* whose value is taken from
        # the function attribute "to" when the function is called.
        att = AttributeProto()
        att.name = name
        att.ref_attr_name = "to"
        att.type = AttributeProto.INT
        return att

    if to is None:
        # The quantized type is only known when the function is called.
        cast = make_node("Cast", ["zerof"], ["Zeropoint"])
        cast.attribute.append(_ref_to("to"))
        cst = make_node("Constant", [], ["vto"])
        cst.attribute.append(_ref_to("value_int"))
    else:
        # The quantized type is fixed when the function is built.
        cast = make_node("Cast", ["zerof"], ["Zeropoint"], to=to)
        cst = make_node("Constant", [], ["vto"], value_int=to)

    nodes = [
        make_node(
            "Constant",
            [],
            ["zerof"],
            value=make_tensor("zerof", TensorProto.FLOAT, [], [0]),
        ),
        make_node(
            "Constant",
            [],
            ["newshape"],
            value=make_tensor("newshape", TensorProto.INT64, [1], [-1]),
        ),
        make_node("CastLike", ["zerof", "x"], ["zero"]),
        cast,
        # nanx is true where x is not nan, nan values are replaced by zero
        # before computing the sum of squares.
        make_node("IsNaN", ["x"], ["nanxp"]),
        make_node("Not", ["nanxp"], ["nanx"]),
        make_node("CastLike", ["nanx", "x"], ["nanxc"]),
        make_node("Where", ["nanx", "x", "zero"], ["xf"]),
        make_node("Mul", ["xf", "xf"], ["xsquare"]),
        # Num / Den = sum of squares / number of non-nan values
        make_node("ReduceSum", ["xsquare"], ["Num"], keepdims=0),
        make_node("ReduceSum", ["nanxc"], ["Den"], keepdims=0),
        make_node("Div", ["Num", "Den"], ["Dev"]),
        make_node("Sqrt", ["Dev"], ["Scale"]),
        cst,
        # LabelEncoder maps the quantized type to its normalization constant,
        # the type is reshaped into a 1D tensor first and the result is
        # reduced back to a scalar.
        make_node("Reshape", ["vto", "newshape"], ["vtotensor"]),
        make_node(
            "LabelEncoder",
            ["vtotensor"],
            ["stdftensor"],
            keys_int64s=list(normalization_values),
            values_floats=list(normalization_values.values()),
            domain="ai.onnx.ml",
        ),
        make_node("ReduceSum", ["stdftensor"], ["stdf"], keepdims=0),
        make_node("CastLike", ["stdf", "Scale"], ["std"]),
        make_node("Div", ["Scale", "std"], ["ScaleScaled"]),
        make_node("QuantizeLinear", ["x", "ScaleScaled", "Zeropoint"], ["y"]),
    ]
    return make_function(
        domain,
        "DynamicQuantizeLinear",
        ["x"],
        ["y", "ScaleScaled", "Zeropoint"],
        nodes,
        opset_imports=[make_opsetid("", opset), make_opsetid("ai.onnx.ml", 2)],
        # NOTE(review): the attribute is declared even when *to* is
        # hard-coded and no node references it; kept so the signature
        # of the emitted function does not change.
        attributes=["to"],
    )




[docs]
def make_simple_dynamic_quantize_linear_function_proto(
    domain: str, opset: int, to: int = TensorProto.FLOAT8E4M3FN
) -> FunctionProto:
    """
    Creates the FunctionProto for a simplified dynamic quantization
    to float 8. The function is named `DynamicQuantizeLinear<suffix>`
    where the suffix (`E4M3FN`, `E4M3FNUZ`, `E5M2`, `E5M2FNUZ`)
    tells which type is used for the quantization. It does not
    support nan values.

    :param domain: local domain name
    :param opset: opset to use to define the function
    :param to: type to quantize into, it is hardcoded
    :return: FunctionProto
    :raises KeyError: if *to* is not one of the four float 8 types
        listed above

    The function takes 1 input and returns 3 outputs like
    operator `DynamicQuantizeLinear
    <https://onnx.ai/onnx/operators/onnx__DynamicQuantizeLinear.html>`_.
    The FunctionProto declares an attribute *to* but no node reads it,
    the quantized type is fixed when the function is built.

    The scale is the square root of the mean of the squared values
    divided by a constant which depends on the quantized type.
    The zero point is zero cast into the quantized type.
    """
    # Constant dividing the root mean square of the input,
    # one entry per supported float 8 type.
    normalization_values: Dict[int, float] = {
        TensorProto.FLOAT8E4M3FN: 100.057724,
        TensorProto.FLOAT8E4M3FNUZ: 54.26635,
        TensorProto.FLOAT8E5M2: 9535.286,
        TensorProto.FLOAT8E5M2FNUZ: 9403.499,
    }
    suffix: Dict[int, str] = {
        TensorProto.FLOAT8E4M3FN: "E4M3FN",
        TensorProto.FLOAT8E4M3FNUZ: "E4M3FNUZ",
        TensorProto.FLOAT8E5M2: "E5M2",
        TensorProto.FLOAT8E5M2FNUZ: "E5M2FNUZ",
    }

    # Both lookups happen first so that an unsupported type fails
    # before any node is built.
    normalization = normalization_values[to]
    function_name = f"DynamicQuantizeLinear{suffix[to]}"

    nodes = [
        make_node(
            "Constant",
            [],
            ["zerof"],
            value=make_tensor("zerof", TensorProto.FLOAT, [], [0]),
        ),
        make_node("Cast", ["zerof"], ["Zeropoint"], to=to),
        make_node(
            "Constant",
            [],
            ["stdf"],
            value=make_tensor("stdf", TensorProto.FLOAT, [], [normalization]),
        ),
        # Scale = sqrt(mean(x * x)) / normalization constant
        make_node("Mul", ["x", "x"], ["xsquare"]),
        make_node("ReduceMean", ["xsquare"], ["Dev"], keepdims=0),
        make_node("Sqrt", ["Dev"], ["Scale"]),
        make_node("CastLike", ["stdf", "Scale"], ["std"]),
        make_node("Div", ["Scale", "std"], ["ScaleScaled"]),
        make_node("QuantizeLinear", ["x", "ScaleScaled", "Zeropoint"], ["y"]),
    ]
    return make_function(
        domain,
        function_name,
        ["x"],
        ["y", "ScaleScaled", "Zeropoint"],
        nodes,
        opset_imports=[make_opsetid("", opset)],
        # NOTE(review): no node references this attribute since *to* is
        # hard-coded; kept so the signature of the emitted function
        # does not change.
        attributes=["to"],
    )