"""Source code for experimental_experiment.skl.convert."""

import time
from typing import Dict, List, Optional, Sequence, Set, Tuple, Union
import numpy as np
from onnx import ModelProto, save_model
from onnx.model_container import ModelContainer
import sklearn
from ..xbuilder import GraphBuilder, FunctionOptions, OptimizationOptions


def to_onnx(
    model: sklearn.base.BaseEstimator,
    args: Optional[Sequence["torch.Tensor"]] = None,  # noqa: F821
    target_opset: Optional[Union[int, Dict[str, int]]] = None,
    as_function: bool = False,
    options: Optional[OptimizationOptions] = None,
    optimize: bool = True,
    filename: Optional[str] = None,
    inline: bool = False,
    input_names: Optional[Sequence[str]] = None,
    output_names: Optional[List[str]] = None,
    large_model: bool = False,
    verbose: int = 0,
    return_builder: bool = False,
    raise_list: Optional[Set[str]] = None,
    external_threshold: int = 1024,
    return_optimize_report: bool = False,
    function_options: Optional[FunctionOptions] = None,
) -> Union[
    Union[ModelProto, ModelContainer],
    Tuple[Union[ModelProto, ModelContainer], GraphBuilder],
]:
    """
    Exports a :epkg:`scikit-learn` model into ONNX.

    The conversion itself is delegated to :epkg:`skl2onnx`; the resulting
    proto is then rebuilt through :class:`GraphBuilder` so that the usual
    optimization passes and export options apply.

    :param model: estimator
    :param args: input arguments; if None, a random float32 batch of shape
        ``(2, model.n_features_in_)`` is generated
    :param target_opset: targeted opset or targeted opsets as a dictionary
    :param as_function: export as a ModelProto or a FunctionProto
    :param options: optimization options
    :param optimize: optimize the model before exporting into onnx
    :param filename: if specified, stores the model into that file
    :param inline: inline the model before converting to onnx, this is done
        before any optimization takes place
    :param input_names: input names
    :param output_names: to rename the output names
    :param large_model: if True returns a
        :class:`onnx.model_container.ModelContainer`, it lets the user
        to decide later if the weights should be part of the model
        or saved as external weights
    :param verbose: verbosity level
    :param return_builder: returns the builder as well
    :param raise_list: the builder stops any time a name falls into that
        list, this is a debugging tool
    :param external_threshold: if large_model is True, every tensor above
        this limit is stored as external
    :param return_optimize_report: returns statistics on the optimization
        as well
    :param function_options: to specify what to do with the initializers in
        local functions, add them as constants or inputs
    :return: onnx model
    """
    assert isinstance(
        model, sklearn.base.BaseEstimator
    ), f"Unexpected model type {type(model)}"
    import skl2onnx

    # Default output names come from the estimator itself when available.
    if output_names is None:
        if hasattr(model, "get_feature_names_out"):
            output_names = model.get_feature_names_out()
    # Build a random dummy input when none is given; the feature count is
    # taken from the fitted estimator.
    if args is None:
        if hasattr(model, "n_features_in_"):
            n = model.n_features_in_
        else:
            raise NotImplementedError(
                f"Unable to guess the number of input features for model type {type(model)}"
            )
        args = np.random.randn(2, n).astype(np.float32)
    # Composite estimators are not handled yet.
    if isinstance(
        model,
        (
            sklearn.pipeline.Pipeline,
            sklearn.pipeline.FeatureUnion,
            sklearn.compose.ColumnTransformer,
            sklearn.compose.TransformedTargetRegressor,
        ),
    ):
        raise NotImplementedError(f"not implemented yet for {type(model)}")

    add_stats = {}
    begin = time.perf_counter()
    if verbose:
        print(f"[skl.to_onnx] convert {model.__class__.__name__}")
    # Step 1: let skl2onnx produce the initial ModelProto.
    # zipmap is disabled for classifiers so outputs stay plain tensors.
    proto = skl2onnx.to_onnx(
        model,
        args[0],
        target_opset=target_opset,
        options={"zipmap": False} if sklearn.base.is_classifier(model) else None,
        verbose=max(verbose - 1, 0),
    )
    t = time.perf_counter()
    add_stats["time_export"] = t - begin
    add_stats[f"time_export_{model.__class__.__name__}"] = t - begin
    begin = t

    if verbose:
        print(f"[skl.to_onnx] builds {model.__class__.__name__}")
    # Step 2: wrap the proto in a GraphBuilder to apply the export options.
    builder = GraphBuilder(
        target_opset_or_existing_proto=proto,
        as_function=as_function,
        optimization_options=options,
        args=args,
        kwargs=None,
        verbose=verbose,
        raise_list=raise_list,
        graph_module=model,
        output_names=output_names,
    )
    if input_names:
        # Renames follow positional order of the builder's current inputs.
        renames = dict(zip(builder.input_names, input_names))
        if verbose:
            print(f"[skl.to_onnx] renames {renames}")
        builder.rename_names(renames)
    t = time.perf_counter()
    add_stats["time_builder"] = t - begin
    add_stats[f"time_builder_{model.__class__.__name__}"] = t - begin
    begin = t

    if verbose:
        print(f"[skl.to_onnx] make_proto for {model.__class__.__name__}")
    # Step 3: produce the final proto (optionally optimized / inlined).
    onx, stats = builder.to_onnx(
        optimize=optimize,
        large_model=large_model,
        external_threshold=external_threshold,
        return_optimize_report=True,
        inline=inline,
        function_options=function_options,
    )
    t = time.perf_counter()
    add_stats["time_builder_to_onnx"] = t - begin
    add_stats[f"time_builder_to_onnx_{model.__class__.__name__}"] = t - begin
    begin = time.perf_counter()

    if verbose:
        print(f"[skl.to_onnx] done {model.__class__.__name__}")
    all_stats = dict(builder=builder.statistics_)
    if stats:
        add_stats["optimization"] = stats
    t = time.perf_counter()
    add_stats["time_export_to_onnx"] = t - begin
    if verbose:
        # A ModelContainer wraps the proto; unwrap it for reporting only.
        proto = onx if isinstance(onx, ModelProto) else onx.model_proto
        print(
            f"[to_onnx] to_onnx done in {t - begin}s "
            f"and {len(proto.graph.node)} nodes, "
            f"{len(proto.graph.initializer)} initializers, "
            f"{len(proto.graph.input)} inputs, "
            f"{len(proto.graph.output)} outputs"
        )
        if verbose >= 10:
            print(builder.get_debug_msg())
    if filename:
        if isinstance(onx, ModelProto):
            save_model(onx, filename)
        else:
            # ModelContainer knows how to serialize its external weights.
            onx.save(filename, all_tensors_to_one_file=True)
    all_stats.update(add_stats)
    if return_builder:
        return (onx, builder, all_stats) if return_optimize_report else (onx, builder)
    return (onx, all_stats) if return_optimize_report else onx