Source code for mlinsights.mlmodel.ml_featurizer

import numpy
import pandas
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier


class FeaturizerTypeError(TypeError):
    """
    Raised when the type of a model is not supported
    and no featurizer can be built for it.
    """


def model_featurizer(model, **params):
    """
    Builds, from a machine learned model, a function which turns
    a vector into the features produced by that model, either
    the model output itself or intermediate results.
    The model can come from :epkg:`scikit-learn`, :epkg:`torch`.
    An object exposing an attribute ``layers`` is handled as a Keras
    model, an object exposing ``forward`` as a torch model.

    :param model: model
    :param params: additional parameters, forwarded to the
        featurizer builder selected for the model
    :return: function
    :raises FeaturizerTypeError: if no known kind of model matches
    """
    # Probed in this order, the first match wins. Predicates are lazy so
    # that later probes never run once an earlier one has matched.
    probes = (
        (
            LogisticRegression,
            lambda m: isinstance(m, LogisticRegression),
            model_featurizer_lr,
        ),
        (
            RandomForestClassifier,
            lambda m: isinstance(m, RandomForestClassifier),
            model_featurizer_rfc,
        ),
        # It should be a keras model.
        ("Keras", lambda m: hasattr(m, "layers"), model_featurizer_keras),
        # It should be a torch model.
        ("torch", lambda m: hasattr(m, "forward"), model_featurizer_torch),
    )
    for _, matches, build in probes:
        if matches(model):
            return build(model, **params)

    allowed = sorted(str(label) for label, _, _ in probes)
    raise FeaturizerTypeError(  # pragma no cover
        "Unable to process type %r, allowed:\n%s" % (type(model), "\n".join(allowed))
    )
def is_vector(X):
    """
    Tells if *X* is a vector.

    A non-empty list whose first element is neither a list nor a tuple
    is a vector. An array or a dataframe is a vector if it has at most
    one dimension or if its first dimension is 1.

    @param      X   list, numpy array or pandas dataframe
    @return         boolean
    @raises         TypeError if the type of *X* is not handled
    """
    if isinstance(X, list):
        # An empty list or a list of rows is not a single observation.
        return bool(X) and not isinstance(X[0], (list, tuple))
    if isinstance(X, (numpy.ndarray, pandas.DataFrame)):
        return len(X.shape) <= 1 or X.shape[0] == 1
    raise TypeError(  # pragma no cover
        f"Unable to guess if X is a vector, type(X)={type(X)}"
    )


def wrap_predict_sklearn(X, fct, many):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output otherwise.

    @param      X       vector or list
    @param      fct     function
    @param      many    many observations or just one
    @return             flattened output of *fct* if *X* is a vector,
                        the output of *fct* unchanged otherwise
    @raises             AssertionError if *many* contradicts the shape of *X*
    """
    single = is_vector(X)
    if many == single:
        # Explicit raise, same exception type as the former ``assert``:
        # the check is not stripped by ``python -O`` and the message
        # describes the case which actually happened.
        raise AssertionError(
            "Inconsistency X is %s, many is %r"
            % ("a single vector" if single else "not a single vector", many)
        )
    if not single:
        return fct(X)
    # A one-row array or dataframe already has its row axis; wrapping it
    # again would hand a 3-D input to the predictor. Only inputs without
    # a row axis (list, 1-D array) are wrapped.
    if len(getattr(X, "shape", ())) < 2:
        X = [X]
    return fct(X).ravel()


def model_featurizer_lr(model):
    """
    Builds a featurizer from a
    :epkg:`scikit-learn:linear_model:LogisticRegression`.
    It returns a function which returns
    ``model.decision_function(X)``.

    @param      model       model to use to featurize a vector
    @return                 function ``f(X, many)``
    """
    return lambda X, many, model=model: wrap_predict_sklearn(
        X, model.decision_function, many
    )


def model_featurizer_rfc(model, output=True):
    """
    Builds a featurizer from a
    :epkg:`scikit-learn:ensemble:RandomForestClassifier`.
    It returns a function which returns the output of the model
    or the output of every tree (method *apply*).

    @param      model       model to use to featurize a vector
    @param      output      use output (``model.predict_proba(X)``)
                            or trees output (``model.apply(X)``)
    @return                 function ``f(X, many)``
    """
    method = "predict_proba" if output else "apply"
    # The method is looked up at call time on the model the returned
    # function is called with.
    return lambda X, many, model=model: wrap_predict_sklearn(
        X, getattr(model, method), many
    )


def wrap_predict_keras(X, fct, many, shapes):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output otherwise.

    @param      X       vector or list
    @param      fct     function
    @param      many    many observations or just one
    @param      shapes  expected input shapes for the neural network
    @return             flattened output for one observation,
                        one flattened row per observation if *many* is true
    """
    if many:
        rows = [fct(X[i]).ravel() for i in range(X.shape[0])]
        return numpy.stack(rows)
    if len(X.shape) != len(shapes):
        # The rank does not match the expected one: add the leading axis.
        # The ellipsis keeps this valid whatever the rank of X.
        X = X[numpy.newaxis, ...]
    return fct(X).ravel()


def model_featurizer_keras(model, layer=None):
    """
    Builds a featurizer from a :epkg:`keras` model
    It returns a function which returns the output of one
    particular layer.

    @param      model       model to use to featurize a vector
    @param      layer       index of the layer whose output is returned,
                            None to keep the model as it is
    @return                 function ``f(X, many)``

    See `About Keras models <https://keras.io/models/about-keras-models/>`_.
    """
    if layer is not None:
        # New model of the same class, from the input to the chosen layer.
        output = model.layers[layer].output
        model = model.__class__(model.input, output)

    # Read once, when the featurizer is built.
    shapes = model._feed_input_shapes[0]
    return lambda X, many, model=model, shapes=shapes: wrap_predict_keras(
        X, model.predict, many, shapes
    )


def wrap_predict_torch(X, fct, many, shapes):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output otherwise.

    @param      X       vector or list
    @param      fct     function
    @param      many    many observations or just one
    @param      shapes  expected input shapes for the neural network,
                        None to send *X* to *fct* as it is
    @return             flattened numpy output for one observation,
                        one flattened row per observation if *many* is true
    """
    if many:
        # Same conversion as for a single observation: the result of *fct*
        # is detached and converted into a numpy array before stacking.
        rows = [fct(X[i]).detach().numpy().ravel() for i in range(X.shape[0])]
        return numpy.stack(rows)
    if shapes is not None and len(X.shape) != len(shapes):
        # The rank does not match the expected one: add the leading axis.
        X = X[numpy.newaxis, ...]
    return fct(X).detach().numpy().ravel()


def model_featurizer_torch(model, layer=None):
    """
    Builds a featurizer from a :epkg:`torch` model
    It returns a function which returns the output of one
    particular layer.

    @param      model       model to use to featurize a vector
    @param      layer       index of the layer whose output is returned,
                            None to keep the model as it is
    @return                 function ``f(X, many)``
    """
    if layer is not None:
        # NOTE(review): same code as the Keras featurizer, it requires
        # the model to expose ``layers`` and ``input`` - confirm this is
        # what torch models passed here provide.
        output = model.layers[layer].output
        model = model.__class__(model.input, output)

    return lambda X, many, model=model, shapes=None: wrap_predict_torch(
        X, model.forward, many, shapes
    )