import numpy
import pandas
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
class FeaturizerTypeError(TypeError):
    """
    Raised when no featurizer can be built for the type
    of the object which was given.
    """
[docs]
def model_featurizer(model, **params):
    """
    Builds a featurizer from a machine learned model: a function
    which converts a vector into the features produced by the model.
    The features can be the output itself or intermediate results.
    The model can come from :epkg:`scikit-learn`, :epkg:`keras`,
    :epkg:`torch`.

    :param model: model
    :param params: additional parameters forwarded to the specialised
        featurizer builder
    :return: function
    :raises FeaturizerTypeError: if the model type is not recognised
    """
    # scikit-learn estimators are recognised by their class.
    if isinstance(model, LogisticRegression):
        return model_featurizer_lr(model, **params)
    if isinstance(model, RandomForestClassifier):
        return model_featurizer_rfc(model, **params)

    # Deep learning models are recognised by the attributes they expose.
    if hasattr(model, "layers"):
        # It should be a keras model.
        return model_featurizer_keras(model, **params)
    if hasattr(model, "forward"):
        # It should be a torch model.
        return model_featurizer_torch(model, **params)

    # Every candidate was tried by the time this point is reached.
    candidates = [LogisticRegression, RandomForestClassifier, "Keras", "torch"]
    allowed = "\n".join(sorted(str(kind) for kind in candidates))
    raise FeaturizerTypeError(  # pragma no cover
        "Unable to process type %r, allowed:\n%s" % (type(model), allowed)
    )
def is_vector(X):
    """
    Tells if *X* is a single vector (one observation).

    @param      X   list, :epkg:`numpy` array or :epkg:`pandas` dataframe
    @return         boolean

    A list is a vector if it is not empty and its first element
    is neither a list nor a tuple. An array or a dataframe is a vector
    if it has at most one dimension or if its first dimension is 1.
    Any other type raises a *TypeError*.
    """
    if isinstance(X, list):
        # An empty list or a list of rows is not a single observation.
        return bool(X) and not isinstance(X[0], (list, tuple))
    if isinstance(X, (numpy.ndarray, pandas.DataFrame)):
        dims = X.shape
        return len(dims) <= 1 or dims[0] == 1
    raise TypeError(  # pragma no cover
        f"Unable to guess if X is a vector, type(X)={type(X)}"
    )
def wrap_predict_sklearn(X, fct, many):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.
    A vector if *X* is a vector, the raw output
    otherwise.

    @param      X       vector or list
    @param      fct     function, called with a collection of observations
    @param      many    many observations or just one
    @return             ``fct(X)``, flattened with ``ravel()`` when *X*
                        is a single vector

    An *AssertionError* is raised when *many* contradicts what
    ``is_vector(X)`` reports.
    """
    isv = is_vector(X)
    if many == isv:
        # Explicit raise instead of ``assert`` so that the check is not
        # stripped when Python runs with ``-O``.
        if isv:
            raise AssertionError("Inconsistency X is a single vector, many is True")
        raise AssertionError("Inconsistency X is not a single vector, many is False")
    if isv:
        # A single vector needs a leading "observations" dimension.
        # An object which already exposes two dimensions or more (one-row
        # array or dataframe) is passed unchanged: wrapping it into a list
        # would add a third dimension.
        shape = getattr(X, "shape", None)
        if shape is None or len(shape) < 2:
            X = [X]
    y = fct(X)
    if isv:
        y = y.ravel()
    return y
def model_featurizer_lr(model):
    """
    Builds a featurizer from a :epkg:`scikit-learn:linear_model:LogisticRegression`.
    The returned function takes ``(X, many)`` and gives
    ``model.decision_function(X)`` through *wrap_predict_sklearn*.

    @param      model       model to use to featurize a vector
    @return                 function
    """
    # The model is bound as a default argument of the returned function.
    return lambda X, many, model=model: wrap_predict_sklearn(
        X, model.decision_function, many
    )
def model_featurizer_rfc(model, output=True):
    """
    Builds a featurizer from a :epkg:`scikit-learn:ensemble:RandomForestClassifier`.
    The returned function takes ``(X, many)`` and gives either the
    probabilities of the forest or the output of every tree.

    @param      model       model to use to featurize a vector
    @param      output      if True, use the output (``model.predict_proba(X)``),
                            otherwise the trees output (``model.apply(X)``)
    @return                 function
    """
    if not output:
        # Trees output.
        return lambda X, many, model=model: wrap_predict_sklearn(
            X, model.apply, many
        )
    # Probabilities (default).
    return lambda X, many, model=model: wrap_predict_sklearn(
        X, model.predict_proba, many
    )
def wrap_predict_keras(X, fct, many, shapes):
    """
    Checks types and dimension.
    Calls *fct* and returns the appropriate type.

    @param      X       observation(s), must expose ``shape`` and support
                        indexing
    @param      fct     function, its result must expose ``ravel()``
    @param      many    many observations or just one
    @param      shapes  expected input shapes for the neural network,
                        only its length is used
    @return             if *many* is true, ``fct`` is called on every ``X[i]``
                        and the flattened results are stacked with
                        ``numpy.stack``, otherwise the flattened result
                        of a single call to ``fct``
    """
    if many:
        # One call per observation, every result is flattened into a row.
        rows = [fct(X[i]).ravel() for i in range(X.shape[0])]
        return numpy.stack(rows)
    if len(X.shape) != len(shapes):
        # Adds the missing leading dimension. The ellipsis keeps every
        # existing axis whatever the rank of X is (not only rank 3).
        X = X[numpy.newaxis, ...]
    return fct(X).ravel()
def model_featurizer_keras(model, layer=None):
    """
    Builds a featurizer from a :epkg:`keras` model
    It returns a function which returns the output of one
    particular layer.

    @param      model       model to use to featurize a vector
    @param      layer       index in ``model.layers`` of the layer whose
                            output becomes the output of the featurizer,
                            None to keep the model unchanged
    @return                 function taking ``(X, many)``

    See `About Keras models <https://keras.io/models/about-keras-models/>`_.
    """
    if layer is not None:
        # Rebuilds a model of the same class which stops at the chosen layer.
        output = model.layers[layer].output
        model = model.__class__(model.input, output)

    # ``_feed_input_shapes`` is a private attribute which not every model
    # exposes: the public ``input_shape`` is used when it is missing.
    feed_shapes = getattr(model, "_feed_input_shapes", None)
    if feed_shapes:
        shapes = feed_shapes[0]
    else:
        shapes = model.input_shape
        if isinstance(shapes, list):
            # Several inputs: keeps the first one as the original code does.
            shapes = shapes[0]

    # Model and shape are bound once, as defaults of the returned function.
    return lambda X, many, model=model, shapes=shapes: wrap_predict_keras(
        X, model.predict, many, shapes
    )
def wrap_predict_torch(X, fct, many, shapes):
    """
    Checks types and dimension.
    Calls *fct* and returns a flattened :epkg:`numpy` result.

    @param      X       observation(s), must expose ``shape`` and support
                        indexing
    @param      fct     function, expected to return a tensor exposing
                        ``detach().numpy()``
    @param      many    many observations or just one
    @param      shapes  expected input shapes for the neural network,
                        only its length is used, None to skip the
                        dimension check
    @return             if *many* is true, ``fct`` is called on every ``X[i]``
                        and the flattened results are stacked with
                        ``numpy.stack``, otherwise the flattened result
                        of a single call to ``fct``
    """

    def flatten(out):
        "converts the output of *fct* into a flat numpy array"
        # Tensors are detached before the conversion, the same way for one
        # or many observations. Outputs without ``detach`` are only flattened.
        if hasattr(out, "detach"):
            out = out.detach().numpy()
        return out.ravel()

    if many:
        rows = [flatten(fct(X[i])) for i in range(X.shape[0])]
        return numpy.stack(rows)
    if shapes is not None and len(X.shape) != len(shapes):
        # Adds the missing leading dimension. The ellipsis keeps every
        # existing axis whatever the rank of X is (not only rank 3).
        X = X[numpy.newaxis, ...]
    return flatten(fct(X))
def model_featurizer_torch(model, layer=None):
    """
    Builds a featurizer from a :epkg:`torch` model.
    The returned function takes ``(X, many)`` and gives the flattened
    result of ``model.forward`` through *wrap_predict_torch*
    (no shape check, *shapes* defaults to None).

    @param      model       model to use to featurize a vector
    @param      layer       if not None, the model is rebuilt from
                            ``model.layers[layer].output``
    @return                 function
    """
    if layer is not None:
        # NOTE(review): ``layers``, ``input`` and ``output`` look like the
        # Keras API, confirm the torch models given here expose them.
        truncated = model.layers[layer].output
        model = model.__class__(model.input, truncated)

    # The model is bound as a default argument of the returned function.
    return lambda X, many, model=model, shapes=None: wrap_predict_torch(
        X, model.forward, many, shapes
    )