Source code for mlinsights.sklapi.sklearn_base_transform_stacking
import textwrap
import numpy
from .sklearn_base_transform import SkBaseTransform
from .sklearn_base_transform_learner import SkBaseTransformLearner
[docs]
class SkBaseTransformStacking(SkBaseTransform):
    """
    A *transform* which hides several *learners*, arranged according to the
    `stacking <http://blog.kaggle.com/2016/12/27/a-kagglers-guide-to-model-stacking-in-practice/>`_
    method.

    .. exref::
        :title: Stacking several learners in a scikit-learn pipeline.
        :tag: sklearn
        :lid: ex-pipe2learner2

        This *transform* gathers the outputs of several learners.
        These features are then used as the input of a stacking model.

        .. runpython::
            :showcode:
            :warningout: FutureWarning

            from sklearn.model_selection import train_test_split
            from sklearn.datasets import load_iris
            from sklearn.linear_model import LogisticRegression
            from sklearn.tree import DecisionTreeClassifier
            from sklearn.metrics import accuracy_score
            from sklearn.pipeline import make_pipeline
            from mlinsights.sklapi import SkBaseTransformStacking

            data = load_iris()
            X, y = data.data, data.target
            X_train, X_test, y_train, y_test = train_test_split(X, y)

            trans = SkBaseTransformStacking([LogisticRegression(),
                                             DecisionTreeClassifier()])
            trans.fit(X_train, y_train)
            pred = trans.transform(X_test)
            print(pred[3:])
    """

    def __init__(self, models=None, method=None, **kwargs):
        """
        @param      models      list of learners or transforms
        @param      method      method to call to convert features into
                                predictions, either one name shared by all
                                models or a list with one entry per model
                                (see below)
        @param      kwargs      parameters given to the base class constructor

        Usual names for parameter *method*:

        * ``'predict'``
        * ``'predict_proba'``
        * ``'decision_function'``

        A single value must be a string. The entries of a list are not
        checked here, they are handed unchanged to
        :class:`SkBaseTransformLearner`.
        If *method is None*, ``'predict'`` is used.

        Raises *ValueError* if *models* is None or if *method* is a list
        whose length differs from the number of models, *TypeError* if
        *models* is not a list or *method* is neither a string nor a list.
        """
        super().__init__(**kwargs)
        if models is None:
            raise ValueError("models cannot be None")
        if not isinstance(models, list):
            raise TypeError(f"models must be a list not {type(models)}")
        if method is None:
            method = "predict"
        # A list must be accepted here, otherwise the per-model branch
        # below can never be reached.
        if not isinstance(method, (str, list)):
            raise TypeError(
                f"Method must be a string or a list not {type(method)}"
            )
        self.method = method
        if isinstance(method, list):
            if len(method) != len(models):
                raise ValueError(
                    f"models and methods must have the same "
                    f"length: {len(models)} != {len(method)}."
                )
            methods = method
        else:
            methods = [method] * len(models)

        wrapped = [
            self._as_transform(model, name)
            for model, name in zip(models, methods)
        ]
        # The list received from the caller is kept whenever no wrapper
        # had to be created: scikit-learn does not support constructors
        # which replace their arguments with new objects (see
        # sklearn/base.py).
        created = any(new is not old for new, old in zip(wrapped, models))
        self.models = wrapped if created else models

    @staticmethod
    def _as_transform(model, method):
        """
        Returns an object exposing *transform* for one model.

        @param      model       learner, transform or
                                :class:`SkBaseTransformLearner`
        @param      method      method associated to this model
        @return                 *model* itself when it can be used as is,
                                a new :class:`SkBaseTransformLearner`
                                otherwise
        """
        if isinstance(model, SkBaseTransformLearner):
            if method == model.method:
                return model
            # Same underlying learner but another method is requested.
            return SkBaseTransformLearner(model.model, method)
        if hasattr(model, "transform"):
            return model
        return SkBaseTransformLearner(model, method)

    def fit(self, X, y=None, **kwargs):
        """
        Trains every model on the same data.

        @param      X           features
        @param      y           targets
        @param      kwargs      additional parameters given to every
                                call to *fit*
        @return                 self
        """
        for model in self.models:
            model.fit(X, y=y, **kwargs)
        return self

    def transform(self, X):
        """
        Calls *transform* on every model and stacks the outputs
        horizontally.

        @param      X           features
        @return                 predictions, result of ``numpy.hstack``
        """
        outputs = [model.transform(X) for model in self.models]
        return numpy.hstack(outputs)

    ##############
    # cloning API
    ##############

    def get_params(self, deep=True):
        """
        Returns the parameters which define the object.
        It follows :epkg:`scikit-learn` API.

        @param      deep        if True, the parameters of every model are
                                added under the keys ``models_<i>__<name>``
        @return                 dict
        """
        res = self.P.to_dict()
        res["models"] = self.models
        res["method"] = self.method
        if deep:
            for i, model in enumerate(self.models):
                for name, value in model.get_params(deep).items():
                    res[f"models_{i}__{name}"] = value
        return res

    def set_params(self, **values):
        """
        Sets the parameters.

        @param      values      ``models``, ``method`` or parameters of one
                                model written ``models_<i>__<name>`` where
                                *i* is the position of the model in the list
        @return                 self

        Raises *ValueError* if a key does not follow the expected pattern.
        Setting ``method`` only replaces the attribute, the models are
        not rebuilt.
        """
        if "models" in values:
            self.models = values.pop("models")
        if "method" in values:
            self.method = values.pop("method")

        prefix = "models_"
        pars = [{} for _ in self.models]
        for key, value in values.items():
            if not key.startswith(prefix):
                raise ValueError(
                    f"Parameter {key!r} must start with 'models_'."
                )
            # The index may have several digits: the name is whatever
            # follows the first '__', it cannot be found with a fixed offset.
            index, sep, name = key[len(prefix):].partition("__")
            if not sep or not name:
                raise ValueError(
                    f"Parameter {key!r} must follow the pattern "
                    f"'models_<index>__<name>'."
                )
            pars[int(index)][name] = value

        for par, model in zip(pars, self.models):
            if par:
                model.set_params(**par)
        # scikit-learn chains calls such as ``est.set_params(...).fit(...)``.
        return self

    #################
    # common methods
    #################

    def __repr__(self):
        """
        Returns a string showing the models, their methods and the
        parameters of the base class, wrapped over several lines.
        """
        models = ", ".join(
            repr(getattr(model, "model", model)) for model in self.models
        )
        methods = ", ".join(
            repr(getattr(model, "method", None)) for model in self.models
        )
        res = f"{self.__class__.__name__}([{models}], [{methods}], {self.P!r})"
        return "\n".join(textwrap.wrap(res, subsequent_indent=" "))