# Source code for mlinsights.search_rank.search_engine_predictions
from ..mlmodel import model_featurizer
from ..helpers.parameters import format_function_call
from .search_engine_vectors import SearchEngineVectors
[docs]
class SearchEnginePredictions(SearchEngineVectors):
    """
    Nearest-neighbors search engine working on transformed vectors.

    Extends class :class:`SearchEngineVectors
    <mlinsights.search_rank.search_engine_vectors.SearchEngineVectors>`:
    the neighbors of a vector *X* are searched among the values *f(X)*
    instead of the raw *X*. *f* is either a function converting a vector
    into another one, or a machine learned model. In the latter case,
    *f* is built by function
    :func:`mlinsights.mlmodel.ml_featurizer.model_featurizer`.

    :param fct: function *f* applied before looking for neighbors,
        it can also be a machine learned model
    :param fct_params: parameters sent to function
        :func:`mlinsights.mlmodel.ml_featurizer.model_featurizer`
    :param knn: list of parameters,
        see :class:`sklearn.neighbors.NearestNeighbors`
    """

    def __init__(self, fct, fct_params=None, **knn):
        super().__init__(**knn)
        # The raw constructor arguments are kept untouched for __repr__.
        self._fct_init = fct
        self._fct_params = fct_params
        # Anything which is not callable, or which exposes ``predict`` or
        # ``forward``, is wrapped by model_featurizer; a plain callable is
        # used as is.
        if (
            not callable(fct)
            or hasattr(fct, "predict")
            or hasattr(fct, "forward")
        ):
            featurizer_options = {} if fct_params is None else fct_params
            self.fct = model_featurizer(fct, **featurizer_options)
        else:
            self.fct = fct

    def __repr__(self):
        """
        Returns the constructor call rebuilt from the stored parameters.
        """
        arguments = self.pknn.copy() if self.pknn else {}
        arguments["fct"] = self._fct_init
        arguments["fct_params"] = self._fct_params
        return format_function_call(self.__class__.__name__, arguments)

    def fit(self, data=None, features=None, metadata=None):
        """
        Trains the search engine. Every vector comes with a list of metadata.

        :param data: a :epkg:`dataframe` or None if the features and
            the metadata are specified with an array and a dictionary
        :param features: features columns or an array
        :param metadata: data
        :return: self
        """
        shared = {"data": data, "features": features, "metadata": metadata}
        if not self._is_iterable(data):
            # The features are prepared first, then transformed in one call.
            self._prepare_fit(**shared)
            assert not isinstance(
                self.features_, list
            ), "features_ cannot be a list when training the model."
            self.features_ = self.fct(self.features_, True)
        else:
            # Iterable data: the transform is handed over to _prepare_fit.
            self._prepare_fit(**shared, transform=self.fct)
        return self._fit_knn()

    def kneighbors(self, X, n_neighbors=None):
        """
        Searches for neighbors close to *f(X)*.

        :param X: features
        :param n_neighbors: forwarded to the parent method ``kneighbors``
        :return: score, ind, meta

        *score* is an array representing the lengths to points,
        *ind* contains the indices of the nearest points
        in the population matrix, *meta* is the metadata.
        """
        projected = self.fct(X, False)
        is_single_vector = len(projected.shape) == 1
        if is_single_vector:
            # A one-dimensional result is turned into a one-row matrix.
            projected = projected.reshape((1, len(projected)))
        return super().kneighbors(projected, n_neighbors=n_neighbors)