class SearchEnginePredictions(SearchEngineVectors):
    """
    Extends class :class:`SearchEngineVectors
    <mlinsights.search_rank.search_engine_vectors.SearchEngineVectors>`
    by looking for neighbors to a vector *X* by
    looking neighbors to *f(X)* and not *X*.
    *f* can be any function which converts a vector
    into another one or a machine learned model.
    In that case, *f* will be set to a default behavior.
    See function :func:`mlinsights.mlmodel.ml_featurizer.model_featurizer`.

    Whatever *f* ends up being, it is invoked with two positional
    arguments: ``f(X, True)`` while fitting and ``f(X, False)``
    when searching for neighbors.

    :param fct: function *f* applied before looking for neighbors,
        it can also be a machine learned model
    :param fct_params: parameters sent to function
        :func:`mlinsights.mlmodel.ml_featurizer.model_featurizer`
    :param knn: list of parameters, see
        :class:`sklearn.neighbors.NearestNeighbors`
    """

    def __init__(self, fct, fct_params=None, **knn):
        super().__init__(**knn)
        # The raw constructor arguments are kept untouched for __repr__.
        self._fct_params = fct_params
        self._fct_init = fct
        # A callable exposing neither ``predict`` nor ``forward`` is used
        # as is; anything else goes through ``model_featurizer``.
        is_plain_function = (
            callable(fct)
            and not hasattr(fct, "predict")
            and not hasattr(fct, "forward")
        )
        if is_plain_function:
            self.fct = fct
        else:
            if fct_params is None:
                fct_params = {}
            self.fct = model_featurizer(fct, **fct_params)

    def __repr__(self):
        """
        Builds the representation from the class name, a copy of
        ``self.pknn`` (when not empty) and the original *fct* and
        *fct_params* arguments, formatted by ``format_function_call``.
        """
        params = self.pknn.copy() if self.pknn else {}
        params["fct"] = self._fct_init
        params["fct_params"] = self._fct_params
        return format_function_call(self.__class__.__name__, params)

    def fit(self, data=None, features=None, metadata=None):
        """
        Every vector comes with a list of metadata.

        :param data: a :epkg:`dataframe` or None if
            the features and the metadata
            are specified with an array and a
            dictionary
        :param features: features columns or an array
        :param metadata: data
        :return: the result of ``_fit_knn()`` (documented as *self*)
        :raises AssertionError: if *data* is not iterable and
            ``features_`` is a list once the data is prepared
        """
        if self._is_iterable(data):
            # The transform is handed over to ``_prepare_fit`` on this path.
            self._prepare_fit(
                data=data,
                features=features,
                metadata=metadata,
                transform=self.fct,
            )
        else:
            self._prepare_fit(data=data, features=features, metadata=metadata)
            # Explicit raise instead of ``assert``: assert statements are
            # removed under ``python -O`` and the check would be skipped.
            # The exception type is the one ``assert`` used to raise.
            if isinstance(self.features_, list):
                raise AssertionError(
                    "features_ cannot be a list when training the model."
                )
            # No transform was given to ``_prepare_fit`` here, so the
            # features are converted now.
            # NOTE(review): the meaning of the ``True`` flag is defined by
            # the featurizer -- confirm against ``model_featurizer``.
            self.features_ = self.fct(self.features_, True)
        return self._fit_knn()

    def kneighbors(self, X, n_neighbors=None):
        """
        Searches for neighbors close to *X*.

        :param X: features
        :param n_neighbors: forwarded to the parent class' ``kneighbors``
        :return: score, ind, meta

        *score* is an array representing the lengths to points,
        *ind* contains the indices of the nearest points in the
        population matrix, *meta* is the metadata.
        """
        xp = self.fct(X, False)
        if len(xp.shape) == 1:
            # A single vector is turned into a one-row matrix before the
            # search.
            xp = xp.reshape((1, len(xp)))
        return super().kneighbors(xp, n_neighbors=n_neighbors)