Skip to content

Documentation for ModelSelector

Model Selector - based on feature_importance.

Apply feature selection on specific estimator and its importance scores.

Source code in photonai/modelwrapper/feature_selection.py
class ModelSelector(BaseEstimator, TransformerMixin):
    """Model Selector - based on feature_importance.

    Fits the wrapped estimator, reads its importance scores
    (``feature_importances_`` or, as a fallback, ``coef_``) and keeps
    only the feature columns whose score passes the configured threshold.

    """
    _estimator_type = "transformer"

    def __init__(self, estimator_obj: BaseEstimator, threshold: float = 1e-5, percentile: bool = False):
        """
        Initialize the object.

        Parameters:
            estimator_obj:
                Estimator with fit/transform and possibility of feature_importance.

            threshold:
                If percentile == False:
                    Lower Bound for required importance score to keep.
                If percentile == True:
                    Fraction (between 0 and 1) of features to keep
                    (ordered features by feature_importance).

            percentile:
                If True, threshold is interpreted as the fraction of
                features to keep instead of an absolute lower bound.

        """
        self.threshold = threshold
        self.estimator_obj = estimator_obj
        self.selected_indices = []
        self.percentile = percentile
        self.importance_scores = []
        self.n_original_features = None

    def _get_feature_importances(self, estimator, norm_order=1):
        """Retrieve or aggregate feature importances from estimator.

        Parameters:
            estimator:
                Fitted estimator exposing ``feature_importances_`` or ``coef_``.

            norm_order:
                Order of the norm used to collapse a multi-dimensional
                ``coef_`` into one score per feature.

        Returns:
            One importance score per feature.

        Raises:
            ValueError: If the estimator has neither ``feature_importances_``
                nor ``coef_``.

        """
        importances = getattr(estimator, "feature_importances_", None)

        if importances is None and hasattr(estimator, "coef_"):
            if estimator.coef_.ndim == 1:
                importances = np.abs(estimator.coef_)

            else:
                # Collapse the leading axis so that one score per column remains.
                importances = np.linalg.norm(estimator.coef_, axis=0,
                                             ord=norm_order)

        elif importances is None:
            raise ValueError(
                "The underlying estimator %s has no `coef_` or "
                "`feature_importances_` attribute. Either pass a fitted estimator"
                " to ModelSelector or call fit before calling transform."
                % estimator.__class__.__name__)

        return importances

    def fit(self, X, y=None, **kwargs):
        """Fit the wrapped estimator and determine the feature indices to keep.

        Parameters:
            X:
                Two-dimensional training data (array-like or list of rows).

            y:
                Targets passed through to the wrapped estimator.

        Returns:
            The fitted ModelSelector (self).

        Raises:
            ValueError: In percentile mode, if threshold is outside [0, 1].

        """
        # Plain lists have no ``shape``; convert before reading the feature count.
        if isinstance(X, list):
            X = np.array(X)
        self.n_original_features = X.shape[1]

        self.estimator_obj.fit(X, y)
        self.importance_scores = self._get_feature_importances(self.estimator_obj)

        if not self.percentile:
            self.selected_indices = np.where(self.importance_scores >= self.threshold)[0]
            return self

        # Todo: works only for binary classification, not for multiclass
        if self.threshold > 1:
            raise ValueError("Threshold should not be greater than 1")
        if self.threshold < 0:
            raise ValueError("Threshold should not be less than 0")

        ordered_importances = np.sort(self.importance_scores)
        index = int(np.floor((1 - self.threshold) * X.shape[1]))
        # threshold == 0 would index one past the end; clamp so that the
        # highest-scoring feature is always retained.
        index = min(index, len(ordered_importances) - 1)
        percentile_thres = ordered_importances[index]
        self.selected_indices = np.where(self.importance_scores >= percentile_thres)[0]
        return self

    def transform(self, X, y=None, **kwargs):
        """Reduce X to the selected feature columns.

        Parameters:
            X:
                Two-dimensional data (array-like or list of rows).

        Returns:
            X restricted to the selected columns, or X unchanged when no
            feature was selected.

        """
        if isinstance(X, list):
            X = np.array(X)

        X_new = X[:, self.selected_indices]

        # If no features were selected, fall back to the full feature set
        # instead of handing an empty matrix downstream.
        if X_new.shape[1] == 0:
            print("No Features were selected from model, using all features")
            return X
        return X_new

    def inverse_transform(self, X):
        """Map reduced data back to the original feature space.

        Columns that were not selected are filled with zeros.

        Parameters:
            X:
                Two-dimensional data as produced by ``transform``.

        Returns:
            Array with ``n_original_features`` columns.

        Raises:
            ValueError: If the number of columns of X does not match the
                number of selected features.

        """
        # transform() passes the data through untouched when nothing was
        # selected, so the inverse of that output is the data itself.
        if len(self.selected_indices) == 0 and X.shape[1] == self.n_original_features:
            return X

        if X.shape[1] != len(self.selected_indices):
            msg = "X has a different shape than during fitting."
            logger.error(msg)
            raise ValueError(msg)
        Xt = np.zeros((X.shape[0], self.n_original_features))
        Xt[:, self.selected_indices] = X
        return Xt

    def set_params(self, **params):
        """Set ``threshold`` on the selector and forward the rest to the estimator.

        Returns:
            The ModelSelector (self), so calls can be chained.

        """
        if 'threshold' in params:
            self.threshold = params.pop('threshold')
        self.estimator_obj.set_params(**params)
        return self

    def get_params(self, deep=True):
        """Return the parameters of the wrapped estimator."""
        return self.estimator_obj.get_params(deep)

__init__(self, estimator_obj, threshold=1e-05, percentile=False) special

Initialize the object.

Parameters:

Name Type Description Default
estimator_obj BaseEstimator

Estimator with fit/transform and possibility of feature_importance.

required
threshold float

If percentile == False: Lower bound for the required importance score to keep. If percentile == True: fraction of features to keep (features ordered by feature_importance).

1e-05
percentile bool

If True, threshold is interpreted as the fraction of features to keep.

False
Source code in photonai/modelwrapper/feature_selection.py
def __init__(self, estimator_obj: BaseEstimator, threshold: float = 1e-5, percentile: bool = False):
    """
    Set up the selector with its estimator and selection settings.

    Parameters:
        estimator_obj:
            Estimator with fit/transform and possibility of feature_importance.

        threshold:
            If percentile == False:
                Lower Bound for required importance score to keep.
            If percentile == True:
                Fraction of features to keep (ordered features by feature_importance).

        percentile:
            If True, threshold is interpreted as the fraction of features to keep.

    """
    # Configuration handed in by the caller.
    self.estimator_obj = estimator_obj
    self.threshold = threshold
    self.percentile = percentile

    # State that is filled in later; starts out empty.
    self.n_original_features = None
    self.importance_scores = []
    self.selected_indices = []