Documentation for ModelSelector
Model Selector - based on feature_importance.
Apply feature selection on specific estimator and its importance scores.
Source code in photonai/modelwrapper/feature_selection.py
class ModelSelector(BaseEstimator, TransformerMixin):
    """Model Selector - based on feature_importance.

    Apply feature selection on specific estimator
    and its importance scores.

    The wrapped estimator is fitted in `fit`; its `feature_importances_`
    or `coef_` attribute is then used to decide which feature columns
    are kept by `transform`.

    """
    _estimator_type = "transformer"

    def __init__(self, estimator_obj: BaseEstimator, threshold: float = 1e-5, percentile: bool = False):
        """
        Initialize the object.

        Parameters:
            estimator_obj:
                Estimator with fit/transform and possibility of feature_importance.

            threshold:
                If percentile == False:
                    Lower Bound for required importance score to keep.
                If percentile == True:
                    Fraction (<= 1) of features to keep
                    (ordered features by feature_importance).

            percentile:
                If True, threshold is interpreted as the fraction of
                features to keep instead of a lower bound on the score.

        """
        self.threshold = threshold
        self.estimator_obj = estimator_obj
        self.selected_indices = []
        self.percentile = percentile
        self.importance_scores = []
        self.n_original_features = None

    def _get_feature_importances(self, estimator, norm_order=1):
        """Retrieve or aggregate feature importances from estimator.

        Parameters:
            estimator:
                Fitted estimator exposing `feature_importances_` or `coef_`.

            norm_order:
                Order of the norm used to collapse a multi-dimensional
                `coef_` into one score per feature (column).

        Returns:
            The importance scores, one entry per feature.

        Raises:
            ValueError: If the estimator has neither attribute.

        """
        importances = getattr(estimator, "feature_importances_", None)

        if importances is None and hasattr(estimator, "coef_"):
            if estimator.coef_.ndim == 1:
                importances = np.abs(estimator.coef_)
            else:
                # one row per output/class: aggregate over rows per feature
                importances = np.linalg.norm(estimator.coef_, axis=0,
                                             ord=norm_order)
        elif importances is None:
            # NOTE(review): the message wording refers to SelectFromModel,
            # not to this class; kept unchanged for callers matching on it.
            raise ValueError(
                "The underlying estimator %s has no `coef_` or "
                "`feature_importances_` attribute. Either pass a fitted estimator"
                " to SelectFromModel or call fit before calling transform."
                % estimator.__class__.__name__)

        return importances

    def fit(self, X, y=None, **kwargs):
        """Fit the wrapped estimator and determine the selected feature indices.

        Parameters:
            X:
                Two-dimensional training data (array or list of rows).

            y:
                Targets, passed through to the wrapped estimator.

            **kwargs:
                Accepted for interface compatibility; not used here.

        Returns:
            The fitted ModelSelector (self).

        Raises:
            ValueError: In percentile mode, if threshold is greater than 1.

        """
        # Convert first: the shape is read right below, which a plain list
        # does not provide.
        if isinstance(X, list):
            X = np.array(X)
        self.n_original_features = X.shape[1]

        # 1. fit estimator
        self.estimator_obj.fit(X, y)

        # 2. read the importance scores; asarray so the comparisons below
        #    also work when the estimator exposes them as a plain sequence
        self.importance_scores = np.asarray(
            self._get_feature_importances(self.estimator_obj))

        if not self.percentile:
            self.selected_indices = np.where(self.importance_scores >= self.threshold)[0]
        else:
            # Todo: works only for binary classification, not for multiclass
            if self.threshold > 1:
                raise ValueError("Threshold should not be greater than 1")
            ordered_importances = np.sort(self.importance_scores)
            index = int(np.floor((1 - self.threshold) * X.shape[1]))
            # threshold <= 0 would point past the last element; clamp so the
            # highest ranked feature is kept instead of raising IndexError
            index = min(index, len(ordered_importances) - 1)
            percentile_thres = ordered_importances[index]
            self.selected_indices = np.where(self.importance_scores >= percentile_thres)[0]
        return self

    def transform(self, X, y=None, **kwargs):
        """Reduce X to the feature columns selected during fit.

        Parameters:
            X:
                Two-dimensional data (array or list of rows).

            y:
                Not used.

            **kwargs:
                Not used.

        Returns:
            X restricted to the selected columns, or X unchanged
            if no feature was selected.

        """
        if isinstance(X, list):
            X = np.array(X)

        X_new = X[:, self.selected_indices]

        # if no features were selected, fall back to the complete input
        if X_new.shape[1] == 0:
            print("No Features were selected from model, using all features")
            return X
        return X_new

    def inverse_transform(self, X):
        """Map reduced data back to the original feature space.

        Columns that were not selected are filled with zeros.

        Parameters:
            X:
                Two-dimensional data as returned by transform.

        Returns:
            Array of shape (n_samples, n_original_features).

        Raises:
            ValueError: If the number of columns does not match
                the number of selected features.

        """
        if isinstance(X, list):
            X = np.array(X)

        n_selected = len(self.selected_indices)

        # transform() hands the data through untouched when nothing was
        # selected, so the matching inverse is the identity as well
        if n_selected == 0 and X.shape[1] == self.n_original_features:
            return X

        if X.shape[1] != n_selected:
            msg = "X has a different shape than during fitting."
            logger.error(msg)
            raise ValueError(msg)

        Xt = np.zeros((X.shape[0], self.n_original_features))
        Xt[:, self.selected_indices] = X
        return Xt

    def set_params(self, **params):
        """Set the threshold and forward all other parameters to the estimator.

        Parameters:
            **params:
                'threshold' is stored on this object; every other
                entry is passed to estimator_obj.set_params.

        Returns:
            The ModelSelector (self), so that calls can be chained.

        """
        if 'threshold' in params:
            self.threshold = params.pop('threshold')
        self.estimator_obj.set_params(**params)
        return self

    def get_params(self, deep=True):
        """Return the parameters of the wrapped estimator.

        Only the parameters of estimator_obj are reported; threshold
        and percentile of this object are not part of the result.

        """
        return self.estimator_obj.get_params(deep)
__init__(self, estimator_obj, threshold=1e-05, percentile=False)
special
Initialize the object.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
estimator_obj |
BaseEstimator |
Estimator with fit/transform and possibility of feature_importance. |
required |
threshold |
float |
If percentile == False: Lower Bound for required importance score to keep. If percentile == True: fraction (at most 1) of features to keep (ordered features by feature_importance) |
1e-05 |
percentile |
bool |
If True, threshold is interpreted as the fraction of features to keep instead of a lower bound on the importance score. |
False |
Source code in photonai/modelwrapper/feature_selection.py
def __init__(self, estimator_obj: BaseEstimator, threshold: float = 1e-5, percentile: bool = False):
    """
    Set up the selector around the given estimator.

    Parameters:
        estimator_obj:
            Estimator with fit/transform and possibility of feature_importance.

        threshold:
            If percentile == False:
                Lower Bound for required importance score to keep.
            If percentile == True:
                Fraction of features to keep
                (ordered features by feature_importance).

        percentile:
            If True, threshold is read as the fraction of features
            to keep instead of a lower bound on the score.

    """
    # user supplied configuration
    self.estimator_obj = estimator_obj
    self.threshold = threshold
    self.percentile = percentile

    # state that is filled in once the selector has been fitted
    self.n_original_features = None
    self.importance_scores = []
    self.selected_indices = []