Documentation for Switch
This class encapsulates several PipelineElements that belong at the same step of the pipeline and compete to be the best choice.
Use it, for example, to find out whether Preprocessing A or Preprocessing B is better at this position in the pipe, or whether a random forest outperforms the good old SVM.
ATTENTION: This class is a construct that may be convenient but is not suitable for complex optimizations. Currently, optimization works for grid_search, random search, smac, and the specialized switch optimizer.
Examples:
``` python
from photonai.base import PipelineElement, Switch
from photonai.optimization import IntegerRange

# Estimator Switch
svm = PipelineElement('SVC',
                      hyperparameters={'kernel': ['rbf', 'linear']})

tree = PipelineElement('DecisionTreeClassifier',
                       hyperparameters={'min_samples_split': IntegerRange(2, 5),
                                        'min_samples_leaf': IntegerRange(1, 5),
                                        'criterion': ['gini', 'entropy']})

# my_pipe refers to an existing Hyperpipe instance (see sketch below)
my_pipe += Switch('EstimatorSwitch', [svm, tree])
```
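For context, `my_pipe` above refers to a PHOTONAI Hyperpipe. Below is a minimal, self-contained sketch; the Hyperpipe constructor arguments are illustrative assumptions, not part of this class's API:

``` python
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold

from photonai.base import Hyperpipe, PipelineElement, Switch
from photonai.optimization import IntegerRange

X, y = load_breast_cancer(return_X_y=True)

# illustrative Hyperpipe setup; argument values are assumptions
my_pipe = Hyperpipe('switch_example',
                    optimizer='grid_search',
                    metrics=['accuracy'],
                    best_config_metric='accuracy',
                    outer_cv=KFold(n_splits=3),
                    inner_cv=KFold(n_splits=3))

my_pipe += PipelineElement('StandardScaler')

svm = PipelineElement('SVC', hyperparameters={'kernel': ['rbf', 'linear']})
tree = PipelineElement('DecisionTreeClassifier',
                       hyperparameters={'min_samples_split': IntegerRange(2, 5)})
my_pipe += Switch('EstimatorSwitch', [svm, tree])

my_pipe.fit(X, y)
```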
Source code in photonai/base/photon_elements.py
class Switch(PipelineElement):
"""
This class encapsulates several PipelineElements that
belong at the same step of the pipeline and compete to
be the best choice.
Use it, for example, to find out whether Preprocessing A
or Preprocessing B is better at this position in the pipe,
or whether a random forest outperforms the good old SVM.
ATTENTION: This class is a construct that may be convenient
but is not suitable for complex optimizations.
Currently, optimization works for grid_search, random search,
smac, and the specialized switch optimizer.
Example:
``` python
from photonai.base import PipelineElement, Switch
from photonai.optimization import IntegerRange
# Estimator Switch
svm = PipelineElement('SVC',
hyperparameters={'kernel': ['rbf', 'linear']})
tree = PipelineElement('DecisionTreeClassifier',
hyperparameters={'min_samples_split': IntegerRange(2, 5),
'min_samples_leaf': IntegerRange(1, 5),
'criterion': ['gini', 'entropy']})
# my_pipe refers to an existing Hyperpipe instance
my_pipe += Switch('EstimatorSwitch', [svm, tree])
```
"""
def __init__(self, name: str, elements: List[PipelineElement] = None, estimator_name: str = ''):
"""
Creates a new Switch object and generates the hyperparameter combination grid.
Parameters:
name:
How the element is called in the pipeline.
elements:
The competing pipeline elements.
estimator_name:
-
"""
self._name = name
self.initial_name = self._name
self.sklearn_name = self.name + "__current_element"
self._hyperparameters = {}
self._current_element = (1, 1)
self.pipeline_element_configurations = []
self.base_element = None
self.disabled = False
self.test_disabled = False
self.batch_size = 0
self.estimator_name = estimator_name
self.needs_y = True
self.needs_covariates = True
# we assume we are testing models against each other, but this is only a guess
self.is_estimator = True
self.is_transformer = True
self.identifier = "SWITCH:"
self._random_state = False
self.elements_dict = {}
if elements:
self.elements = elements
self.generate_private_config_grid()
for p_element in elements:
self.elements_dict[p_element.name] = p_element
else:
self.elements = []
def __iadd__(self, pipeline_element: PipelineElement):
"""
Add a new estimator or transformer object to the switch container.
All items change their positions during testing.
Parameters:
pipeline_element:
Item that should be tested against other
competing elements at that position in the pipeline.
"""
super(Switch, self).__iadd__(pipeline_element)
self.elements_dict[pipeline_element.name] = pipeline_element
self.generate_private_config_grid()
return self
def add(self, pipeline_element: PipelineElement):
"""
Add a new estimator or transformer object to the switch container.
All items change their positions during testing.
Parameters:
pipeline_element:
Item that should be tested against other
competing elements at that position in the pipeline.
"""
self.__iadd__(pipeline_element)
@property
def hyperparameters(self):
# Todo: return actual hyperparameters of all pipeline elements??
return self._hyperparameters
@hyperparameters.setter
def hyperparameters(self, value):
pass
def generate_private_config_grid(self):
# reset
self.pipeline_element_configurations = []
# calculate anew
hyperparameters = []
# generate possible combinations for each item respectively - do not mix hyperparameters across items
for i, pipe_element in enumerate(self.elements):
if hasattr(pipe_element, 'generate_config_grid'):
element_configurations = pipe_element.generate_config_grid()
final_configuration_list = []
if len(element_configurations) == 0:
final_configuration_list.append({})
for dict_item in element_configurations:
final_configuration_list.append(dict(dict_item))
self.pipeline_element_configurations.append(final_configuration_list)
hyperparameters += [(i, nr) for nr in range(len(final_configuration_list))]
self._hyperparameters = {self.sklearn_name: hyperparameters}
@property
def current_element(self):
return self._current_element
@current_element.setter
def current_element(self, value):
self._current_element = value
self.base_element = self.elements[self.current_element[0]]
def get_params(self, deep: bool=True):
if self.base_element:
return self.base_element.get_params(deep)
else:
return {}
def set_params(self, **kwargs):
"""
The optimization process sees the number of possible
combinations and chooses one of them. This class then activates
the corresponding element and prepares it with the
chosen configuration.
"""
config_nr = None
config = None
self.estimator_name = ''
# copy dict for adaptations
params = dict(kwargs)
# in case we are operating with grid search
if self.sklearn_name in params:
config_nr = params[self.sklearn_name]
elif 'current_element' in params:
config_nr = params['current_element']
if "estimator_name" in kwargs:
self.estimator_name = params["estimator_name"]
del params["estimator_name"]
self.base_element = self.elements_dict[self.estimator_name]
if params is not None:
config = params
# todo: raise Warning that Switch could not identify which estimator to set when estimator
# has no params to optimize
# in case we are operating with grid search or any of its derivatives
if config_nr is not None:
if not isinstance(config_nr, (tuple, list)):
logger.error('ValueError: current_element must be of type Tuple')
raise ValueError('current_element must be of type Tuple')
# grid search hack
self.current_element = config_nr
config = self.pipeline_element_configurations[config_nr[0]][config_nr[1]]
# if we don't use the specialized switch optimizer
# we need to identify the element to activate by checking for which element the optimizer gave params
elif not self.estimator_name:
# workaround: take the first param key to identify the element, then break
for kwargs_key, kwargs_value in params.items():
first_element_name = kwargs_key.split("__")[0]
self.base_element = self.elements_dict[first_element_name]
break
# so now the element to be activated is found and taken care of,
# let's move on to give the base element the config to set
if config:
# remove name
unnamed_config = {}
for config_key, config_value in config.items():
key_split = config_key.split('__')
unnamed_config['__'.join(key_split[1::])] = config_value
self.base_element.set_params(**unnamed_config)
return self
def copy_me(self):
ps = Switch(self.name)
ps._random_state = self._random_state
for element in self.elements:
new_element = element.copy_me()
ps += new_element
ps._current_element = self._current_element
return ps
def prettify_config_output(self, config_name, config_value, return_dict=False) -> str:
"""
Makes the sklearn configuration dictionary human readable.
Returns:
Configuration as prettified string or configuration as
dict with prettified keys.
"""
if isinstance(config_value, tuple):
output = self.pipeline_element_configurations[config_value[0]][config_value[1]]
if not output:
if return_dict:
return {self.elements[config_value[0]].name: None}
else:
return self.elements[config_value[0]].name
else:
if return_dict:
return output
return str(output)
else:
return super(Switch, self).prettify_config_output(config_name, config_value)
def predict_proba(self, X: np.ndarray, **kwargs) -> Union[np.ndarray, None]:
"""
Predict probabilities. Base element needs predict_proba()
function, otherwise return None.
Parameters:
X:
The array-like data with shape=[N, D], where N is the number
of samples and D is the number of features.
**kwargs:
Keyword arguments, not in use yet.
Returns:
Probabilities.
"""
if not self.disabled:
if hasattr(self.base_element.base_element, 'predict_proba'):
return self.base_element.predict_proba(X)
else:
return None
return X
def _check_hyper(self, BaseEstimator):
pass
def inverse_transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs):
"""
Calls inverse_transform on the base element.
For a dimension preserving transformer without inverse,
the value is returned untreated.
Parameters:
X:
The array-like data with shape=[N, D], where N
is the number of samples and D is the number of features.
y:
The truth array-like values with shape=[N], where N is
the number of samples.
**kwargs:
Keyword arguments, passed to base_element.transform.
Returns:
(X, y, kwargs) in back-transformed version if possible.
"""
if hasattr(self.base_element, 'inverse_transform'):
# todo: check this
X, y, kwargs = self.adjusted_delegate_call(self.base_element.inverse_transform, X, y, **kwargs)
return X, y, kwargs
@property
def _estimator_type(self):
estimator_types = list()
for element in self.elements:
estimator_types.append(getattr(element, '_estimator_type'))
unique_types = set(estimator_types)
if len(unique_types) > 1:
raise NotImplementedError("Switch should only contain elements of a single type (transformer, classifier, "
"regressor). Found multiple types: {}".format(unique_types))
elif len(unique_types) == 1:
return list(unique_types)[0]
else:
return
@property
def feature_importances_(self):
if hasattr(self.base_element, 'feature_importances_'):
return getattr(self.base_element, 'feature_importances_')
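As shown in `set_params` above, a configuration can address the switch in two ways: grid-search style, via an `(element_index, config_index)` tuple under the `<switch_name>__current_element` key, or switch-optimizer style, via `estimator_name`. A minimal sketch (the concrete config indices and resulting dictionaries depend on the generated grid):

``` python
from photonai.base import PipelineElement, Switch

svm = PipelineElement('SVC', hyperparameters={'kernel': ['rbf', 'linear']})
tree = PipelineElement('DecisionTreeClassifier')
switch = Switch('EstimatorSwitch', [svm, tree])

# grid-search style: activate element 0 (the SVC) with its second
# generated configuration, e.g. {'SVC__kernel': 'linear'}
switch.set_params(**{'EstimatorSwitch__current_element': (0, 1)})

# switch-optimizer style: address the element by name; the 'SVC__'
# prefix is stripped before the config is handed to the element
switch.set_params(**{'estimator_name': 'SVC', 'SVC__kernel': 'rbf'})
```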
__iadd__(self, pipeline_element)
special
Add a new estimator or transformer object to the switch container. All items change their positions during testing.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `pipeline_element` | `PipelineElement` | Item that should be tested against other competing elements at that position in the pipeline. | *required* |
Source code in photonai/base/photon_elements.py
def __iadd__(self, pipeline_element: PipelineElement):
"""
Add a new estimator or transformer object to the switch container.
All items change their positions during testing.
Parameters:
pipeline_element:
Item that should be tested against other
competing elements at that position in the pipeline.
"""
super(Switch, self).__iadd__(pipeline_element)
self.elements_dict[pipeline_element.name] = pipeline_element
self.generate_private_config_grid()
return self
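A short usage sketch of incremental addition (the element choices are hypothetical, and the printed grid depends on each element's hyperparameters):

``` python
from photonai.base import PipelineElement, Switch

switch = Switch('EstimatorSwitch')
switch += PipelineElement('SVC', hyperparameters={'kernel': ['rbf', 'linear']})
switch += PipelineElement('DecisionTreeClassifier')

# the private config grid is regenerated after every addition
print(switch.hyperparameters)
# e.g. {'EstimatorSwitch__current_element': [(0, 0), (0, 1), (1, 0)]}
```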
__init__(self, name, elements=None, estimator_name='')
special
Creates a new Switch object and generates the hyperparameter combination grid.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `name` | `str` | How the element is called in the pipeline. | *required* |
| `elements` | `List[photonai.base.photon_elements.PipelineElement]` | The competing pipeline elements. | `None` |
| `estimator_name` | `str` | - | `''` |
Source code in photonai/base/photon_elements.py
def __init__(self, name: str, elements: List[PipelineElement] = None, estimator_name: str = ''):
"""
Creates a new Switch object and generates the hyperparameter combination grid.
Parameters:
name:
How the element is called in the pipeline.
elements:
The competing pipeline elements.
estimator_name:
-
"""
self._name = name
self.initial_name = self._name
self.sklearn_name = self.name + "__current_element"
self._hyperparameters = {}
self._current_element = (1, 1)
self.pipeline_element_configurations = []
self.base_element = None
self.disabled = False
self.test_disabled = False
self.batch_size = 0
self.estimator_name = estimator_name
self.needs_y = True
self.needs_covariates = True
# we assume we are testing models against each other, but this is only a guess
self.is_estimator = True
self.is_transformer = True
self.identifier = "SWITCH:"
self._random_state = False
self.elements_dict = {}
if elements:
self.elements = elements
self.generate_private_config_grid()
for p_element in elements:
self.elements_dict[p_element.name] = p_element
else:
self.elements = []
add(self, pipeline_element)
Add a new estimator or transformer object to the switch container. All items change their positions during testing.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `pipeline_element` | `PipelineElement` | Item that should be tested against other competing elements at that position in the pipeline. | *required* |
Source code in photonai/base/photon_elements.py
def add(self, pipeline_element: PipelineElement):
"""
Add a new estimator or transformer object to the switch container.
All items change their positions during testing.
Parameters:
pipeline_element:
Item that should be tested against other
competing elements at that position in the pipeline.
"""
self.__iadd__(pipeline_element)
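`add` delegates to `__iadd__`, so both forms are equivalent, as in this short sketch:

``` python
from photonai.base import PipelineElement, Switch

switch = Switch('EstimatorSwitch')
# equivalent to: switch += PipelineElement('SVC')
switch.add(PipelineElement('SVC'))
```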
predict_proba(self, X, **kwargs)
Predict probabilities. Base element needs predict_proba() function, otherwise return None.
Parameters:
| Name | Type | Description | Default |
|---|---|---|---|
| `X` | `ndarray` | The array-like data with shape=[N, D], where N is the number of samples and D is the number of features. | *required* |
| `**kwargs` | | Keyword arguments, not in use yet. | `{}` |

Returns:

| Type | Description |
|---|---|
| `Optional[numpy.ndarray]` | Probabilities. |
Source code in photonai/base/photon_elements.py
def predict_proba(self, X: np.ndarray, **kwargs) -> Union[np.ndarray, None]:
"""
Predict probabilities. Base element needs predict_proba()
function, otherwise return None.
Parameters:
X:
The array-like data with shape=[N, D], where N is the number
of samples and D is the number of features.
**kwargs:
Keyword arguments, not in use yet.
Returns:
Probabilities.
"""
if not self.disabled:
if hasattr(self.base_element.base_element, 'predict_proba'):
return self.base_element.predict_proba(X)
else:
return None
return X
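A usage sketch; the direct `fit` call assumes that fitting is delegated to the active element, and `probability=True` is assumed to be forwarded to the wrapped sklearn SVC as a fixed keyword argument:

``` python
import numpy as np
from photonai.base import PipelineElement, Switch

X = np.random.rand(20, 5)
y = np.random.randint(0, 2, 20)

# probability=True is forwarded to the wrapped sklearn SVC
svm = PipelineElement('SVC', hyperparameters={}, probability=True)
switch = Switch('EstimatorSwitch', [svm])

# activate the first element with its first (empty) configuration
switch.set_params(**{'EstimatorSwitch__current_element': (0, 0)})
switch.fit(X, y)  # assumption: fit is delegated to the active element

probas = switch.predict_proba(X)  # None if predict_proba is unavailable
```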