Documentation for Stack
Creates a vertical stacking/parallelization of pipeline items.
The object acts as a single PipelineElement and encapsulates several other, vertically stacked PipelineElements, each of which receives the same input data. The data is passed to every child in turn; the results are collected and horizontally concatenated.
Examples:
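The basic pattern, as in the class docstring:

``` python
tree = PipelineElement('DecisionTreeClassifier')
svc = PipelineElement('LinearSVC')
my_pipe += Stack('final_stack', [tree, svc], use_probabilities=True)
```

For context, a minimal end-to-end sketch of embedding a Stack in a Hyperpipe; the optimizer, metrics, cross-validation settings and dataset are illustrative assumptions, not values prescribed by Stack:

``` python
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold

from photonai.base import Hyperpipe, PipelineElement, Stack

# Illustrative Hyperpipe configuration (assumed; adapt to your project).
my_pipe = Hyperpipe('stack_example',
                    optimizer='grid_search',
                    metrics=['accuracy'],
                    best_config_metric='accuracy',
                    outer_cv=KFold(n_splits=3),
                    inner_cv=KFold(n_splits=3))

tree = PipelineElement('DecisionTreeClassifier')
svc = PipelineElement('LinearSVC')

# Every child receives the same input; outputs are concatenated horizontally.
my_pipe += Stack('final_stack', [tree, svc], use_probabilities=True)

X, y = load_breast_cancer(return_X_y=True)
my_pipe.fit(X, y)
```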
Source code in photonai/base/photon_elements.py
class Stack(PipelineElement):
"""
Creates a vertical stacking/parallelization of pipeline items.
The object acts as a single PipelineElement and encapsulates
several vertically stacked other PipelineElements, each
child receiving the same input data. The data is iteratively
distributed to all children, the results are collected
and horizontally concatenated.
Example:
``` python
tree = PipelineElement('DecisionTreeClassifier')
svc = PipelineElement('LinearSVC')
my_pipe += Stack('final_stack', [tree, svc], use_probabilities=True)
```
"""
def __init__(self, name: str, elements: List[PipelineElement] = None, use_probabilities: bool = False):
"""
Creates a new Stack element.
Collects all possible hyperparameter combinations of the children.
Parameters:
name:
Give the pipeline element a name.
elements:
List of pipeline elements that should run in parallel.
use_probabilities:
For a stack that includes estimators you can choose whether
predict or predict_proba is called for all estimators.
In case only some implement predict_proba, predict
is called for the remaining estimators.
"""
super(Stack, self).__init__(name, hyperparameters={}, test_disabled=False, disabled=False,
base_element=True)
self._hyperparameters = {}
self.elements = list()
if elements is not None:
for item_to_stack in elements:
self.__iadd__(item_to_stack)
# todo: Stack should not be allowed to change y, only covariates
self.needs_y = False
self.needs_covariates = True
self.identifier = "STACK:"
self.use_probabilities = use_probabilities
def __iadd__(self, item: PipelineElement):
"""
Add a new element to the stack.
Generate sklearn hyperparameter names in order
to set the item's hyperparameters in the optimization process.
Parameters:
item:
The Element that should be stacked and will run in a
vertical parallelization in the original pipe.
"""
self.check_if_needs_y(item)
super(Stack, self).__iadd__(item)
# for each configuration
tmp_dict = dict(item.hyperparameters)
for key, element in tmp_dict.items():
self._hyperparameters[self.name + '__' + key] = tmp_dict[key]
return self
def check_if_needs_y(self, item):
if isinstance(item, (Branch, Stack, Switch)):
for child_item in item.elements:
self.check_if_needs_y(child_item)
elif isinstance(item, PipelineElement):
if item.needs_y:
            raise NotImplementedError("Elements in Stack must not transform y because the number of samples in "
                                      "every element of the stack might differ. It would then be impossible to "
                                      "concatenate those data and target matrices. Please apply any transformer "
                                      "that uses y before or after the stack.")
def add(self, item: PipelineElement):
"""
Add a new element to the stack.
Generate sklearn hyperparameter names in order
to set the item's hyperparameters in the optimization process.
Parameters:
item:
The Element that should be stacked and will run in a
vertical parallelization in the original pipe.
"""
self.__iadd__(item)
@property
def hyperparameters(self):
return self._hyperparameters
@hyperparameters.setter
def hyperparameters(self, value):
"""
Setting hyperparameters does not make sense, only the items that added
can be optimized, not the container (self).
"""
pass
def generate_config_grid(self):
tmp_grid = create_global_config_grid(self.elements, self.name)
return tmp_grid
def get_params(self, deep=True):
all_params = {}
for element in self.elements:
all_params[element.name] = element.get_params(deep)
return all_params
def set_params(self, **kwargs):
"""Find the particular child and distribute the params to it"""
spread_params_dict = {}
for k, val in kwargs.items():
splitted_k = k.split('__')
item_name = splitted_k[0]
if item_name not in spread_params_dict:
spread_params_dict[item_name] = {}
dict_entry = {'__'.join(splitted_k[1::]): val}
spread_params_dict[item_name].update(dict_entry)
for name, params in spread_params_dict.items():
missing_element = (name, params)
for element in self.elements:
if element.name == name:
element.set_params(**params)
missing_element = None
if missing_element:
raise ValueError("Couldn't set hyperparameter for element {} -> {}".format(missing_element[0],
missing_element[1]))
return self
def fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs):
"""
Calls fit iteratively on every child.
Parameters:
X:
The array-like data with shape=[N, D], where N is the
number of samples and D is the number of features.
y:
The truth array-like values with shape=[N],
where N is the number of samples.
**kwargs:
                Keyword arguments, passed to each base element's fit.
Returns:
Fitted self.
"""
for element in self.elements:
            # Todo: parallelize fitting
element.fit(X, y, **kwargs)
return self
def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
"""
Calls the predict function on underlying base elements.
Parameters:
X:
The array-like data with shape=[N, D], where N is the
number of samples and D is the number of features.
**kwargs:
                Keyword arguments, passed to each base element's predict.
Returns:
Prediction values.
"""
if not self.use_probabilities:
return self._predict(X, **kwargs)
else:
return self.predict_proba(X, **kwargs)
def _predict(self, X: np.ndarray, **kwargs):
"""Iteratively calls predict on every child."""
# Todo: strategy for concatenating data from different pipes
# todo: parallelize prediction
predicted_data = np.array([])
for element in self.elements:
element_transform = element.predict(X, **kwargs)
predicted_data = PhotonDataHelper.stack_data_horizontally(predicted_data, element_transform)
return predicted_data
def predict_proba(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> np.ndarray:
"""
Predict probabilities for every pipe element and stack them together.
Parameters:
X:
The array-like data with shape=[N, D], where N is the number
of samples and D is the number of features.
y:
The truth array-like values with shape=[N],
where N is the number of samples.
**kwargs:
Keyword arguments, not used yet.
Returns:
Probability values.
"""
predicted_data = np.array([])
for element in self.elements:
element_transform = element.predict_proba(X)
if element_transform is None:
element_transform = element.predict(X)
predicted_data = PhotonDataHelper.stack_data_horizontally(predicted_data, element_transform)
return predicted_data
def transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray, dict):
"""
Calls transform on every child.
If the encapsulated child is a hyperpipe, also calls predict on the last element in the pipeline.
Parameters:
X:
                The array-like training and test data with shape=[N, D],
                where N is the number of samples and D is the number of features.
y:
The truth array-like values with shape=[N],
where N is the number of samples.
**kwargs:
                Keyword arguments, passed to each base element's transform.
Returns:
            The transformed data, the targets y, and kwargs.
"""
transformed_data = np.array([])
for element in self.elements:
# if it is a hyperpipe with a final estimator, we want to use predict:
element_transform, _, _ = element.transform(X, y, **kwargs)
transformed_data = PhotonDataHelper.stack_data_horizontally(transformed_data, element_transform)
return transformed_data, y, kwargs
def copy_me(self):
ps = Stack(self.name)
for element in self.elements:
new_element = element.copy_me()
ps += new_element
ps.base_element = self.base_element
ps._random_state = self._random_state
return ps
def inverse_transform(self, X, y=None, **kwargs):
raise NotImplementedError("Inverse Transform is not yet implemented for a Stacking Element in PHOTON")
@property
def _estimator_type(self):
return None
    def _check_hyper(self, BaseEstimator):
pass
@property
def feature_importances_(self):
return
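Because each child is addressed by its name, hyperparameters can be set from outside using sklearn's double-underscore convention, as implemented in set_params above. A minimal sketch (the element and parameter names are illustrative):

``` python
from photonai.base import PipelineElement, Stack

stack = Stack('my_stack')
stack += PipelineElement('SVC')
stack += PipelineElement('DecisionTreeClassifier')

# '<child name>__<parameter>' keys are split on '__' and routed to the child;
# a key whose child name matches no element raises a ValueError.
stack.set_params(**{'SVC__C': 10.0, 'DecisionTreeClassifier__max_depth': 3})
```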
__iadd__(self, item)
special
Add a new element to the stack. Generate sklearn hyperparameter names in order to set the item's hyperparameters in the optimization process.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
item | PipelineElement | The element that should be stacked and will run in a vertical parallelization in the original pipe. | required |
Source code in photonai/base/photon_elements.py
def __iadd__(self, item: PipelineElement):
"""
Add a new element to the stack.
Generate sklearn hyperparameter names in order
to set the item's hyperparameters in the optimization process.
Parameters:
item:
The Element that should be stacked and will run in a
vertical parallelization in the original pipe.
"""
self.check_if_needs_y(item)
super(Stack, self).__iadd__(item)
# for each configuration
tmp_dict = dict(item.hyperparameters)
for key, element in tmp_dict.items():
self._hyperparameters[self.name + '__' + key] = tmp_dict[key]
return self
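As the loop above shows, every hyperparameter a child exposes is re-registered under the stack's own name. A small sketch of the resulting keys (the element name and value range are illustrative; the printed dict assumes the child prefixes its hyperparameters with its own name, as PipelineElement does):

``` python
from photonai.base import PipelineElement, Stack

stack = Stack('my_stack')
stack += PipelineElement('SVC', hyperparameters={'C': [0.1, 1.0]})

# The child exposes 'SVC__C'; the stack re-registers it as 'my_stack__SVC__C'.
print(stack.hyperparameters)  # expected: {'my_stack__SVC__C': [0.1, 1.0]}
```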
__init__(self, name, elements=None, use_probabilities=False)
special
Creates a new Stack element. Collects all possible hyperparameter combinations of the children.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
name | str | Give the pipeline element a name. | required |
elements | List[photonai.base.photon_elements.PipelineElement] | List of pipeline elements that should run in parallel. | None |
use_probabilities | bool | For a stack that includes estimators you can choose whether predict or predict_proba is called for all estimators. In case only some implement predict_proba, predict is called for the remaining estimators. | False |
Source code in photonai/base/photon_elements.py
def __init__(self, name: str, elements: List[PipelineElement] = None, use_probabilities: bool = False):
"""
Creates a new Stack element.
Collects all possible hyperparameter combinations of the children.
Parameters:
name:
Give the pipeline element a name.
elements:
List of pipeline elements that should run in parallel.
use_probabilities:
For a stack that includes estimators you can choose whether
predict or predict_proba is called for all estimators.
In case only some implement predict_proba, predict
is called for the remaining estimators.
"""
super(Stack, self).__init__(name, hyperparameters={}, test_disabled=False, disabled=False,
base_element=True)
self._hyperparameters = {}
self.elements = list()
if elements is not None:
for item_to_stack in elements:
self.__iadd__(item_to_stack)
# todo: Stack should not be allowed to change y, only covariates
self.needs_y = False
self.needs_covariates = True
self.identifier = "STACK:"
self.use_probabilities = use_probabilities
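Passing elements at construction time simply routes each one through __iadd__, so the two forms below build the same stack (a minimal sketch; copy_me is used so both stacks get independent children):

``` python
from photonai.base import PipelineElement, Stack

tree = PipelineElement('DecisionTreeClassifier')
svc = PipelineElement('LinearSVC')

# Children given at construction time ...
stack_a = Stack('my_stack', elements=[tree, svc])

# ... are handled exactly like children added afterwards.
stack_b = Stack('my_stack')
stack_b += tree.copy_me()
stack_b += svc.copy_me()
```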
add(self, item)
Add a new element to the stack. Generate sklearn hyperparameter names in order to set the item's hyperparameters in the optimization process.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
item | PipelineElement | The element that should be stacked and will run in a vertical parallelization in the original pipe. | required |
Source code in photonai/base/photon_elements.py
def add(self, item: PipelineElement):
"""
Add a new element to the stack.
Generate sklearn hyperparameter names in order
to set the item's hyperparameters in the optimization process.
Parameters:
item:
The Element that should be stacked and will run in a
vertical parallelization in the original pipe.
"""
self.__iadd__(item)
fit(self, X, y=None, **kwargs)
Calls fit iteratively on every child.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
X | ndarray | The array-like data with shape=[N, D], where N is the number of samples and D is the number of features. | required |
y | ndarray | The truth array-like values with shape=[N], where N is the number of samples. | None |
**kwargs | | Keyword arguments, passed to each base element's fit. | {} |

Returns:

Type | Description |
---|---|
 | Fitted self. |
Source code in photonai/base/photon_elements.py
def fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs):
"""
Calls fit iteratively on every child.
Parameters:
X:
The array-like data with shape=[N, D], where N is the
number of samples and D is the number of features.
y:
The truth array-like values with shape=[N],
where N is the number of samples.
**kwargs:
                Keyword arguments, passed to each base element's fit.
Returns:
Fitted self.
"""
for element in self.elements:
            # Todo: parallelize fitting
element.fit(X, y, **kwargs)
return self
predict(self, X, **kwargs)
Calls the predict function on underlying base elements.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
X | ndarray | The array-like data with shape=[N, D], where N is the number of samples and D is the number of features. | required |
**kwargs | | Keyword arguments, passed to each base element's predict. | {} |

Returns:

Type | Description |
---|---|
ndarray | Prediction values. |
Source code in photonai/base/photon_elements.py
def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
"""
Calls the predict function on underlying base elements.
Parameters:
X:
The array-like data with shape=[N, D], where N is the
number of samples and D is the number of features.
**kwargs:
                Keyword arguments, passed to each base element's predict.
Returns:
Prediction values.
"""
if not self.use_probabilities:
return self._predict(X, **kwargs)
else:
return self.predict_proba(X, **kwargs)
predict_proba(self, X, y=None, **kwargs)
Predict probabilities for every pipe element and stack them together.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
X | ndarray | The array-like data with shape=[N, D], where N is the number of samples and D is the number of features. | required |
y | ndarray | The truth array-like values with shape=[N], where N is the number of samples. | None |
**kwargs | | Keyword arguments, not used yet. | {} |

Returns:

Type | Description |
---|---|
ndarray | Probability values. |
Source code in photonai/base/photon_elements.py
def predict_proba(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> np.ndarray:
"""
Predict probabilities for every pipe element and stack them together.
Parameters:
X:
The array-like data with shape=[N, D], where N is the number
of samples and D is the number of features.
y:
The truth array-like values with shape=[N],
where N is the number of samples.
**kwargs:
Keyword arguments, not used yet.
Returns:
Probability values.
"""
predicted_data = np.array([])
for element in self.elements:
element_transform = element.predict_proba(X)
if element_transform is None:
element_transform = element.predict(X)
predicted_data = PhotonDataHelper.stack_data_horizontally(predicted_data, element_transform)
return predicted_data
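Note the fallback in the loop above: a child that returns None from predict_proba contributes its predict output instead. A sketch of the resulting output width, assuming the stack is fitted standalone on a synthetic binary problem (LinearSVC does not implement predict_proba):

``` python
from sklearn.datasets import make_classification
from photonai.base import PipelineElement, Stack

X, y = make_classification(n_samples=100, n_features=5, random_state=0)

stack = Stack('probas', use_probabilities=True)
stack += PipelineElement('DecisionTreeClassifier')  # implements predict_proba
stack += PipelineElement('LinearSVC')               # falls back to predict

stack.fit(X, y)
out = stack.predict(X)
# Binary problem: two probability columns from the tree plus one label
# column from the SVC, stacked horizontally -> expected shape (100, 3).
print(out.shape)
```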
transform(self, X, y=None, **kwargs)
Calls transform on every child.
If the encapsulated child is a hyperpipe, also calls predict on the last element in the pipeline.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
X | ndarray | The array-like training and test data with shape=[N, D], where N is the number of samples and D is the number of features. | required |
y | ndarray | The truth array-like values with shape=[N], where N is the number of samples. | None |
**kwargs | | Keyword arguments, passed to each base element's transform. | {} |

Returns:

Type | Description |
---|---|
(ndarray, ndarray, dict) | The transformed data, the targets y, and kwargs. |
Source code in photonai/base/photon_elements.py
def transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray, dict):
"""
Calls transform on every child.
If the encapsulated child is a hyperpipe, also calls predict on the last element in the pipeline.
Parameters:
X:
                The array-like training and test data with shape=[N, D],
                where N is the number of samples and D is the number of features.
y:
The truth array-like values with shape=[N],
where N is the number of samples.
**kwargs:
                Keyword arguments, passed to each base element's transform.
Returns:
            The transformed data, the targets y, and kwargs.
"""
transformed_data = np.array([])
for element in self.elements:
# if it is a hyperpipe with a final estimator, we want to use predict:
element_transform, _, _ = element.transform(X, y, **kwargs)
transformed_data = PhotonDataHelper.stack_data_horizontally(transformed_data, element_transform)
return transformed_data, y, kwargs
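The shape semantics of the horizontal concatenation can be illustrated with plain NumPy; np.hstack stands in for PhotonDataHelper.stack_data_horizontally here as an illustration of the idea, not as that helper's implementation:

``` python
import numpy as np

n_samples = 10
child_a_out = np.zeros((n_samples, 3))  # e.g. three transformed features from child A
child_b_out = np.ones((n_samples, 2))   # e.g. two transformed features from child B

# Every child sees the same N samples, so outputs can be joined column-wise.
stacked = np.hstack([child_a_out, child_b_out])
print(stacked.shape)  # (10, 5)
```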