Skip to content

Documentation for Stack

Creates a vertical stacking/parallelization of pipeline items.

The object acts as a single PipelineElement and encapsulates several vertically stacked other PipelineElements, each child receiving the same input data. The data is iteratively distributed to all children, the results are collected and horizontally concatenated.

Examples:

1
2
3
4
# Two child elements; inside the stack each of them receives the same input data.
tree = PipelineElement('DecisionTreeClassifier')
svc = PipelineElement('LinearSVC')

# use_probabilities=True makes the stack's predict() delegate to predict_proba().
my_pipe += Stack('final_stack', [tree, svc], use_probabilities=True)
Source code in photonai/base/photon_elements.py
class Stack(PipelineElement):
    """
    Creates a vertical stacking/parallelization of pipeline items.

    The object acts as a single PipelineElement and encapsulates
    several vertically stacked other PipelineElements, each
    child receiving the same input data. The data is iteratively
    distributed to all children, the results are collected
    and horizontally concatenated.

    Example:
        ``` python
        tree = PipelineElement('DecisionTreeClassifier')
        svc = PipelineElement('LinearSVC')

        my_pipe += Stack('final_stack', [tree, svc], use_probabilities=True)
        ```

    """
    def __init__(self, name: str, elements: List[PipelineElement] = None, use_probabilities: bool = False):
        """
        Creates a new Stack element.
        Collects all possible hyperparameter combinations of the children.

        Parameters:
            name:
                Give the pipeline element a name.

            elements:
                List of pipeline elements that should run in parallel.

            use_probabilities:
                For a stack that includes estimators you can choose whether
                predict or predict_proba is called for all estimators.
                In case only some implement predict_proba, predict
                is called for the remaining estimators.

        """
        super(Stack, self).__init__(name, hyperparameters={}, test_disabled=False, disabled=False,
                                    base_element=True)

        # The stack has no hyperparameters of its own; this dict only
        # collects the (prefixed) hyperparameters of its children.
        self._hyperparameters = {}
        self.elements = []
        if elements is not None:
            for item_to_stack in elements:
                self.__iadd__(item_to_stack)

        # todo: Stack should not be allowed to change y, only covariates
        self.needs_y = False
        self.needs_covariates = True
        self.identifier = "STACK:"
        self.use_probabilities = use_probabilities

    def __iadd__(self, item: PipelineElement):
        """
        Add a new element to the stack.
        Generate sklearn hyperparameter names in order
        to set the item's hyperparameters in the optimization process.

        Parameters:
            item:
                The Element that should be stacked and will run in a
                vertical parallelization in the original pipe.

        Returns:
            The stack itself, so that ``stack += item`` works.

        Raises:
            NotImplementedError:
                If the item (or one of its nested children) needs y,
                see check_if_needs_y.

        """
        self.check_if_needs_y(item)
        super(Stack, self).__iadd__(item)

        # Register every hyperparameter of the child under
        # "<stack name>__<hyperparameter key>".
        for key, value in dict(item.hyperparameters).items():
            self._hyperparameters[self.name + '__' + key] = value

        return self

    def check_if_needs_y(self, item):
        """
        Reject elements that need y.

        Branch, Stack and Switch items are searched recursively through
        their ``elements``; any other PipelineElement is checked directly
        via its ``needs_y`` flag.

        Parameters:
            item:
                The element that is about to be added to the stack.

        Raises:
            NotImplementedError:
                If the item or one of its nested children has needs_y set.

        """
        if isinstance(item, (Branch, Stack, Switch)):
            for child_item in item.elements:
                self.check_if_needs_y(child_item)
        elif isinstance(item, PipelineElement):
            if item.needs_y:
                raise NotImplementedError("Elements in Stack must not transform y because the number of samples in every "
                                 "element of the stack might differ. Then, it will not be possible to concatenate those "
                                 "data and target matrices. Please use the transformer that is using y before or after "
                                 "the stack.")

    def add(self, item: PipelineElement):
        """
        Add a new element to the stack.
        Generate sklearn hyperparameter names in order
        to set the item's hyperparameters in the optimization process.

        Parameters:
            item:
                The Element that should be stacked and will run in a
                vertical parallelization in the original pipe.

        """
        self.__iadd__(item)

    @property
    def hyperparameters(self):
        """The collected, name-prefixed hyperparameters of all children."""
        return self._hyperparameters

    @hyperparameters.setter
    def hyperparameters(self, value):
        """
        Setting hyperparameters does not make sense, only the items that added
        can be optimized, not the container (self).
        """
        # Deliberate no-op: assignments are silently ignored.
        pass

    def generate_config_grid(self):
        """Return the result of create_global_config_grid for the children and the stack's name."""
        return create_global_config_grid(self.elements, self.name)

    def get_params(self, deep=True):
        """
        Collect the parameters of all children.

        Parameters:
            deep:
                Passed on to every child's get_params.

        Returns:
            Dict mapping each child's name to that child's get_params result.

        """
        return {element.name: element.get_params(deep) for element in self.elements}

    def set_params(self, **kwargs):
        """
        Find the particular child and distribute the params to it.

        Every keyword is expected in the form ``<child name>__<parameter>``;
        the part before the first ``__`` selects the child, the remainder
        is passed to that child's set_params.

        Returns:
            The stack itself.

        Raises:
            ValueError:
                If no child with the addressed name exists.

        """
        # Group the incoming parameters by the child they address.
        spread_params_dict = {}
        for k, val in kwargs.items():
            item_name, *remainder = k.split('__')
            spread_params_dict.setdefault(item_name, {})['__'.join(remainder)] = val

        for name, params in spread_params_dict.items():
            found = False
            # No early exit: every child carrying that name receives the params.
            for element in self.elements:
                if element.name == name:
                    element.set_params(**params)
                    found = True
            if not found:
                raise ValueError("Couldn't set hyperparameter for element {} -> {}".format(name, params))
        return self

    def fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs):
        """
        Calls fit iteratively on every child.

        Parameters:
            X:
                The array-like data with shape=[N, D], where N is the
                number of samples and D is the number of features.

            y:
                The truth array-like values with shape=[N],
                where N is the number of samples.

            **kwargs:
                Keyword arguments, passed to base_elements fit.

        Returns:
            Fitted self.

        """
        for element in self.elements:
            # Todo: parallellize fitting
            element.fit(X, y, **kwargs)
        return self

    def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
        """
        Calls the predict function on underlying base elements.

        If use_probabilities is set, predict_proba is used instead.

        Parameters:
            X:
                The array-like data with shape=[N, D], where N is the
                number of samples and D is the number of features.

            **kwargs:
                Keyword arguments, passed to base_elements predict.

        Returns:
            Prediction values.

        """
        if self.use_probabilities:
            return self.predict_proba(X, **kwargs)
        return self._predict(X, **kwargs)

    def _predict(self, X: np.ndarray, **kwargs):
        """Iteratively calls predict on every child and stacks the results."""
        # Todo: strategy for concatenating data from different pipes
        # todo: parallelize prediction
        predicted_data = np.array([])
        for element in self.elements:
            element_prediction = element.predict(X, **kwargs)
            predicted_data = PhotonDataHelper.stack_data_horizontally(predicted_data, element_prediction)
        return predicted_data

    def predict_proba(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> np.ndarray:
        """
        Predict probabilities for every pipe element and stack them together.

        Parameters:
            X:
                The array-like data with shape=[N, D], where N is the number
                of samples and D is the number of features.

            y:
                The truth array-like values with shape=[N],
                where N is the number of samples. Not used.

            **kwargs:
                Keyword arguments, not used yet.

        Returns:
            Probability values.

        """
        predicted_data = np.array([])
        for element in self.elements:
            element_output = element.predict_proba(X)
            if element_output is None:
                # predict_proba returned None: fall back to predict.
                element_output = element.predict(X)
            predicted_data = PhotonDataHelper.stack_data_horizontally(predicted_data, element_output)
        return predicted_data

    def transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray, dict):
        """
        Calls transform on every child.

        This method only calls each child's transform and keeps the first
        entry of the returned triple. NOTE(review): the original docs state
        that an encapsulated hyperpipe also predicts with its last element;
        if so, that happens inside the child's transform - confirm there.

        Parameters:
            X:
                The array-like training and test data with shape=[N, D],
                where N is the number of samples and D is the number of features.

            y:
                The truth array-like values with shape=[N],
                where N is the number of samples.

            **kwargs:
                Keyword arguments, passed to base_elements transform.

        Returns:
            Tuple of the stacked transformed data of all children,
            the unchanged y and the unchanged kwargs.

        """
        transformed_data = np.array([])
        for element in self.elements:
            # Only the transformed data is kept; y and kwargs returned by the child are discarded.
            element_transform, _, _ = element.transform(X, y, **kwargs)
            transformed_data = PhotonDataHelper.stack_data_horizontally(transformed_data, element_transform)

        return transformed_data, y, kwargs

    def copy_me(self):
        """
        Create a copy of this stack.

        The copy gets the same name and use_probabilities setting, a
        copy_me() copy of every child, and the base_element and
        _random_state values of this stack.

        Returns:
            The new Stack.

        """
        # Carry use_probabilities over; otherwise the copy would silently
        # fall back to the default (False) and call predict instead of predict_proba.
        ps = Stack(self.name, use_probabilities=self.use_probabilities)
        for element in self.elements:
            ps += element.copy_me()
        ps.base_element = self.base_element
        ps._random_state = self._random_state
        return ps

    def inverse_transform(self, X, y=None, **kwargs):
        """Not supported for a stack; always raises NotImplementedError."""
        raise NotImplementedError("Inverse Transform is not yet implemented for a Stacking Element in PHOTON")

    @property
    def _estimator_type(self):
        """Always None for a stack."""
        return None

    def _check_hyper(self, BaseEstimator):
        """No-op for a stack."""
        pass

    @property
    def feature_importances_(self):
        """Always None for a stack."""
        return None

__iadd__(self, item) special

Add a new element to the stack. Generate sklearn hyperparameter names in order to set the item's hyperparameters in the optimization process.

Parameters:

Name Type Description Default
item PipelineElement

The Element that should be stacked and will run in a vertical parallelization in the original pipe.

required
Source code in photonai/base/photon_elements.py
def __iadd__(self, item: PipelineElement):
    """
    Append another child to the stack (``stack += item``).

    The item is first passed to check_if_needs_y, then handed to the
    parent class' ``__iadd__``. Afterwards each of its hyperparameters
    is registered on the stack under ``<stack name>__<hyperparameter key>``
    so it can be addressed in the optimization process.

    Parameters:
        item:
            The Element that should be stacked and will run in a
            vertical parallelization in the original pipe.

    Returns:
        The stack itself.

    """
    self.check_if_needs_y(item)
    super(Stack, self).__iadd__(item)

    # Work on a snapshot of the child's hyperparameters.
    child_hyperparameters = dict(item.hyperparameters)
    for key, value in child_hyperparameters.items():
        self._hyperparameters[self.name + '__' + key] = value

    return self

__init__(self, name, elements=None, use_probabilities=False) special

Creates a new Stack element. Collects all possible hyperparameter combinations of the children.

Parameters:

Name Type Description Default
name str

Give the pipeline element a name.

required
elements List[photonai.base.photon_elements.PipelineElement]

List of pipeline elements that should run in parallel.

None
use_probabilities bool

For a stack that includes estimators you can choose whether predict or predict_proba is called for all estimators. In case only some implement predict_proba, predict is called for the remaining estimators.

False
Source code in photonai/base/photon_elements.py
def __init__(self, name: str, elements: List[PipelineElement] = None, use_probabilities: bool = False):
    """
    Set up a new, initially empty Stack and add the given children.

    The hyperparameters of every added child are collected on the stack.

    Parameters:
        name:
            Name of the pipeline element.

        elements:
            Pipeline elements that should run in parallel;
            None leaves the stack empty.

        use_probabilities:
            Whether predict (False) or predict_proba (True) is
            called for the stacked estimators. Estimators without
            predict_proba are asked for predict instead.

    """
    super(Stack, self).__init__(name, hyperparameters={}, test_disabled=False, disabled=False,
                                base_element=True)

    # Containers must exist before any child is added.
    self._hyperparameters = {}
    self.elements = []
    initial_children = () if elements is None else elements
    for child in initial_children:
        self.__iadd__(child)

    # todo: Stack should not be allowed to change y, only covariates
    self.needs_y = False
    self.needs_covariates = True
    self.identifier = "STACK:"
    self.use_probabilities = use_probabilities

add(self, item)

Add a new element to the stack. Generate sklearn hyperparameter names in order to set the item's hyperparameters in the optimization process.

Parameters:

Name Type Description Default
item PipelineElement

The Element that should be stacked and will run in a vertical parallelization in the original pipe.

required
Source code in photonai/base/photon_elements.py
def add(self, item: PipelineElement):
    """
    Method-call form of ``stack += item``.

    Delegates to ``__iadd__`` and discards its return value,
    so this method itself returns None.

    Parameters:
        item:
            The Element that should be stacked and will run in a
            vertical parallelization in the original pipe.

    """
    self.__iadd__(item)

fit(self, X, y=None, **kwargs)

Calls fit iteratively on every child.

Parameters:

Name Type Description Default
X ndarray

The array-like data with shape=[N, D], where N is the number of samples and D is the number of features.

required
y ndarray

The truth array-like values with shape=[N], where N is the number of samples.

None
**kwargs

Keyword arguments, passed to base_elements fit.

{}

Returns:

Type Description

Fitted self.

Source code in photonai/base/photon_elements.py
def fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs):
    """
    Fit every child of the stack, one after another, on the same data.

    Parameters:
        X:
            Array-like data with shape=[N, D]; N samples, D features.

        y:
            Array-like truth values with shape=[N].

        **kwargs:
            Forwarded unchanged to every child's fit.

    Returns:
        The stack itself.

    """
    # Todo: parallellize fitting
    for child in self.elements:
        child.fit(X, y, **kwargs)
    return self

predict(self, X, **kwargs)

Calls the predict function on underlying base elements.

Parameters:

Name Type Description Default
X ndarray

The array-like data with shape=[N, D], where N is the number of samples and D is the number of features.

required
**kwargs

Keyword arguments, passed to base_elements predict.

{}

Returns:

Type Description
ndarray

Prediction values.

Source code in photonai/base/photon_elements.py
def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
    """
    Predict with the stacked children.

    Dispatches to predict_proba when use_probabilities is set,
    otherwise to _predict.

    Parameters:
        X:
            Array-like data with shape=[N, D]; N samples, D features.

        **kwargs:
            Forwarded to the method that is dispatched to.

    Returns:
        Prediction values.

    """
    if self.use_probabilities:
        return self.predict_proba(X, **kwargs)
    return self._predict(X, **kwargs)

predict_proba(self, X, y=None, **kwargs)

Predict probabilities for every pipe element and stack them together.

Parameters:

Name Type Description Default
X ndarray

The array-like data with shape=[N, D], where N is the number of samples and D is the number of features.

required
y ndarray

The truth array-like values with shape=[N], where N is the number of samples.

None
**kwargs

Keyword arguments, not used yet.

{}

Returns:

Type Description
ndarray

Probability values.

Source code in photonai/base/photon_elements.py
def predict_proba(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> np.ndarray:
    """
    Collect the probability output of every child and stack it together.

    A child whose predict_proba returns None contributes
    the result of its predict instead.

    Parameters:
        X:
            Array-like data with shape=[N, D]; N samples, D features.

        y:
            Array-like truth values with shape=[N]. Not used.

        **kwargs:
            Keyword arguments, not used yet.

    Returns:
        Probability values.

    """
    stacked = np.array([])
    for child in self.elements:
        child_output = child.predict_proba(X)
        if child_output is None:
            # predict_proba returned None: fall back to predict.
            child_output = child.predict(X)
        stacked = PhotonDataHelper.stack_data_horizontally(stacked, child_output)
    return stacked

transform(self, X, y=None, **kwargs)

Calls transform on every child.

If the encapsulated child is a hyperpipe, also calls predict on the last element in the pipeline.

Parameters:

Name Type Description Default
X ndarray

The array-like training and test data with shape=[N, D], where N is the number of samples and D is the number of features.

required
y ndarray

The truth array-like values with shape=[N], where N is the number of samples.

None
**kwargs

Keyword arguments, passed to base_elements transform.

{}

Returns:

Type Description
(<class 'numpy.ndarray'>, <class 'numpy.ndarray'>, <class 'dict'>)

The horizontally concatenated transformed data of all children, followed by the unchanged y and kwargs.

Source code in photonai/base/photon_elements.py
def transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray, dict):
    """
    Run transform on every child and stack the transformed data.

    Only the first entry of each child's returned triple is used.
    NOTE(review): the original docs state that an encapsulated hyperpipe
    also predicts with its last element; if so, that happens inside the
    child's transform - confirm there.

    Parameters:
        X:
            Array-like training and test data with shape=[N, D];
            N samples, D features.

        y:
            Array-like truth values with shape=[N].

        **kwargs:
            Forwarded unchanged to every child's transform.

    Returns:
        Tuple of the stacked transformed data, the unchanged y
        and the unchanged kwargs.

    """
    stacked = np.array([])
    for child in self.elements:
        # y and kwargs returned by the child are discarded.
        child_data, _, _ = child.transform(X, y, **kwargs)
        stacked = PhotonDataHelper.stack_data_horizontally(stacked, child_data)

    return stacked, y, kwargs