Skip to content

Documentation for Branch

A substream of pipeline elements that is encapsulated, e.g. for parallelization.

Examples:

1
2
3
4
5
6
7
8
# PipelineElement is used below, so it has to be imported alongside Branch.
from photonai.base import Branch, PipelineElement
from photonai.optimization import IntegerRange

# Build a branch that first applies a quantile transformation and then
# fits a decision tree whose `min_samples_split` is a tunable hyperparameter.
tree_qua_branch = Branch('tree_branch')
tree_qua_branch += PipelineElement('QuantileTransformer', n_quantiles=100)
tree_qua_branch += PipelineElement('DecisionTreeClassifier',
                                   {'min_samples_split': IntegerRange(2, 4)},
                                   criterion='gini')
Source code in photonai/base/photon_elements.py
class Branch(PipelineElement):
    """
    A substream of pipeline elements that is encapsulated, e.g. for parallelization.

    The added elements are kept in ``self.elements``. Every time an element
    is added, the wrapped ``PhotonPipeline`` (``self.base_element``) and the
    exposed hyperparameter dictionary are rebuilt from that list.

    Example:
        ``` python
        from photonai.base import Branch, PipelineElement
        from photonai.optimization import IntegerRange

        tree_qua_branch = Branch('tree_branch')
        tree_qua_branch += PipelineElement('QuantileTransformer', n_quantiles=100)
        tree_qua_branch += PipelineElement('DecisionTreeClassifier',
                                           {'min_samples_split': IntegerRange(2, 4)},
                                           criterion='gini')
        ```

    """
    def __init__(self, name: str, elements: List[PipelineElement] = None):
        """
        Initialize the object.

        Parameters:
            name:
                Name of the encapsulated item and/or
                summary of the encapsulated element's functions.

            elements:
                List of PipelineElements added one after another to the Branch.
                Nothing is added when it is None or empty.

        """
        super().__init__(name, {}, test_disabled=False, disabled=False, base_element=True)

        # in case any of the children needs y or covariates we need to request them
        self.needs_y = True
        self.needs_covariates = True
        self.elements = []
        self.has_hyperparameters = True
        self.skip_caching = True
        self.identifier = "BRANCH:"

        # needed for caching on individual level
        # (both values are copied onto the pipeline built in _prepare_pipeline)
        self.fix_fold_id = False
        self.do_not_delete_cache_folder = False

        # add elements
        if elements:
            for element in elements:
                self.add(element)

    def fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs):
        """
        Calls the fit function on all underlying base elements.

        Before fitting, a trailing CallbackElement is removed from the
        wrapped pipeline (see sanity_check_pipeline).

        Parameters:
            X:
                The array-like input with shape=[N, D], where N is
                the number of samples and D is the number of features.

            y:
                The truth array-like values with shape=[N],
                where N is the number of samples.

            **kwargs:
                Keyword arguments, passed to base_elements fit.

        Returns:
            Fitted self.

        """
        self.base_element = Branch.sanity_check_pipeline(self.base_element)
        return super().fit(X, y, **kwargs)

    def transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray, dict):
        """
        Calls the transform function on all underlying base elements.
        If _estimator_type is in ['classifier', 'regressor'], predict is called instead.

        Parameters:
            X:
                The array-like data with shape=[N, D], where N is the
                number of samples and D is the number of features.

            y:
                The truth array-like values with shape=[N],
                where N is the number of samples.

            **kwargs:
                Keyword arguments, passed to base_elements predict/transform.

        Returns:
            Transformed/Predicted data. In the estimator case this is the
            tuple (predictions, y, kwargs) with y and kwargs passed through
            unchanged.

        """
        if self._estimator_type == 'classifier' or self._estimator_type == 'regressor':
            # forward kwargs so the prediction sees the same keyword
            # arguments as a direct call to Branch.predict would
            return super().predict(X, **kwargs), y, kwargs
        return super().transform(X, y, **kwargs)

    def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
        """
        Calls the predict function on underlying base elements.

        Parameters:
            X:
                The array-like data with shape=[N, D], where N is the
                number of samples and D is the number of features.

            **kwargs:
                Keyword arguments, passed to base_elements predict method.

        Returns:
            Prediction values.

        """
        return super().predict(X, **kwargs)

    def __iadd__(self, pipe_element: PipelineElement):
        """
        Add an element to the sub pipeline.

        Parameters:
            pipe_element:
                The PipelineElement to add, being either a transformer or an estimator.

        Returns:
            self, so that ``branch += element`` keeps the name bound to this Branch.

        """
        super().__iadd__(pipe_element)
        # rebuild hyperparameters and the wrapped pipeline for the new element list
        self._prepare_pipeline()
        return self

    def add(self, pipe_element: PipelineElement):
        """
        Add an element to the sub pipeline.

        Method-call form of ``branch += pipe_element``; returns None.

        Parameters:
            pipe_element:
                The PipelineElement to add, being either a transformer or an estimator.

        """
        self.__iadd__(pipe_element)

    @staticmethod
    def prepare_photon_pipe(elements):
        """
        Build a PhotonPipeline from the given elements.

        Parameters:
            elements:
                Iterable of objects exposing a ``name`` attribute; each one
                becomes a ``(name, element)`` step, in the given order.

        Returns:
            A new PhotonPipeline made of those steps.

        """
        pipeline_steps = [(item.name, item) for item in elements]
        return PhotonPipeline(pipeline_steps)

    @staticmethod
    def sanity_check_pipeline(pipe):
        """
        Remove a trailing CallbackElement from the pipeline.

        If the last entry of ``pipe.elements`` holds a CallbackElement, a
        warning is logged and raised via ``warnings.warn`` and the entry is
        deleted in place.

        Parameters:
            pipe:
                Pipeline whose ``elements`` attribute is a sequence of
                ``(name, element)`` entries.

        Returns:
            The same pipe object, possibly with its last step removed.

        """
        if isinstance(pipe.elements[-1][1], CallbackElement):
            msg = "Last element of pipeline cannot be callback element, would be mistaken for estimator. Removing it."
            logger.warning(msg)
            warnings.warn(msg)
            del pipe.elements[-1]
        return pipe

    def _prepare_pipeline(self):
        """
        Rebuild the hyperparameters and the wrapped pipeline from self.elements.

        The freshly built PhotonPipeline receives the Branch's caching flags
        and replaces ``self.base_element``.
        """
        self._hyperparameters = {item.name: item.hyperparameters for item in self.elements
                                 if hasattr(item, 'hyperparameters')}

        # when hyperparameters are enabled, the per-element mapping above is
        # replaced by the flat, name-prefixed dictionary
        if self.has_hyperparameters:
            self.generate_sklearn_hyperparameters()
        new_pipe = Branch.prepare_photon_pipe(self.elements)
        new_pipe._fix_fold_id = self.fix_fold_id
        new_pipe._do_not_delete_cache_folder = self.do_not_delete_cache_folder
        self.base_element = new_pipe

    def copy_me(self):
        """
        Create a copy of this Branch including copies of all its elements.

        Elements offering ``copy_me`` are copied through it, all others via
        ``deepcopy``. If a current configuration is set it is applied to the
        copy with ``set_params``; the random state is carried over as well.

        Returns:
            A new instance of the same class with the same name.

        """
        new_copy_of_me = self.__class__(self.name)
        for item in self.elements:
            if hasattr(item, 'copy_me'):
                copy_item = item.copy_me()
            else:
                copy_item = deepcopy(item)
            new_copy_of_me += copy_item
        if self.current_config is not None:
            new_copy_of_me.set_params(**self.current_config)
        new_copy_of_me._random_state = self._random_state
        return new_copy_of_me

    @property
    def hyperparameters(self):
        """ The hyperparameter dictionary collected from the added elements. """
        return self._hyperparameters

    @hyperparameters.setter
    def hyperparameters(self, value):
        """
        Setting hyperparameters does not make sense: only the items that
        were added can be optimized, not the container (self).
        The assigned value is ignored.
        """
        return

    @property
    def _estimator_type(self):
        """
        The ``_estimator_type`` of the last element, or None for an empty Branch.
        """
        # An empty Branch has no last element. Answer None instead of letting
        # an IndexError escape, which hasattr()/getattr(..., default) probes
        # would not catch.
        if not self.elements:
            return None
        return getattr(self.elements[-1], '_estimator_type')

    def generate_config_grid(self):
        """
        Create the configuration grid for the elements of this Branch.

        Returns:
            The result of ``create_global_config_grid`` for the elements and
            the Branch name, or an empty list if hyperparameters are disabled.

        """
        if self.has_hyperparameters:
            tmp_grid = create_global_config_grid(self.elements, self.name)
            return tmp_grid
        else:
            return []

    def generate_sklearn_hyperparameters(self):
        """
        Generates a dictionary according to the sklearn convention of
        element_name__parameter_name: parameter_value

        Every hyperparameter key of every element is prefixed with the name
        of this Branch and a double underscore.
        """
        self._hyperparameters = {}
        for element in self.elements:
            for attribute, value_list in element.hyperparameters.items():
                self._hyperparameters[self.name + '__' + attribute] = value_list

    def _check_hyper(self, BaseEstimator):
        """ Intentionally a no-op for Branch; the argument is ignored. """
        pass

    @property
    def feature_importances_(self):
        """
        The ``feature_importances_`` of the last element, or None if the
        Branch is empty or the last element does not provide them.
        """
        # same reasoning as in _estimator_type: an empty Branch must not
        # raise IndexError on attribute probes
        if not self.elements:
            return None
        return getattr(self.elements[-1], 'feature_importances_', None)

__iadd__(self, pipe_element) special

Add an element to the sub pipeline.

Parameters:

Name Type Description Default
pipe_element PipelineElement

The PipelineElement to add, being either a transformer or an estimator.

required
Source code in photonai/base/photon_elements.py
def __iadd__(self, pipe_element: PipelineElement):
    """
    Append an element to the sub pipeline (``branch += element``).

    Parameters:
        pipe_element:
            The PipelineElement to add, being either a transformer or an estimator.

    Returns:
        self, so the augmented assignment keeps the name bound to this Branch.

    """
    # delegate the actual registration to the parent implementation ...
    super().__iadd__(pipe_element)
    # ... then refresh the internal pipeline for the new element list
    self._prepare_pipeline()
    return self

__init__(self, name, elements=None) special

Initialize the object.

Parameters:

Name Type Description Default
name str

Name of the encapsulated item and/or summary of the encapsulated element's functions.

required
elements List[photonai.base.photon_elements.PipelineElement]

List of PipelineElements added one after another to the Branch.

None
Source code in photonai/base/photon_elements.py
def __init__(self, name: str, elements: List[PipelineElement] = None):
    """
    Create a Branch and optionally fill it with elements.

    Parameters:
        name:
            Name of the encapsulated item and/or
            summary of the encapsulated element's functions.

        elements:
            List of PipelineElements added one after another to the Branch.
            Nothing is added when it is None or empty.

    """
    super().__init__(name, {}, test_disabled=False, disabled=False, base_element=True)

    # any child may need y or covariates, so they are always requested
    self.needs_y = True
    self.needs_covariates = True
    self.elements = []
    self.has_hyperparameters = True
    self.skip_caching = True
    self.identifier = "BRANCH:"

    # flags used for caching on individual level
    self.fix_fold_id = False
    self.do_not_delete_cache_folder = False

    # register the initial elements, if any were given
    for element in (elements or ()):
        self.add(element)

add(self, pipe_element)

Add an element to the sub pipeline.

Parameters:

Name Type Description Default
pipe_element PipelineElement

The PipelineElement to add, being either a transformer or an estimator.

required
Source code in photonai/base/photon_elements.py
def add(self, pipe_element: PipelineElement):
    """
    Add an element to the sub pipeline.

    Method-call form of ``branch += pipe_element``: it forwards to
    ``__iadd__`` and discards its result, so this method returns None.

    Parameters:
        pipe_element:
            The PipelineElement to add, being either a transformer or an estimator.

    """
    self.__iadd__(pipe_element)

fit(self, X, y=None, **kwargs)

Calls the fit function on all underlying base elements.

Parameters:

Name Type Description Default
X ndarray

The array-like input with shape=[N, D], where N is the number of samples and D is the number of features.

required
y ndarray

The truth array-like values with shape=[N], where N is the number of samples.

None
**kwargs

Keyword arguments, passed to base_elements fit.

{}

Returns:

Type Description

Fitted self.

Source code in photonai/base/photon_elements.py
def fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs):
    """
    Fit all underlying base elements.

    The wrapped pipeline is first passed through
    ``Branch.sanity_check_pipeline`` and stored back on the instance,
    then fitting is delegated to the parent class.

    Parameters:
        X:
            The array-like input with shape=[N, D], where N is
            the number of samples and D is the number of features.

        y:
            The truth array-like values with shape=[N],
            where N is the number of samples.

        **kwargs:
            Keyword arguments, passed to base_elements fit.

    Returns:
        Fitted self.

    """
    checked_pipeline = Branch.sanity_check_pipeline(self.base_element)
    self.base_element = checked_pipeline
    return super().fit(X, y, **kwargs)

predict(self, X, **kwargs)

Calls the predict function on underlying base elements.

Parameters:

Name Type Description Default
X ndarray

The array-like data with shape=[N, D], where N is the number of samples and D is the number of features.

required
**kwargs

Keyword arguments, passed to base_elements predict method.

{}

Returns:

Type Description
ndarray

Prediction values.

Source code in photonai/base/photon_elements.py
def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
    """
    Predict with the underlying base elements.

    Parameters:
        X:
            The array-like data with shape=[N, D], where N is the
            number of samples and D is the number of features.

        **kwargs:
            Keyword arguments, passed to base_elements predict method.

    Returns:
        Prediction values.

    """
    # pure delegation: the parent class performs the actual prediction
    predictions = super().predict(X, **kwargs)
    return predictions

transform(self, X, y=None, **kwargs)

Calls the transform function on all underlying base elements. If _estimator_type is in ['classifier', 'regressor'], predict is called instead.

Parameters:

Name Type Description Default
X ndarray

The array-like data with shape=[N, D], where N is the number of samples and D is the number of features.

required
y ndarray

The truth array-like values with shape=[N], where N is the number of samples.

None
**kwargs

Keyword arguments, passed to base_elements predict/transform.

{}

Returns:

Type Description
(<class 'numpy.ndarray'>, <class 'numpy.ndarray'>, <class 'dict'>)

Transformed/Predicted data.

Source code in photonai/base/photon_elements.py
def transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray, dict):
    """
    Calls the transform function on all underlying base elements.
    If _estimator_type is in ['classifier', 'regressor'], predict is called instead.

    Parameters:
        X:
            The array-like data with shape=[N, D], where N is the
            number of samples and D is the number of features.

        y:
            The truth array-like values with shape=[N],
            where N is the number of samples.

        **kwargs:
            Keyword arguments, passed to base_elements predict/transform.

    Returns:
        Transformed/Predicted data. In the estimator case this is the
        tuple (predictions, y, kwargs) with y and kwargs passed through
        unchanged.

    """
    if self._estimator_type == 'classifier' or self._estimator_type == 'regressor':
        # forward kwargs so the prediction sees the same keyword arguments
        # as a direct call to predict(X, **kwargs) would
        return super().predict(X, **kwargs), y, kwargs
    return super().transform(X, y, **kwargs)