Documentation for PipelineElement
PHOTONAI wrapper class for any transformer or estimator in the pipeline.
So-called PHOTONAI PipelineElements can be added to the Hyperpipe, each being a data-processing method or a learning algorithm. By choosing and combining data-processing methods and algorithms, and arranging them with the PHOTONAI classes, both simple and complex pipeline architectures can be designed rapidly.
The PHOTONAI PipelineElement implements several helpful features:
- Saves the hyperparameters that should be tested and creates a grid of all hyperparameter configurations.
- Enables rapid instantiation of pipeline elements via string identifier, e.g. 'svc' creates an sklearn.svm.SVC object.
- Attaches a "disable" switch to every element in the pipeline in order to evaluate the pipeline with the element disabled entirely, as shown in the example below.
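For illustration, a minimal usage sketch (assuming the built-in 'SVC' and 'StandardScaler' elements and the FloatRange hyperparameter class from photonai.optimization; the Hyperpipe setup is abbreviated):

``` python
from photonai.base import Hyperpipe, PipelineElement
from photonai.optimization import FloatRange

# instantiate by string identifier; extra kwargs become fixed default parameters
svc = PipelineElement('SVC',
                      hyperparameters={'C': FloatRange(0.1, 100)},  # values to optimize
                      test_disabled=True,  # also evaluate the pipeline without this element
                      kernel='rbf')

# elements are then added to a Hyperpipe in processing order (shown schematically):
# pipe = Hyperpipe('my_pipe', ...)
# pipe += PipelineElement('StandardScaler')
# pipe += svc
```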
Source code in photonai/base/photon_elements.py
class PipelineElement(BaseEstimator):
"""
PHOTONAI wrapper class for any transformer or estimator in the pipeline.
So-called PHOTONAI PipelineElements can be added to the Hyperpipe,
each being a data-processing method or a learning algorithm.
By choosing, combining data-processing methods and algorithms,
and arranging them with the PHOTONAI classes, simple and complex
pipeline architectures can be designed rapidly.
The PHOTONAI PipelineElement implements several helpful features:
- Saves the hyperparameters that should be tested
and creates a grid of all hyperparameter configurations.
- Enables rapid instantiation of pipeline
elements via string identifier, e.g. 'svc' creates
an sklearn.svm.SVC object.
- Attaches a "disable" switch to every element
in the pipeline in order to evaluate the pipeline
with the element disabled entirely.
"""
def __init__(self, name: str, hyperparameters: dict = None, test_disabled: bool = False,
disabled: bool = False, base_element: BaseEstimator = None, batch_size: int = 0, **kwargs) -> None:
"""
Takes a string literal and transforms it into an object
of the associated class (see PhotonCore.JSON).
Parameters:
name:
A string literal encoding the class to be instantiated.
hyperparameters:
Which values/value range should be tested for the
hyperparameter.
In form of Dict: parameter_name -> HyperparameterElement.
test_disabled:
If the hyperparameter search should evaluate a
complete disabling of the element.
disabled:
If true, the element is currently disabled and
does nothing except return the data it received.
base_element:
The underlying BaseEstimator. If not given the
instantiation per string identifier takes place.
batch_size:
Size of the batches in which the data is processed; 0 disables batching.
**kwargs:
Any parameters that should be passed to the object
to be instantiated, default parameters.
"""
if hyperparameters is None:
hyperparameters = {}
if base_element is None:
# Registering Pipeline Elements
if len(PhotonRegistry.ELEMENT_DICTIONARY) == 0:
registry = PhotonRegistry
if name not in PhotonRegistry.ELEMENT_DICTIONARY:
# try to reload
PhotonRegistry.ELEMENT_DICTIONARY = PhotonRegistry().get_package_info()
if name in PhotonRegistry.ELEMENT_DICTIONARY:
try:
desired_class_info = PhotonRegistry.ELEMENT_DICTIONARY[name]
desired_class_home = desired_class_info[0]
desired_class_name = desired_class_info[1]
imported_module = importlib.import_module(desired_class_home)
desired_class = getattr(imported_module, desired_class_name)
self.base_element = desired_class(**kwargs)
except AttributeError as ae:
logger.error('ValueError: Could not find corresponding class:'
+ str(PhotonRegistry.ELEMENT_DICTIONARY[name]))
raise ValueError('Could not find corresponding class:', PhotonRegistry.ELEMENT_DICTIONARY[name])
else:
# if even after reload the element does not appear, it is not supported
logger.error('Element not supported right now:' + name)
raise NameError('Element not supported right now:', name)
else:
self.base_element = base_element
self.is_transformer = hasattr(self.base_element, "transform")
self.reduce_dimension = False # boolean - set on transform method
self.is_estimator = hasattr(self.base_element, "predict")
self._name = name
self.initial_name = str(name)
self.kwargs = kwargs
self.current_config = None
self.batch_size = batch_size
self.test_disabled = test_disabled
self.initial_hyperparameters = dict(hyperparameters)
self._sklearn_disabled = self.name + '__disabled'
self._hyperparameters = hyperparameters
if len(hyperparameters) > 0:
key_0 = next(iter(hyperparameters))
if self.name not in key_0:
self.hyperparameters = hyperparameters
else:
self.hyperparameters = hyperparameters
# self.initalize_hyperparameters = hyperparameters
# check if hyperparameters are already in sklearn style
# check if hyperparameters are members of the class
if self.is_transformer or self.is_estimator:
self._check_hyperparameters(BaseEstimator)
self.disabled = disabled
# check if self.base element needs y for fitting and transforming
if hasattr(self.base_element, 'needs_y'):
self.needs_y = self.base_element.needs_y
else:
self.needs_y = False
# or if it maybe needs covariates for fitting and transforming
if hasattr(self.base_element, 'needs_covariates'):
self.needs_covariates = self.base_element.needs_covariates
else:
self.needs_covariates = False
self._random_state = False
@property
def name(self):
return self._name
@name.setter
def name(self, value):
self._name = value
self.generate_sklearn_hyperparameters(self.initial_hyperparameters)
@property
def hyperparameters(self):
return self._hyperparameters
@hyperparameters.setter
def hyperparameters(self, value: dict):
self.generate_sklearn_hyperparameters(value)
def _check_hyperparameters(self, BaseEstimator):
# check if hyperparameters are members of the class
not_supported_hyperparameters = list(
set([key.split("__")[-1] for key in self._hyperparameters.keys() if key.split("__")[-1] != "disabled"]) -
set(BaseEstimator.get_params(self.base_element).keys()))
if not_supported_hyperparameters:
error_message = 'ValueError: Set of hyperparameters is not valid, check hyperparameters:' + \
str(not_supported_hyperparameters)
logger.error(error_message)
raise ValueError(error_message)
def generate_sklearn_hyperparameters(self, value: dict):
"""
Generates a dictionary according to the sklearn convention of
element_name__parameter_name: parameter_value.
"""
self._hyperparameters = {}
for attribute, value_list in value.items():
self._hyperparameters[self.name + '__' + attribute] = value_list
if self.test_disabled:
self._hyperparameters[self._sklearn_disabled] = [False, True]
@property
def random_state(self):
return self._random_state
@random_state.setter
def random_state(self, random_state):
self._random_state = random_state
if hasattr(self, 'elements'):
for el in self.elements:
if hasattr(el, 'random_state'):
el.random_state = self._random_state
if hasattr(self, "base_element") and hasattr(self.base_element, "random_state"):
self.base_element.random_state = random_state
@property
def _estimator_type(self):
# estimator_type obligation for estimators, is ignored if a transformer is given
# prevention of misuse through predict test (predict method available <=> Estimator).
est_type = getattr(self.base_element, '_estimator_type', None)
if est_type in [None, 'transformer']:
if hasattr(self.base_element, 'predict'):
raise NotImplementedError("Element has predict() method but does not specify whether it is a regressor"
" or classifier. Remember to inherit from ClassifierMixin or RegressorMixin.")
return None
else:
if est_type not in ['classifier', 'regressor']:
raise NotImplementedError("Currently, we only support type classifier or regressor."
" Is {}.".format(est_type))
if not hasattr(self.base_element, 'predict'):
raise NotImplementedError("Estimator does not implement predict() method.")
return est_type
# this is only here because everything inherits from PipelineElement.
def __iadd__(self, pipe_element):
"""
Add an element to the internal list of elements.
Parameters:
pipe_element (PipelineElement):
The object to add, being either a transformer or an estimator.
"""
PipelineElement.sanity_check_element_type_for_building_photon_pipes(pipe_element, type(self))
# check if that exact instance has been added before
already_added_objects = len([i for i in self.elements if i is pipe_element])
if already_added_objects > 0:
error_msg = "Cannot add the same instance twice to " + self.name + " - " + str(type(self))
logger.error(error_msg)
raise ValueError(error_msg)
# check for doubled names:
already_existing_element_with_that_name = len([i for i in self.elements if i.name == pipe_element.name])
if already_existing_element_with_that_name > 0:
error_msg = "Already added a pipeline element with the name " + pipe_element.name + " to " + self.name
logger.warning(error_msg)
warnings.warn(error_msg)
# check for other items that have been renamed
nr_of_existing_elements_with_that_name = len([i for i in self.elements if i.name.startswith(pipe_element.name)])
new_name = pipe_element.name + str(nr_of_existing_elements_with_that_name + 1)
while len([i for i in self.elements if i.name == new_name]) > 0:
nr_of_existing_elements_with_that_name += 1
new_name = pipe_element.name + str(nr_of_existing_elements_with_that_name + 1)
msg = "Renaming " + pipe_element.name + " in " + self.name + " to " + new_name + " in " + self.name
logger.warning(msg)
warnings.warn(msg)
pipe_element.name = new_name
self.elements.append(pipe_element)
return self
def copy_me(self):
if self.name in PhotonRegistry.ELEMENT_DICTIONARY:
# we need initial name to refer to the class to be instantiated (SVC) even though the name might be SVC2
copy = PipelineElement(self.initial_name, {}, test_disabled=self.test_disabled,
disabled=self.disabled, batch_size=self.batch_size, **self.kwargs)
copy.initial_hyperparameters = self.initial_hyperparameters
# in the setter of the name, we use initial hyperparameters to adjust the hyperparameters to the name
copy.name = self.name
else:
if hasattr(self.base_element, 'copy_me'):
new_base_element = self.base_element.copy_me()
else:
try:
new_base_element = deepcopy(self.base_element)
except Exception as e:
error_msg = "Cannot copy custom element " + self.name + ". Please specify a copy_me() method " \
"returning a copy of the object"
logger.error(error_msg)
raise e
# handle custom elements
copy = PipelineElement.create(self.name, new_base_element, hyperparameters=self.hyperparameters,
test_disabled=self.test_disabled,
disabled=self.disabled, batch_size=self.batch_size,
**self.kwargs)
if self.current_config is not None:
copy.set_params(**self.current_config)
copy._random_state = self._random_state
return copy
@classmethod
def create(cls, name: str, base_element: BaseEstimator, hyperparameters: dict, test_disabled: bool = False,
disabled: bool = False, **kwargs):
"""
Takes an instantiated object and encapsulates it
into the PHOTONAI structure.
Adds the disabled switch and attaches
information about the hyperparameters that should be tested.
Parameters:
name:
A string literal encoding the class to be instantiated.
base_element:
The underlying transformer or estimator instance.
hyperparameters:
Which values/value range should be tested for the
hyperparameter.
In form of Dict: parameter_name -> HyperparameterElement.
test_disabled:
If the hyperparameter search should evaluate a
complete disabling of the element.
disabled:
If true, the element is currently disabled and
does nothing except return the data it received.
**kwargs:
Any parameters that should be passed to the object
to be instantiated, default parameters.
Example:
``` python
class RD(BaseEstimator, TransformerMixin):
def fit(self, X, y, **kwargs):
pass
def fit_transform(self, X, y=None, **fit_params):
return self.transform(X)
def transform(self, X):
return X[:, :3]
trans = PipelineElement.create('MyTransformer', base_element=RD(), hyperparameters={})
```
"""
if isinstance(base_element, type):
raise ValueError("Base element should be an instance but is a class.")
return PipelineElement(name, hyperparameters, test_disabled, disabled, base_element=base_element, **kwargs)
@property
def feature_importances_(self):
if hasattr(self.base_element, 'feature_importances_'):
return self.base_element.feature_importances_.tolist()
elif hasattr(self.base_element, 'coef_'):
return self.base_element.coef_.tolist()
def generate_config_grid(self):
config_dict = create_global_config_dict([self])
if len(config_dict) > 0:
if self.test_disabled:
config_dict.pop(self._sklearn_disabled)
config_list = list(ParameterGrid(config_dict))
if self.test_disabled:
for item in config_list:
item[self._sklearn_disabled] = False
config_list.append({self._sklearn_disabled: True})
if len(config_list) < 2:
config_list.append({self._sklearn_disabled: False})
return config_list
else:
return []
def get_params(self, deep: bool = True):
"""
Forwards the get_params request to the wrapped base element.
"""
if hasattr(self.base_element, 'get_params'):
params = self.base_element.get_params(deep)
params["name"] = self.name
return params
else:
return None
def set_params(self, **kwargs):
"""
Forwards the set_params request to the wrapped base element
Takes care of the disabled parameter which is additionally attached by the PHOTON wrapper
"""
# this is an ugly hack to approximate the right settings when copying the element
self.current_config = kwargs
# element disable is a construct used for this container only
if self._sklearn_disabled in kwargs:
self.disabled = kwargs[self._sklearn_disabled]
del kwargs[self._sklearn_disabled]
elif 'disabled' in kwargs:
self.disabled = kwargs['disabled']
del kwargs['disabled']
self.base_element.set_params(**kwargs)
return self
def fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs):
"""
Calls the fit function of the base element.
Parameters:
X:
The array-like training and test data with shape=[N, D],
where N is the number of samples and D is the number of features.
y:
The truth array-like values with shape=[N],
where N is the number of samples.
**kwargs:
Keyword arguments, passed to base_element.fit.
Returns:
Fitted self.
"""
if not self.disabled:
obj = self.base_element
arg_list = inspect.signature(obj.fit)
if len(arg_list.parameters) > 2:
vals = arg_list.parameters.values()
kwargs_param = list(vals)[-1]
if kwargs_param.kind == kwargs_param.VAR_KEYWORD:
obj.fit(X, y, **kwargs)
return self
obj.fit(X, y)
return self
def __batch_predict(self, delegate, X, **kwargs):
if not isinstance(X, list) and not isinstance(X, np.ndarray):
msg = "Cannot do batching on a single entity."
logger.warning(msg)
warnings.warn(msg)
return delegate(X, **kwargs)
# initialize return values
processed_y = None
nr = PhotonDataHelper.find_n(X)
batch_idx = 0
for start, stop in PhotonDataHelper.chunker(nr, self.batch_size):
batch_idx += 1
logger.debug(self.name + " is predicting batch " + str(batch_idx))
# split data in batches
X_batched, y_batched, kwargs_dict_batched = PhotonDataHelper.split_data(X, None, kwargs, start, stop)
# predict
y_pred = delegate(X_batched, **kwargs_dict_batched)
processed_y = PhotonDataHelper.stack_data_vertically(processed_y, y_pred)
return processed_y
def __predict(self, X, **kwargs):
if not self.disabled:
if hasattr(self.base_element, 'predict'):
return self.adjusted_predict_call(self.base_element.predict, X, **kwargs)
else:
logger.error('BaseException. base Element should have function ' +
'predict.')
raise BaseException('base Element should have function predict.')
else:
return X
def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
"""
Calls the predict function of the underlying base_element.
Parameters:
X:
The array-like training and test data with shape=[N, D],
where N is the number of samples and D is the number of features.
**kwargs:
Keyword arguments, passed to base_element.predict.
Returns:
Predicted values.
"""
if self.batch_size == 0:
return self.__predict(X, **kwargs)
else:
return self.__batch_predict(self.__predict, X, **kwargs)
def predict_proba(self, X, **kwargs):
if self.batch_size == 0:
return self.__predict_proba(X, **kwargs)
else:
return self.__batch_predict(self.__predict_proba, X, **kwargs)
def __predict_proba(self, X: np.ndarray, **kwargs):
"""
Predict probabilities
base element needs predict_proba() function, otherwise throw
base exception.
"""
if not self.disabled:
if hasattr(self.base_element, 'predict_proba'):
# todo: here, I used delegate call (same as below in predict within the transform call)
#return self.base_element.predict_proba(X)
return self.adjusted_predict_call(self.base_element.predict_proba, X, **kwargs)
else:
# todo: in case _final_estimator is a Branch, we do not know beforehand if the base elements will
# have a predict_proba -> if not, just return None (@Ramona, does this make sense?)
# logger.error('BaseException. base Element should have "predict_proba" function.')
# raise BaseException('base Element should have predict_proba function.')
return None
return X
def __transform(self, X, y=None, **kwargs):
if not self.disabled:
if hasattr(self.base_element, 'transform'):
return self.adjusted_delegate_call(self.base_element.transform, X, y, **kwargs)
elif hasattr(self.base_element, 'predict'):
return self.predict(X, **kwargs), y, kwargs
else:
logger.error('BaseException: transform-predict-mess')
raise BaseException('transform-predict-mess')
else:
return X, y, kwargs
def transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray, dict):
"""
Calls transform on the base element.
In case there is no transform method, calls predict.
This is used if we are using an estimator as a preprocessing step.
Parameters:
X:
The array-like data with shape=[N, D], where N is the
number of samples and D is the number of features.
y:
The truth array-like values with shape=[N], where N is
the number of samples.
**kwargs:
Keyword arguments, passed to base_element.transform.
Returns:
(X, y) in transformed version and original kwargs.
"""
if self.batch_size == 0:
Xt, yt, kwargs = self.__transform(X, y, **kwargs)
else:
Xt, yt, kwargs = self.__batch_transform(X, y, **kwargs)
if all(hasattr(data, "shape") for data in [X, Xt]) and all(len(data.shape) > 1 for data in [X, Xt]):
self.reduce_dimension = (Xt.shape[1] < X.shape[1])
return Xt, yt, kwargs
def inverse_transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray, dict):
"""
Calls inverse_transform on the base element.
If the dimension is preserved, transformers
without an inverse return the original input.
Parameters:
X:
The array-like data with shape=[N, D], where N
is the number of samples and D is the number of features.
y:
The truth array-like values with shape=[N], where N is
the number of samples.
**kwargs:
Keyword arguments, passed to base_element.inverse_transform.
Raises:
NotImplementedError:
Thrown when there is a dimensional reduction but no inverse is defined.
Returns:
(X, y, kwargs) in back-transformed version.
"""
if hasattr(self.base_element, 'inverse_transform'):
# todo: check this
X, y, kwargs = self.adjusted_delegate_call(self.base_element.inverse_transform, X, y, **kwargs)
elif self.is_transformer and self.reduce_dimension:
msg = "{} has no inverse_transform, but element reduce dimesions.".format(self.name)
logger.error(msg)
raise NotImplementedError(msg)
return X, y, kwargs
def __batch_transform(self, X, y=None, **kwargs):
if not isinstance(X, list) and not isinstance(X, np.ndarray):
warning = "Cannot do batching on a single entity."
logger.warning(warning)
warnings.warn(warning)
return self.__transform(X, y, **kwargs)
# initialize return values
processed_X = None
processed_y = None
processed_kwargs = dict()
nr = PhotonDataHelper.find_n(X)
batch_idx = 0
for start, stop in PhotonDataHelper.chunker(nr, self.batch_size):
batch_idx += 1
# split data in batches
X_batched, y_batched, kwargs_dict_batched = PhotonDataHelper.split_data(X, y, kwargs, start, stop)
actual_batch_size = PhotonDataHelper.find_n(X_batched)
logger.debug(self.name + " is transforming batch " + str(batch_idx) + " with " + str(actual_batch_size)
+ " items.")
# call transform
X_new, y_new, kwargs_new = self.adjusted_delegate_call(self.base_element.transform, X_batched, y_batched,
**kwargs_dict_batched)
# stack results
processed_X, processed_y, processed_kwargs = PhotonDataHelper.join_data(processed_X, X_new, processed_y,
y_new,
processed_kwargs, kwargs_new)
return processed_X, processed_y, processed_kwargs
def adjusted_delegate_call(self, delegate, X, y, **kwargs):
# Case| transforms X | needs_y | needs_covariates
# -------------------------------------------------------
# 1 yes no no = transform(X) -> returns Xt
# todo: case does not exist any longer
# 2 yes yes no = transform(X, y) -> returns Xt, yt
# 3 yes yes yes = transform(X, y, kwargs) -> returns Xt, yt, kwargst
# 4 yes no yes = transform(X, kwargs) -> returns Xt, kwargst
# 5 no yes or no yes or no = NOT ALLOWED
# todo: we don't need to check for Switch, Stack or Branch since those classes define
# needs_y and needs_covariates in their __init__()
if self.needs_y:
# if we dont have any target vector, we are in "predict"-mode although we are currently transforming
# in this case, we want to skip the transformation and pass X, None and kwargs onwards
# so basically, we skip all training_only elements
# todo: I think, there's no way around this if we want to pass y and kwargs down to children of Switch and Branch
if isinstance(self, (Switch, Branch, Preprocessing)):
X, y, kwargs = delegate(X, y, **kwargs)
else:
if y is not None:
# todo: in case a method needs y, we should also always pass kwargs
# i.e. if we change the number of samples, we also need to apply that change to all kwargs
# todo: talk to Ramona! Maybe we actually DO need this case
if self.needs_covariates:
X, y, kwargs = delegate(X, y, **kwargs)
else:
X, y = delegate(X, y)
elif self.needs_covariates:
X, kwargs = delegate(X, **kwargs)
else:
X = delegate(X)
return X, y, kwargs
def adjusted_predict_call(self, delegate, X, **kwargs):
if self.needs_covariates:
return delegate(X, **kwargs)
else:
return delegate(X)
def score(self, X_test: np.ndarray, y_test: np.ndarray) -> float:
"""
Calls the score function on the base element.
Parameters:
X_test:
Input test data to score on.
y_test:
Input true targets to score on.
Returns:
A goodness of fit measure or a likelihood of unseen data.
"""
return self.base_element.score(X_test, y_test)
def prettify_config_output(self, config_name: str, config_value, return_dict: bool = False):
"""Make hyperparameter combinations human readable """
if config_name == "disabled" and config_value is False:
if return_dict:
return {'disabled': False}
else:
return "disabled = False"
else:
if return_dict:
return {config_name: config_value}
else:
return config_name + '=' + str(config_value)
@staticmethod
def sanity_check_element_type_for_building_photon_pipes(pipe_element, type_of_self):
if (not isinstance(pipe_element, PipelineElement) and not isinstance(pipe_element, PhotonNative)) or isinstance(pipe_element, Preprocessing):
raise TypeError(str(type_of_self) + " only accepts PHOTON elements. Cannot add element of type " + str(type(pipe_element)))
__iadd__(self, pipe_element)
special
Add an element to the internal list of elements.
Parameters:

Name | Type | Description | Default
---|---|---|---
pipe_element | PipelineElement | The object to add, being either a transformer or an estimator. | required
Source code in photonai/base/photon_elements.py
def __iadd__(self, pipe_element):
"""
Add an element to the internal list of elements.
Parameters:
pipe_element (PipelineElement):
The object to add, being either a transformer or an estimator.
"""
PipelineElement.sanity_check_element_type_for_building_photon_pipes(pipe_element, type(self))
# check if that exact instance has been added before
already_added_objects = len([i for i in self.elements if i is pipe_element])
if already_added_objects > 0:
error_msg = "Cannot add the same instance twice to " + self.name + " - " + str(type(self))
logger.error(error_msg)
raise ValueError(error_msg)
# check for doubled names:
already_existing_element_with_that_name = len([i for i in self.elements if i.name == pipe_element.name])
if already_existing_element_with_that_name > 0:
error_msg = "Already added a pipeline element with the name " + pipe_element.name + " to " + self.name
logger.warning(error_msg)
warnings.warn(error_msg)
# check for other items that have been renamed
nr_of_existing_elements_with_that_name = len([i for i in self.elements if i.name.startswith(pipe_element.name)])
new_name = pipe_element.name + str(nr_of_existing_elements_with_that_name + 1)
while len([i for i in self.elements if i.name == new_name]) > 0:
nr_of_existing_elements_with_that_name += 1
new_name = pipe_element.name + str(nr_of_existing_elements_with_that_name + 1)
msg = "Renaming " + pipe_element.name + " in " + self.name + " to " + new_name + " in " + self.name
logger.warning(msg)
warnings.warn(msg)
pipe_element.name = new_name
self.elements.append(pipe_element)
return self
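For illustration, a short sketch of the += semantics, including the automatic renaming on name collisions (assuming the built-in Branch container and the 'StandardScaler' and 'PCA' elements):

``` python
from photonai.base import Branch, PipelineElement

branch = Branch('preprocessing')
branch += PipelineElement('StandardScaler')
branch += PipelineElement('PCA')

# adding another element with the same name emits a warning and
# renames the newcomer, e.g. 'PCA' -> 'PCA2'
branch += PipelineElement('PCA')
print([element.name for element in branch.elements])
```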
__init__(self, name, hyperparameters=None, test_disabled=False, disabled=False, base_element=None, batch_size=0, **kwargs)
special
Takes a string literal and transforms it into an object of the associated class (see PhotonCore.JSON).
Parameters:

Name | Type | Description | Default
---|---|---|---
name | str | A string literal encoding the class to be instantiated. | required
hyperparameters | dict | Which values/value range should be tested for the hyperparameter, as a dict: parameter_name -> HyperparameterElement. | None
test_disabled | bool | If the hyperparameter search should evaluate a complete disabling of the element. | False
disabled | bool | If true, the element is currently disabled and does nothing except return the data it received. | False
base_element | BaseEstimator | The underlying BaseEstimator. If not given, instantiation per string identifier takes place. | None
batch_size | int | Size of the batches in which the data is processed; 0 disables batching. | 0
**kwargs | | Any parameters that should be passed to the object to be instantiated as default parameters. | {}
Source code in photonai/base/photon_elements.py
def __init__(self, name: str, hyperparameters: dict = None, test_disabled: bool = False,
disabled: bool = False, base_element: BaseEstimator = None, batch_size: int = 0, **kwargs) -> None:
"""
Takes a string literal and transforms it into an object
of the associated class (see PhotonCore.JSON).
Parameters:
name:
A string literal encoding the class to be instantiated.
hyperparameters:
Which values/value range should be tested for the
hyperparameter.
In form of Dict: parameter_name -> HyperparameterElement.
test_disabled:
If the hyperparameter search should evaluate a
complete disabling of the element.
disabled:
If true, the element is currently disabled and
does nothing except return the data it received.
base_element:
The underlying BaseEstimator. If not given the
instantiation per string identifier takes place.
batch_size:
Size of the batches in which the data is processed; 0 disables batching.
**kwargs:
Any parameters that should be passed to the object
to be instantiated, default parameters.
"""
if hyperparameters is None:
hyperparameters = {}
if base_element is None:
# Registering Pipeline Elements
if len(PhotonRegistry.ELEMENT_DICTIONARY) == 0:
registry = PhotonRegistry
if name not in PhotonRegistry.ELEMENT_DICTIONARY:
# try to reload
PhotonRegistry.ELEMENT_DICTIONARY = PhotonRegistry().get_package_info()
if name in PhotonRegistry.ELEMENT_DICTIONARY:
try:
desired_class_info = PhotonRegistry.ELEMENT_DICTIONARY[name]
desired_class_home = desired_class_info[0]
desired_class_name = desired_class_info[1]
imported_module = importlib.import_module(desired_class_home)
desired_class = getattr(imported_module, desired_class_name)
self.base_element = desired_class(**kwargs)
except AttributeError as ae:
logger.error('ValueError: Could not find corresponding class:'
+ str(PhotonRegistry.ELEMENT_DICTIONARY[name]))
raise ValueError('Could not find corresponding class:', PhotonRegistry.ELEMENT_DICTIONARY[name])
else:
# if even after reload the element does not appear, it is not supported
logger.error('Element not supported right now:' + name)
raise NameError('Element not supported right now:', name)
else:
self.base_element = base_element
self.is_transformer = hasattr(self.base_element, "transform")
self.reduce_dimension = False # boolean - set on transform method
self.is_estimator = hasattr(self.base_element, "predict")
self._name = name
self.initial_name = str(name)
self.kwargs = kwargs
self.current_config = None
self.batch_size = batch_size
self.test_disabled = test_disabled
self.initial_hyperparameters = dict(hyperparameters)
self._sklearn_disabled = self.name + '__disabled'
self._hyperparameters = hyperparameters
if len(hyperparameters) > 0:
key_0 = next(iter(hyperparameters))
if self.name not in key_0:
self.hyperparameters = hyperparameters
else:
self.hyperparameters = hyperparameters
# self.initalize_hyperparameters = hyperparameters
# check if hyperparameters are already in sklearn style
# check if hyperparameters are members of the class
if self.is_transformer or self.is_estimator:
self._check_hyperparameters(BaseEstimator)
self.disabled = disabled
# check if self.base element needs y for fitting and transforming
if hasattr(self.base_element, 'needs_y'):
self.needs_y = self.base_element.needs_y
else:
self.needs_y = False
# or if it maybe needs covariates for fitting and transforming
if hasattr(self.base_element, 'needs_covariates'):
self.needs_covariates = self.base_element.needs_covariates
else:
self.needs_covariates = False
self._random_state = False
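To illustrate the constructor's hyperparameter handling: keys are rewritten to the sklearn convention <name>__<parameter>, and test_disabled=True adds a <name>__disabled entry. A minimal sketch (assuming the built-in 'PCA' element and the IntegerRange class from photonai.optimization):

``` python
from photonai.base import PipelineElement
from photonai.optimization import IntegerRange

pca = PipelineElement('PCA',
                      hyperparameters={'n_components': IntegerRange(3, 10)},
                      test_disabled=True)

print(pca.hyperparameters)
# {'PCA__n_components': <IntegerRange object>, 'PCA__disabled': [False, True]}
```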
create(name, base_element, hyperparameters, test_disabled=False, disabled=False, **kwargs)
classmethod
Takes an instantiated object and encapsulates it into the PHOTONAI structure. Adds the disabled switch and attaches information about the hyperparameters that should be tested.
Parameters:

Name | Type | Description | Default
---|---|---|---
name | str | A string literal encoding the class to be instantiated. | required
base_element | BaseEstimator | The underlying transformer or estimator instance. | required
hyperparameters | dict | Which values/value range should be tested for the hyperparameter, as a dict: parameter_name -> HyperparameterElement. | required
test_disabled | bool | If the hyperparameter search should evaluate a complete disabling of the element. | False
disabled | bool | If true, the element is currently disabled and does nothing except return the data it received. | False
**kwargs | | Any parameters that should be passed to the object to be instantiated as default parameters. | {}
Examples:
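The following mirrors the example from the create() docstring shown in the source below, made self-contained with the required imports:

``` python
from sklearn.base import BaseEstimator, TransformerMixin
from photonai.base import PipelineElement

class RD(BaseEstimator, TransformerMixin):
    def fit(self, X, y, **kwargs):
        pass
    def fit_transform(self, X, y=None, **fit_params):
        return self.transform(X)
    def transform(self, X):
        return X[:, :3]

trans = PipelineElement.create('MyTransformer', base_element=RD(), hyperparameters={})
```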
Source code in photonai/base/photon_elements.py
@classmethod
def create(cls, name: str, base_element: BaseEstimator, hyperparameters: dict, test_disabled: bool = False,
disabled: bool = False, **kwargs):
"""
Takes an instantiated object and encapsulates it
into the PHOTONAI structure.
Adds the disabled switch and attaches
information about the hyperparameters that should be tested.
Parameters:
name:
A string literal encoding the class to be instantiated.
base_element:
The underlying transformer or estimator instance.
hyperparameters:
Which values/value range should be tested for the
hyperparameter.
In form of Dict: parameter_name -> HyperparameterElement.
test_disabled:
If the hyperparameter search should evaluate a
complete disabling of the element.
disabled:
If true, the element is currently disabled and
does nothing except return the data it received.
**kwargs:
Any parameters that should be passed to the object
to be instantiated, default parameters.
Example:
``` python
class RD(BaseEstimator, TransformerMixin):
def fit(self, X, y, **kwargs):
pass
def fit_transform(self, X, y=None, **fit_params):
return self.transform(X)
def transform(self, X):
return X[:, :3]
trans = PipelineElement.create('MyTransformer', base_element=RD(), hyperparameters={})
```
"""
if isinstance(base_element, type):
raise ValueError("Base element should be an instance but is a class.")
return PipelineElement(name, hyperparameters, test_disabled, disabled, base_element=base_element, **kwargs)
fit(self, X, y=None, **kwargs)
Calls the fit function of the base element.
Parameters:

Name | Type | Description | Default
---|---|---|---
X | ndarray | The array-like training data with shape=[N, D], where N is the number of samples and D is the number of features. | required
y | ndarray | The truth array-like values with shape=[N], where N is the number of samples. | None
**kwargs | | Keyword arguments, passed to base_element.fit. | {}

Returns:

Type | Description
---|---
PipelineElement | Fitted self.
Source code in photonai/base/photon_elements.py
def fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs):
"""
Calls the fit function of the base element.
Parameters:
X:
The array-like training and test data with shape=[N, D],
where N is the number of samples and D is the number of features.
y:
The truth array-like values with shape=[N],
where N is the number of samples.
**kwargs:
Keyword arguments, passed to base_element.fit.
Returns:
Fitted self.
"""
if not self.disabled:
obj = self.base_element
arg_list = inspect.signature(obj.fit)
if len(arg_list.parameters) > 2:
vals = arg_list.parameters.values()
kwargs_param = list(vals)[-1]
if kwargs_param.kind == kwargs_param.VAR_KEYWORD:
obj.fit(X, y, **kwargs)
return self
obj.fit(X, y)
return self
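The signature inspection above means keyword data is forwarded only if the wrapped fit accepts **kwargs. A sketch with a hypothetical custom estimator (the class name and the covariates key are illustrative, not part of PHOTONAI):

``` python
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from photonai.base import PipelineElement

class CovariateAwareClassifier(BaseEstimator, ClassifierMixin):
    def fit(self, X, y, **kwargs):
        # fit declares **kwargs, so PipelineElement forwards keyword data
        self.covariates_ = kwargs.get('covariates')
        return self

    def predict(self, X):
        return np.zeros(len(X))

element = PipelineElement.create('CovClf', CovariateAwareClassifier(), hyperparameters={})
element.fit(np.random.rand(10, 4), np.zeros(10), covariates=np.ones(10))
```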
inverse_transform(self, X, y=None, **kwargs)
Calls inverse_transform on the base element.
If the dimension is preserved, transformers without an inverse return the original input.
Parameters:

Name | Type | Description | Default
---|---|---|---
X | ndarray | The array-like data with shape=[N, D], where N is the number of samples and D is the number of features. | required
y | ndarray | The truth array-like values with shape=[N], where N is the number of samples. | None
**kwargs | | Keyword arguments, passed to base_element.inverse_transform. | {}

Returns:

Type | Description
---|---
(ndarray, ndarray, dict) | (X, y, kwargs) in back-transformed version.
Source code in photonai/base/photon_elements.py
def inverse_transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray, dict):
"""
Calls inverse_transform on the base element.
If the dimension is preserved, transformers
without an inverse return the original input.
Parameters:
X:
The array-like data with shape=[N, D], where N
is the number of samples and D is the number of features.
y:
The truth array-like values with shape=[N], where N is
the number of samples.
**kwargs:
Keyword arguments, passed to base_element.inverse_transform.
Raises:
NotImplementedError:
Thrown when there is a dimensional reduction but no inverse is defined.
Returns:
(X, y, kwargs) in back-transformed version.
"""
if hasattr(self.base_element, 'inverse_transform'):
# todo: check this
X, y, kwargs = self.adjusted_delegate_call(self.base_element.inverse_transform, X, y, **kwargs)
elif self.is_transformer and self.reduce_dimension:
msg = "{} has no inverse_transform, but element reduce dimesions.".format(self.name)
logger.error(msg)
raise NotImplementedError(msg)
return X, y, kwargs
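For example, an element that reduces dimensionality but defines no inverse raises NotImplementedError, while one whose base element implements inverse_transform maps data back. A sketch assuming the built-in 'PCA' element (backed by sklearn's PCA, which does define inverse_transform):

``` python
import numpy as np
from photonai.base import PipelineElement

pca = PipelineElement('PCA', hyperparameters={}, n_components=2)
X = np.random.rand(20, 5)
Xt, _, _ = pca.fit(X).transform(X)        # (20, 5) -> (20, 2)
X_back, _, _ = pca.inverse_transform(Xt)  # mapped back to (20, 5)
print(X_back.shape)
```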
predict(self, X, **kwargs)
Calls the predict function of the underlying base_element.
Parameters:

Name | Type | Description | Default
---|---|---|---
X | ndarray | The array-like data with shape=[N, D], where N is the number of samples and D is the number of features. | required
**kwargs | | Keyword arguments, passed to base_element.predict. | {}

Returns:

Type | Description
---|---
ndarray | Predicted values.
Source code in photonai/base/photon_elements.py
def predict(self, X: np.ndarray, **kwargs) -> np.ndarray:
"""
Calls the predict function of the underlying base_element.
Parameters:
X:
The array-like training and test data with shape=[N, D],
where N is the number of samples and D is the number of features.
**kwargs:
Keyword arguments, passed to base_element.predict.
Returns:
Predicted values.
"""
if self.batch_size == 0:
return self.__predict(X, **kwargs)
else:
return self.__batch_predict(self.__predict, X, **kwargs)
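When batch_size > 0, prediction is delegated chunk-wise to the private batch helper and the per-batch outputs are stacked. A minimal sketch (assuming the built-in 'SVC' element):

``` python
import numpy as np
from photonai.base import PipelineElement

svc = PipelineElement('SVC', hyperparameters={}, batch_size=50)
X, y = np.random.rand(200, 4), np.random.randint(0, 2, 200)
svc.fit(X, y)
predictions = svc.predict(X)  # internally processed in chunks of 50 samples
print(predictions.shape)      # (200,)
```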
score(self, X_test, y_test)
Calls the score function on the base element.
Parameters:

Name | Type | Description | Default
---|---|---|---
X_test | ndarray | Input test data to score on. | required
y_test | ndarray | Input true targets to score on. | required

Returns:

Type | Description
---|---
float | A goodness of fit measure or a likelihood of unseen data.
Source code in photonai/base/photon_elements.py
def score(self, X_test: np.ndarray, y_test: np.ndarray) -> float:
"""
Calls the score function on the base element.
Parameters:
X_test:
Input test data to score on.
y_test:
Input true targets to score on.
Returns:
A goodness of fit measure or a likelihood of unseen data.
"""
return self.base_element.score(X_test, y_test)
transform(self, X, y=None, **kwargs)
Calls transform on the base element.
In case there is no transform method, calls predict. This is used if we are using an estimator as a preprocessing step.
Parameters:

Name | Type | Description | Default
---|---|---|---
X | ndarray | The array-like data with shape=[N, D], where N is the number of samples and D is the number of features. | required
y | ndarray | The truth array-like values with shape=[N], where N is the number of samples. | None
**kwargs | | Keyword arguments, passed to base_element.transform. | {}

Returns:

Type | Description
---|---
(ndarray, ndarray, dict) | (X, y) in transformed version and original kwargs.
Source code in photonai/base/photon_elements.py
def transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray, dict):
"""
Calls transform on the base element.
In case there is no transform method, calls predict.
This is used if we are using an estimator as a preprocessing step.
Parameters:
X:
The array-like data with shape=[N, D], where N is the
number of samples and D is the number of features.
y:
The truth array-like values with shape=[N], where N is
the number of samples.
**kwargs:
Keyword arguments, passed to base_element.transform.
Returns:
(X, y) in transformed version and original kwargs.
"""
if self.batch_size == 0:
Xt, yt, kwargs = self.__transform(X, y, **kwargs)
else:
Xt, yt, kwargs = self.__batch_transform(X, y, **kwargs)
if all(hasattr(data, "shape") for data in [X, Xt]) and all(len(data.shape) > 1 for data in [X, Xt]):
self.reduce_dimension = (Xt.shape[1] < X.shape[1])
return Xt, yt, kwargs
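Note that transform always returns the (X, y, kwargs) triple and sets reduce_dimension by comparing input and output feature counts. A minimal sketch (assuming the built-in 'StandardScaler' element):

``` python
import numpy as np
from photonai.base import PipelineElement

scaler = PipelineElement('StandardScaler', hyperparameters={})
X, y = np.random.rand(10, 3), np.arange(10)
scaler.fit(X, y)
Xt, yt, kwargs = scaler.transform(X, y)
print(scaler.reduce_dimension)  # False: StandardScaler preserves the feature count
```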