Documentation for ImbalancedDataTransformer
Applies the chosen strategy to the data in order to balance the input data. Instantiates the strategy filter object according to the name given as a string literal. Underlying library: imbalanced-learn. More information can be found in its documentation (https://imbalanced-learn.org/stable/).
Examples:
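```python
from photonai.optimization import Categorical

tested_methods = Categorical(['RandomOverSampler', 'SMOTEENN', 'SVMSMOTE',
                              'BorderlineSMOTE', 'SMOTE', 'ClusterCentroids'])
PipelineElement('ImbalancedDataTransformer',
                hyperparameters={'method_name': tested_methods},
                test_disabled=True)
```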
Source code in photonai/modelwrapper/imbalanced_data_transformer.py
class ImbalancedDataTransformer(BaseEstimator, TransformerMixin):
    """
    Applies the chosen strategy to the data in order to balance the input data.

    Instantiates the strategy filter object according to the name given as string literal.
    Underlying architecture: Imbalanced-Learning.
    More information on their [documentation](https://imbalanced-learn.org/stable/).

    Example:
        ``` python
        from photonai.optimization import Categorical

        tested_methods = Categorical(['RandomOverSampler', 'SMOTEENN', 'SVMSMOTE',
                                      'BorderlineSMOTE', 'SMOTE', 'ClusterCentroids'])
        PipelineElement('ImbalancedDataTransformer',
                        hyperparameters={'method_name': tested_methods},
                        test_disabled=True)
        ```

    """

    # Maps each strategy group to the class names that are looked up
    # (via getattr) on the corresponding imblearn module in the setter below.
    IMBALANCED_DICT = {
        'oversampling': ["ADASYN",
                         "BorderlineSMOTE",
                         "KMeansSMOTE",
                         "RandomOverSampler",
                         "SMOTE",
                         "SMOTENC",
                         "SVMSMOTE"],
        'undersampling': ["AllKNN",
                          "ClusterCentroids",
                          "CondensedNearestNeighbour",
                          "EditedNearestNeighbours",
                          "InstanceHardnessThreshold",
                          "NearMiss",
                          "NeighbourhoodCleaningRule",
                          "OneSidedSelection",
                          "TomekLinks",
                          "RandomUnderSampler",
                          "RepeatedEditedNearestNeighbours"],
        'combine': ["SMOTEENN", "SMOTETomek"],
    }

    def __init__(self, method_name: str = 'RandomUnderSampler', config: dict = None):
        """
        Instantiates an object that transforms the data into balanced groups according to the given method.

        Parameters:
            method_name:
                Imbalanced learning strategy. Possible values with

                - an oversampling strategy are:
                    - ADASYN,
                    - BorderlineSMOTE,
                    - KMeansSMOTE,
                    - RandomOverSampler,
                    - SMOTE,
                    - SMOTENC,
                    - SVMSMOTE,

                - an undersampling strategy are:
                    - ClusterCentroids,
                    - RandomUnderSampler,
                    - NearMiss,
                    - InstanceHardnessThreshold,
                    - CondensedNearestNeighbour,
                    - EditedNearestNeighbours,
                    - RepeatedEditedNearestNeighbours,
                    - AllKNN,
                    - NeighbourhoodCleaningRule,
                    - OneSidedSelection,
                    - TomekLinks,

                - a combined strategy are:
                    - SMOTEENN,
                    - SMOTETomek.

            config:
                Each strategy has a set of presets. This parameter is necessary
                to select the appropriate settings for the selected method.
                It is important that the key exactly matches the method_name.
                If no key is found for a method, it will be started with the default settings.
                Please do not use this parameter inside the 'hyperparameters' to optimize it.

        Raises:
            ModuleNotFoundError: If the module-level flag `__found__` is falsy.
            ValueError: Raised by the `method_name` setter for an unknown
                strategy name or a non-dict config entry.

        """
        if not __found__:
            raise ModuleNotFoundError("Module imblearn not found or not installed as expected. "
                                      "Please install the requirements.txt in PHOTON main folder.")

        # config has to be stored first: the method_name setter reads it.
        self.config = config
        self._method_name = None
        self.method_name = method_name
        self.needs_y = True

    @property
    def method_name(self):
        """Name of the currently selected imbalanced-learning strategy."""
        return self._method_name

    @method_name.setter
    def method_name(self, value):
        """
        Select a strategy by name and instantiate it as `self.method`.

        Parameters:
            value:
                Strategy name; must be listed in `IMBALANCED_DICT`.

        Raises:
            ValueError: If `value` is not a known strategy, or if the entry
                `config[value]` exists but is not a dict.

        """
        imbalance_type = ''
        for group, possible_strategies in ImbalancedDataTransformer.IMBALANCED_DICT.items():
            if value in possible_strategies:
                imbalance_type = group

        if imbalance_type == "oversampling":
            home = over_sampling
        elif imbalance_type == "undersampling":
            home = under_sampling
        elif imbalance_type == "combine":
            home = combine
        else:
            msg = "Imbalance Type not found. Can be oversampling, undersampling or combine. " \
                  "Oversampling: method_name one of {}. Undersampling: method_name one of {}. " \
                  "Combine: method_name one of {}.".format(str(self.IMBALANCED_DICT["oversampling"]),
                                                           str(self.IMBALANCED_DICT["undersampling"]),
                                                           str(self.IMBALANCED_DICT["combine"]))
            logger.error(msg)
            raise ValueError(msg)

        desired_class = getattr(home, value)

        if self.config is not None and value in self.config:
            settings = self.config[value]
            if not isinstance(settings, dict):
                msg = "Please use for the imbalanced config a format like: " \
                      "config={'SMOTE': {'sampling_strategy': {0: 9, 1: 12}}}."
                logger.error(msg)
                raise ValueError(msg)
            method = desired_class(**settings)
        else:
            method = desired_class()

        # Commit name and sampler together, only after everything succeeded,
        # so a failed assignment never leaves them out of sync.
        self._method_name = value
        self.method = method

    def fit_transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray):
        """
        Call of the underlying imblearn.fit_resample(X, y).

        Parameters:
            X:
                The input samples of shape [n_samples, n_features].

            y:
                The input targets of shape [n_samples, 1].

            **kwargs:
                Ignored input.

        Returns:
            Transformed data.

        """
        return self.method.fit_resample(X, y)

    # define an alias for imblearn consistency
    fit_sample = fit_transform
    fit_resample = fit_transform

    def fit(self, X, y, **kwargs):
        """
        No-op required in PHOTONAI; the resampling itself happens in
        fit_transform / transform.

        Returns:
            The transformer itself, following the scikit-learn convention.

        """
        return self

    def transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray):
        """
        Forwarding to the self.fit_transform method.

        Parameters:
            X:
                The input samples of shape [n_samples, n_features].

            y:
                The input targets of shape [n_samples, 1].

            **kwargs:
                Ignored input.

        Returns:
            Transformed data.

        """
        return self.fit_transform(X, y)
__init__(self, method_name='RandomUnderSampler', config=None)
special
Instantiates an object that transforms the data into balanced groups according to the given method.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
method_name |
str |
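Imbalanced learning strategy. Possible values with an oversampling strategy are: ADASYN, BorderlineSMOTE, KMeansSMOTE, RandomOverSampler, SMOTE, SMOTENC, SVMSMOTE; with an undersampling strategy: ClusterCentroids, RandomUnderSampler, NearMiss, InstanceHardnessThreshold, CondensedNearestNeighbour, EditedNearestNeighbours, RepeatedEditedNearestNeighbours, AllKNN, NeighbourhoodCleaningRule, OneSidedSelection, TomekLinks; with a combined strategy: SMOTEENN, SMOTETomek.
|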
'RandomUnderSampler' |
config |
dict |
Each strategy has a set of presets. This parameter is necessary to select the appropriate settings for the selected method. It is important that the key exactly matches the method_name. If no key is found for a method, it will be started with the default settings. Please do not use this parameter inside the 'hyperparameters' to optimize it. |
None |
Source code in photonai/modelwrapper/imbalanced_data_transformer.py
def __init__(self, method_name: str = 'RandomUnderSampler', config: dict = None):
    """
    Set up a transformer that balances the data with the selected strategy.

    Parameters:
        method_name:
            Name of the imbalanced-learning strategy. Supported names are

            - oversampling: ADASYN, BorderlineSMOTE, KMeansSMOTE,
              RandomOverSampler, SMOTE, SMOTENC, SVMSMOTE,
            - undersampling: ClusterCentroids, RandomUnderSampler, NearMiss,
              InstanceHardnessThreshold, CondensedNearestNeighbour,
              EditedNearestNeighbours, RepeatedEditedNearestNeighbours,
              AllKNN, NeighbourhoodCleaningRule, OneSidedSelection, TomekLinks,
            - combined: SMOTEENN, SMOTETomek.

        config:
            Optional presets per strategy, keyed by the exact method_name.
            A strategy without an entry is started with its default settings.
            Do not put this parameter into the 'hyperparameters' to optimize it.

    Raises:
        ModuleNotFoundError: If the module-level flag `__found__` is falsy.

    """
    if not __found__:
        missing_msg = ("Module imblearn not found or not installed as expected. "
                       "Please install the requirements.txt in PHOTON main folder.")
        raise ModuleNotFoundError(missing_msg)

    # Store config before assigning method_name: the method_name property
    # setter of the class reads self.config.
    self.config = config
    self._method_name = None
    self.method_name = method_name
    self.needs_y = True
fit_transform(self, X, y=None, **kwargs)
Call of the underlying imblearn.fit_resample(X, y).
Parameters:
Name | Type | Description | Default |
---|---|---|---|
X |
ndarray |
The input samples of shape [n_samples, n_features]. |
required |
y |
ndarray |
The input targets of shape [n_samples, 1]. |
None |
**kwargs |
Ignored input. |
{} |
Returns:
Type | Description |
---|---|
(<class 'numpy.ndarray'>, <class 'numpy.ndarray'>) |
Transformed data. |
Source code in photonai/modelwrapper/imbalanced_data_transformer.py
def fit_transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray):
    """
    Resample the data by delegating to `self.method.fit_resample(X, y)`.

    Parameters:
        X:
            The input samples of shape [n_samples, n_features].

        y:
            The input targets of shape [n_samples, 1].

        **kwargs:
            Accepted but not used.

    Returns:
        Whatever the underlying `fit_resample` call returns
        (the transformed data).

    """
    resampler = self.method
    return resampler.fit_resample(X, y)
transform(self, X, y=None, **kwargs)
Forwarding to the self.fit_transform method.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
X |
ndarray |
The input samples of shape [n_samples, n_features]. |
required |
y |
ndarray |
The input targets of shape [n_samples, 1]. |
None |
**kwargs |
Ignored input. |
{} |
Returns:
Type | Description |
---|---|
(<class 'numpy.ndarray'>, <class 'numpy.ndarray'>) |
Transformed data. |
Source code in photonai/modelwrapper/imbalanced_data_transformer.py
def transform(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> (np.ndarray, np.ndarray):
    """
    Resample the data; simply forwards X and y to `self.fit_transform`.

    Parameters:
        X:
            The input samples of shape [n_samples, n_features].

        y:
            The input targets of shape [n_samples, 1].

        **kwargs:
            Accepted but not used (not forwarded).

    Returns:
        The result of `self.fit_transform(X, y)` (the transformed data).

    """
    resampled = self.fit_transform(X, y)
    return resampled