Stream and Access Additional Data
Numerous use cases rely on data that is not contained in the feature matrix at runtime, e.g. when aiming to control for the
effect of covariates. In PHOTONAI, additional data can be passed as keyword arguments to the Hyperpipe's fit call; it is then
streamed through the pipeline and is accessible to all pipeline steps while - importantly - being matched to the (nested) cross-validation splits.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40 | import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold
from sklearn.base import BaseEstimator, ClassifierMixin
from photonai.base import Hyperpipe, PipelineElement
class AdditionalDataWrapper(BaseEstimator, ClassifierMixin):
    """Toy estimator that demonstrates access to additional data inside a pipeline step.

    The estimator does not learn anything. It expects the extra keyword
    argument ``true_predictions`` and simply hands it back from ``predict``,
    checking on the way that it has one entry per row of ``X``.
    """

    def __init__(self):
        # NOTE(review): presumably the flag PHOTONAI inspects to decide whether
        # the additional keyword arguments are forwarded to this element --
        # confirm against the PHOTONAI documentation.
        self.needs_covariates = True

    def fit(self, X, y, **kwargs):
        """Report whether the additional data arrived; nothing is fitted.

        Returns:
            The estimator itself.
        """
        additional_data_present = "true_predictions" in kwargs
        if additional_data_present:
            print("Found additional data")
        return self

    def predict(self, X, **kwargs):
        """Return the ``true_predictions`` entry of ``kwargs`` as the prediction.

        Raises:
            KeyError: if ``true_predictions`` was not passed.
            AssertionError: if its length differs from the number of rows in ``X``.
        """
        y_true = kwargs["true_predictions"]
        n_samples = X.shape[0]
        # The additional data must have exactly one entry per sample in X.
        assert n_samples == len(y_true)
        return y_true

    def save(self):
        """Return None; this estimator has no state to persist."""
        return
# Load the example data set; the targets double as the additional data below.
X, y = load_breast_cancer(return_X_y=True)
true_predictions = np.array(y)

# Nested cross-validation: 3 outer folds, each with 3 inner folds.
outer_folds = KFold(n_splits=3)
inner_folds = KFold(n_splits=3)

my_pipe = Hyperpipe(
    'additional_data_pipe',
    metrics=['accuracy', 'precision', 'recall', 'balanced_accuracy'],
    best_config_metric='accuracy',
    outer_cv=outer_folds,
    inner_cv=inner_folds,
    verbosity=1,
    project_folder='./tmp/',
)

# Pipeline: a scaler followed by the custom estimator that reads the additional data.
my_pipe.add(PipelineElement('StandardScaler'))
custom_element = PipelineElement.create("CustomWrapper", AdditionalDataWrapper(), hyperparameters={})
my_pipe += custom_element

# Every keyword argument besides X and y is passed along as additional data.
my_pipe.fit(X, y, true_predictions=true_predictions)
|