Skip to content

Commit 7baa097

Browse files
authored
Merge pull request #95 from wwu-mmll/feature/score_train_v2
Feature/score train v2
2 parents 9e08353 + 90683da commit 7baa097

File tree

5 files changed

+57
-19
lines changed

5 files changed

+57
-19
lines changed

photonai/base/hyperpipe.py

+14-4
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,9 @@ def __init__(self, name: Optional[str],
297297
cache_folder: str = None,
298298
nr_of_processes: int = 1,
299299
multi_threading: bool = True,
300-
allow_multidim_targets: bool = False):
300+
allow_multidim_targets: bool = False,
301+
raise_error: bool = False,
302+
score_train: bool = True):
301303
"""
302304
Initialize the object.
303305
@@ -420,6 +422,12 @@ def __init__(self, name: Optional[str],
420422
allow_multidim_targets:
421423
Allows multidimensional targets.
422424
425+
score_train:
426+
Metrics for the train set are only calculated if score_train is True.
427+
428+
raise_error:
429+
If True, errors in the inner folds are raised instead of being suppressed as warnings.
430+
423431
"""
424432

425433
self.name = re.sub(r'\W+', '', name)
@@ -514,6 +522,8 @@ def __init__(self, name: Optional[str],
514522
self.permutation_id = permutation_id
515523
self.allow_multidim_targets = allow_multidim_targets
516524
self.is_final_fit = False
525+
self.score_train = score_train
526+
self.raise_error = raise_error
517527

518528
# ====================== Random Seed ===========================
519529
self.random_state = random_seed
@@ -941,8 +951,6 @@ def _finalize_optimization(self):
941951
else:
942952
self.results.best_config_feature_importances = feature_importances
943953

944-
self.results.best_config_feature_importances = feature_importances
945-
946954
# write backmapping file only if optimum_pipes inverse_transform works completely.
947955
# restriction: only a faulty inverse_transform is considered, missing ones are further ignored.
948956
# with warnings.catch_warnings(record=True) as w:
@@ -1087,7 +1095,9 @@ def fit(self, data: np.ndarray, targets: np.ndarray, **kwargs):
10871095
cache_folder=self.cache_folder,
10881096
cache_updater=self.recursive_cache_folder_propagation,
10891097
dummy_estimator=dummy_estimator,
1090-
result_obj=outer_fold)
1098+
result_obj=outer_fold,
1099+
score_train=self.score_train,
1100+
raise_error=self.raise_error)
10911101
# 2. monitor outputs
10921102
self.results.outer_folds.append(outer_fold)
10931103

photonai/processing/inner_folds.py

+28-11
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,8 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos,
6666
training: bool = False,
6767
cache_folder=None,
6868
cache_updater=None,
69-
scorer: Scorer = None):
69+
scorer: Scorer = None,
70+
score_train: bool = True):
7071

7172
self.params = specific_config
7273
self.pipe = pipe_ctor
@@ -81,6 +82,7 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos,
8182

8283
self.raise_error = raise_error
8384
self.training = training
85+
self.score_train = score_train
8486

8587
def fit(self, X, y, **kwargs):
8688
"""Iterates over cross-validation folds and trains the pipeline,
@@ -136,7 +138,8 @@ def fit(self, X, y, **kwargs):
136138
kwargs_cv_train),
137139
test_data=InnerFoldManager.JobData(test_X, test_y, test,
138140
kwargs_cv_test),
139-
scorer=self.scorer)
141+
scorer=self.scorer,
142+
score_train=self.score_train)
140143

141144
# only for unparallel processing
142145
# inform children in which inner fold we are
@@ -224,7 +227,8 @@ def compute_learning_curves(self, new_pipe, train_X, train_y, train, kwargs_cv_t
224227
callbacks=self.optimization_constraints,
225228
train_data=self.JobData(train_cut_X, train_cut_y, train_cut, train_cut_kwargs),
226229
test_data=self.JobData(test_X, test_y, test, kwargs_cv_test),
227-
scorer=self.scorer)
230+
scorer=self.scorer,
231+
score_train=self.score_train)
228232
curr_test_cut, curr_train_cut = InnerFoldManager.fit_and_score(job_data)
229233
learning_curves.append([self.cross_validation_infos.learning_curves_cut.values[i], curr_test_cut.metrics,
230234
curr_train_cut.metrics])
@@ -239,14 +243,15 @@ def __init__(self, X, y, indices, cv_kwargs):
239243

240244
class InnerCVJob:
241245

242-
def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scorer):
246+
def __init__(self, pipe, config, metrics, callbacks, train_data, test_data, scorer, score_train):
243247
self.pipe = pipe
244248
self.config = config
245249
self.metrics = metrics
246250
self.callbacks = callbacks
247251
self.train_data = train_data
248252
self.test_data = test_data
249253
self.scorer = scorer
254+
self.score_train = score_train
250255

251256
@staticmethod
252257
def update_config_item_with_inner_fold(config_item, fold_cnt, curr_train_fold, curr_test_fold, time_monitor,
@@ -344,27 +349,28 @@ def fit_and_score(job: InnerCVJob):
344349
# start fitting
345350
pipe.fit(job.train_data.X, job.train_data.y, **job.train_data.cv_kwargs)
346351

347-
logger.debug('Scoring Training Data')
352+
logger.debug('Scoring Test Data')
348353

349354
# score test data
350355
curr_test_fold = InnerFoldManager.score(pipe, job.test_data.X, job.test_data.y, job.metrics,
351356
indices=job.test_data.indices,
352357
scorer=job.scorer,
353358
**job.test_data.cv_kwargs)
354359

355-
logger.debug('Scoring Test Data')
360+
logger.debug('Scoring Training Data')
356361
# score train data
357362
curr_train_fold = InnerFoldManager.score(pipe, job.train_data.X, job.train_data.y, job.metrics,
358-
indices=job.train_data.indices,
359-
training=True,
360-
scorer=job.scorer, **job.train_data.cv_kwargs)
363+
indices=job.train_data.indices,
364+
training=True,
365+
score_train=job.score_train,
366+
scorer=job.scorer, **job.train_data.cv_kwargs)
361367

362368
return curr_test_fold, curr_train_fold
363369

364370
@staticmethod
365371
def score(estimator, X, y_true, metrics, indices=[],
366372
calculate_metrics: bool = True, training: bool = False,
367-
scorer: Scorer = None, **kwargs):
373+
dummy: bool = False, scorer: Scorer = None, score_train=True, **kwargs):
368374
"""Uses the pipeline to predict the given data,
369375
compare it to the truth values and calculate metrics
370376
@@ -410,7 +416,18 @@ def score(estimator, X, y_true, metrics, indices=[],
410416

411417
output_metrics = {}
412418

413-
if not training:
419+
if training and not score_train:
420+
scores = {}
421+
for metric in list(metrics.keys()):
422+
scores[metric] = 0
423+
return MDBScoreInformation(metrics=scores,
424+
score_duration=0,
425+
y_pred=list(np.zeros_like(y_true)),
426+
y_true=list(y_true),
427+
indices=np.asarray(indices).tolist(),
428+
probabilities=[])
429+
430+
if not training or (training and dummy):
414431
y_pred = estimator.predict(X, **kwargs)
415432
else:
416433
X, y_true_new, kwargs_new = estimator.transform(X, y_true, **kwargs)

photonai/processing/metrics.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ class Scorer:
2929
'precision': ('sklearn.metrics', 'precision_score', 'score'),
3030
'recall': ('sklearn.metrics', 'recall_score', 'score'),
3131
'auc': ('sklearn.metrics', 'roc_auc_score', 'score'),
32-
'sensitivity': ('photonai.processing.metrics', 'sensitivity', 'score'),
32+
'sensitivity': ('sklearn.metrics', 'recall_score', 'score'),
3333
'specificity': ('photonai.processing.metrics', 'specificity', 'score'),
34-
'balanced_accuracy': ('photonai.processing.metrics', 'balanced_accuracy', 'score'),
34+
'balanced_accuracy': ('sklearn.metrics', 'balanced_accuracy_score', 'score'),
3535
'categorical_accuracy': ('photonai.processing.metrics', 'categorical_accuracy_score', 'score'),
3636

3737
# Regression

photonai/processing/outer_folds.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -63,14 +63,18 @@ def __init__(self, pipe,
6363
cache_folder=None,
6464
cache_updater=None,
6565
dummy_estimator=None,
66-
result_obj=None):
66+
result_obj=None,
67+
raise_error=False,
68+
score_train: bool = True):
6769
self.outer_fold_id = outer_fold_id
6870
self.cross_validation_info = cross_validation_info
6971
self.scorer = Scorer(optimization_info.metrics)
7072
self.optimization_info = optimization_info
7173
self._pipe = pipe
7274
self.copy_pipe_fnc = self._pipe.copy_me
7375
self.dummy_estimator = dummy_estimator
76+
self.score_train = score_train
77+
self.raise_error = raise_error
7478

7579
self.cache_folder = cache_folder
7680
self.cache_updater = cache_updater
@@ -255,6 +259,7 @@ def fit(self, X, y=None, **kwargs):
255259
metrics=self.optimization_info.metrics,
256260
training=True,
257261
scorer=self.scorer,
262+
score_train=self.score_train,
258263
**self._validation_kwargs)
259264

260265
best_config_performance_mdb.training = train_score_mdb
@@ -308,7 +313,8 @@ def objective_function(self, current_config):
308313
self.cross_validation_info, self.outer_fold_id, self.constraint_objects,
309314
cache_folder=self.cache_folder,
310315
cache_updater=self.cache_updater,
311-
scorer=self.scorer)
316+
scorer=self.scorer,
317+
raise_error=self.raise_error)
312318

313319
# Test the configuration cross validated by inner_cv object
314320
current_config_mdb = hp.fit(self._validation_X, self._validation_y, **self._validation_kwargs)
@@ -385,7 +391,10 @@ def _fit_dummy(self):
385391
dummy_y = np.reshape(self._validation_y, (-1, 1))
386392
self.dummy_estimator.fit(dummy_y, self._validation_y)
387393
train_scores = InnerFoldManager.score(self.dummy_estimator, self._validation_X, self._validation_y,
394+
training=True,
395+
dummy=True,
388396
metrics=self.optimization_info.metrics,
397+
score_train=self.score_train,
389398
scorer=self.scorer)
390399

391400
# fill result tree with fold information
@@ -396,6 +405,7 @@ def _fit_dummy(self):
396405
test_scores = InnerFoldManager.score(self.dummy_estimator,
397406
self._test_X, self._test_y,
398407
metrics=self.optimization_info.metrics,
408+
score_train=self.score_train,
399409
scorer=self.scorer)
400410
print_metrics("DUMMY", test_scores.metrics)
401411
inner_fold.validation = test_scores

test/integration_tests/test_architecture.py

+1
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ def create_hyperpipes(metrics: list = None, inner_cv=KFold(n_splits=3, shuffle=T
6767
use_test_set=eval_final_performance,
6868
performance_constraints=performance_constraints,
6969
cache_folder=cache_folder,
70+
raise_error=True,
7071
verbosity=0)
7172
return pipe
7273

0 commit comments

Comments
 (0)