@@ -66,7 +66,8 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos,
66
66
training : bool = False ,
67
67
cache_folder = None ,
68
68
cache_updater = None ,
69
- scorer : Scorer = None ):
69
+ scorer : Scorer = None ,
70
+ score_train : bool = True ):
70
71
71
72
self .params = specific_config
72
73
self .pipe = pipe_ctor
@@ -81,6 +82,7 @@ def __init__(self, pipe_ctor, specific_config: dict, optimization_infos,
81
82
82
83
self .raise_error = raise_error
83
84
self .training = training
85
+ self .score_train = score_train
84
86
85
87
def fit (self , X , y , ** kwargs ):
86
88
"""Iterates over cross-validation folds and trains the pipeline,
@@ -136,7 +138,8 @@ def fit(self, X, y, **kwargs):
136
138
kwargs_cv_train ),
137
139
test_data = InnerFoldManager .JobData (test_X , test_y , test ,
138
140
kwargs_cv_test ),
139
- scorer = self .scorer )
141
+ scorer = self .scorer ,
142
+ score_train = self .score_train )
140
143
141
144
# only for unparallel processing
142
145
# inform children in which inner fold we are
@@ -224,7 +227,8 @@ def compute_learning_curves(self, new_pipe, train_X, train_y, train, kwargs_cv_t
224
227
callbacks = self .optimization_constraints ,
225
228
train_data = self .JobData (train_cut_X , train_cut_y , train_cut , train_cut_kwargs ),
226
229
test_data = self .JobData (test_X , test_y , test , kwargs_cv_test ),
227
- scorer = self .scorer )
230
+ scorer = self .scorer ,
231
+ score_train = self .score_train )
228
232
curr_test_cut , curr_train_cut = InnerFoldManager .fit_and_score (job_data )
229
233
learning_curves .append ([self .cross_validation_infos .learning_curves_cut .values [i ], curr_test_cut .metrics ,
230
234
curr_train_cut .metrics ])
@@ -239,14 +243,15 @@ def __init__(self, X, y, indices, cv_kwargs):
239
243
240
244
class InnerCVJob :
241
245
242
- def __init__ (self , pipe , config , metrics , callbacks , train_data , test_data , scorer ):
246
+ def __init__ (self , pipe , config , metrics , callbacks , train_data , test_data , scorer , score_train ):
243
247
self .pipe = pipe
244
248
self .config = config
245
249
self .metrics = metrics
246
250
self .callbacks = callbacks
247
251
self .train_data = train_data
248
252
self .test_data = test_data
249
253
self .scorer = scorer
254
+ self .score_train = score_train
250
255
251
256
@staticmethod
252
257
def update_config_item_with_inner_fold (config_item , fold_cnt , curr_train_fold , curr_test_fold , time_monitor ,
@@ -344,27 +349,28 @@ def fit_and_score(job: InnerCVJob):
344
349
# start fitting
345
350
pipe .fit (job .train_data .X , job .train_data .y , ** job .train_data .cv_kwargs )
346
351
347
- logger .debug ('Scoring Training Data' )
352
+ logger .debug ('Scoring Test Data' )
348
353
349
354
# score test data
350
355
curr_test_fold = InnerFoldManager .score (pipe , job .test_data .X , job .test_data .y , job .metrics ,
351
356
indices = job .test_data .indices ,
352
357
scorer = job .scorer ,
353
358
** job .test_data .cv_kwargs )
354
359
355
- logger .debug ('Scoring Test Data' )
360
+ logger .debug ('Scoring Training Data' )
356
361
# score train data
357
362
curr_train_fold = InnerFoldManager .score (pipe , job .train_data .X , job .train_data .y , job .metrics ,
358
- indices = job .train_data .indices ,
359
- training = True ,
360
- scorer = job .scorer , ** job .train_data .cv_kwargs )
363
+ indices = job .train_data .indices ,
364
+ training = True ,
365
+ score_train = job .score_train ,
366
+ scorer = job .scorer , ** job .train_data .cv_kwargs )
361
367
362
368
return curr_test_fold , curr_train_fold
363
369
364
370
@staticmethod
365
371
def score (estimator , X , y_true , metrics , indices = [],
366
372
calculate_metrics : bool = True , training : bool = False ,
367
- scorer : Scorer = None , ** kwargs ):
373
+ dummy : bool = False , scorer : Scorer = None , score_train = True , ** kwargs ):
368
374
"""Uses the pipeline to predict the given data,
369
375
compare it to the truth values and calculate metrics
370
376
@@ -410,7 +416,18 @@ def score(estimator, X, y_true, metrics, indices=[],
410
416
411
417
output_metrics = {}
412
418
413
- if not training :
419
+ if training and not score_train :
420
+ scores = {}
421
+ for metric in list (metrics .keys ()):
422
+ scores [metric ] = 0
423
+ return MDBScoreInformation (metrics = scores ,
424
+ score_duration = 0 ,
425
+ y_pred = list (np .zeros_like (y_true )),
426
+ y_true = list (y_true ),
427
+ indices = np .asarray (indices ).tolist (),
428
+ probabilities = [])
429
+
430
+ if not training or (training and dummy ):
414
431
y_pred = estimator .predict (X , ** kwargs )
415
432
else :
416
433
X , y_true_new , kwargs_new = estimator .transform (X , y_true , ** kwargs )
0 commit comments