- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/IOparser/config_io_classifier.html b/WORC/doc/_build/html/_modules/WORC/IOparser/config_io_classifier.html
index baeae203..7186655b 100644
--- a/WORC/doc/_build/html/_modules/WORC/IOparser/config_io_classifier.html
+++ b/WORC/doc/_build/html/_modules/WORC/IOparser/config_io_classifier.html
@@ -8,7 +8,7 @@
- WORC.IOparser.config_io_classifier — WORC 3.6.0 documentation
+ WORC.IOparser.config_io_classifier — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands + © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands + © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands + © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/WORC.html b/WORC/doc/_build/html/_modules/WORC/WORC.html
index 526064a5..f0173fa3 100644
--- a/WORC/doc/_build/html/_modules/WORC/WORC.html
+++ b/WORC/doc/_build/html/_modules/WORC/WORC.html
@@ -8,7 +8,7 @@
- WORC.WORC — WORC 3.6.0 documentation
+ WORC.WORC — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
config['General']['AssumeSameImageAndMaskMetadata'] = 'False'
config['General']['ComBat'] = 'False'
config['General']['Fingerprint'] = 'True'
+ config['General']['DoTestNRSNEns'] = 'False'
# Fingerprinting
config['Fingerprinting'] = dict()
@@ -517,6 +517,7 @@ Source code for WORC.WORC
config['Imputation']['use'] = 'True'
config['Imputation']['strategy'] = 'mean, median, most_frequent, constant, knn'
config['Imputation']['n_neighbors'] = '5, 5'
+ config['Imputation']['skipallNaN'] = 'True'
# Feature scaling options
config['FeatureScaling'] = dict()
@@ -661,7 +662,8 @@ Source code for WORC.WORC
config['HyperOptimization']['maxlen'] = '100'
config['HyperOptimization']['ranking_score'] = 'test_score'
config['HyperOptimization']['memory'] = '3G'
- config['HyperOptimization']['refit_workflows'] = 'False'
+ config['HyperOptimization']['refit_training_workflows'] = 'False'
+ config['HyperOptimization']['refit_validation_workflows'] = 'False'
# SMAC options
config['SMAC'] = dict()
@@ -827,9 +829,9 @@ Source code for WORC.WORC
# Optional SMAC output
if self.configs[0]['SMAC']['use'] == 'True':
- self.sink_smac_results = self.network.create_sink('JsonFile', id='smac_results',
- step_id='general_sinks')
- self.sink_smac_results.input = self.classify.outputs['smac_results']
+ self.sink_smac_results = self.network.create_sink('JsonFile', id='smac_results',
+ step_id='general_sinks')
+ self.sink_smac_results.input = self.classify.outputs['smac_results']
if self.TrainTest:
# FIXME: the naming here is ugly
@@ -1107,9 +1109,15 @@ Source code for WORC.WORC
elif self.segmode == 'Register':
# ---------------------------------------------
# Registration nodes: Align segmentation of first
- # modality to others using registration ith Elastix
+ # modality to others using registration with Elastix
self.add_elastix(label, nmod)
+ # Add to fingerprinting if required
+ if self.configs[0]['General']['Fingerprint'] == 'True':
+ # Since there are no segmentations yet of this modality, just use those of the first, provided modality
+ self.links_fingerprinting[f'{label}_segmentations'] = self.network.create_link(self.converters_seg_train[self.modlabels[0]].outputs['image'], self.node_fingerprinters[label].inputs['segmentations_train'])
+ self.links_fingerprinting[f'{label}_segmentations'].collapse = 'train'
+
# -----------------------------------------------------
# Optionally, add segmentix, the in-house segmentation
# processor of WORC
@@ -1242,7 +1250,7 @@ Source code for WORC.WORC
self.links_fingerprinting['classification'].collapse = 'train'
else:
- raise WORCexceptions.WORCIOError("Please provide labels.")
+ raise WORCexceptions.WORCIOError("Please provide labels for training, i.e., WORC.labels_train or SimpleWORC.labels_from_this_file.")
else:
raise WORCexceptions.WORCIOError("Please provide either images or features.")
@@ -1566,7 +1574,7 @@ Source code for WORC.WORC
self.sources_segmentations_train[label] =\
self.network.create_source('ITKImageFile',
id='segmentations_train_' + label,
- node_group='input',
+ node_group='train',
step_id='train_sources')
self.converters_seg_train[label] =\
@@ -1583,7 +1591,7 @@ Source code for WORC.WORC
self.sources_segmentations_test[label] =\
self.network.create_source('ITKImageFile',
id='segmentations_test_' + label,
- node_group='input',
+ node_group='test',
step_id='test_sources')
self.converters_seg_test[label] =\
@@ -1806,11 +1814,6 @@ Source code for WORC.WORC
self.calcfeatures_test[label][i_node].inputs['segmentation'] =\
self.transformix_seg_nodes_test[label].outputs['image']
- # Add to fingerprinting if required
- if self.configs[0]['General']['Fingerprint'] == 'True':
- self.links_fingerprinting[f'{label}_segmentations'] = self.network.create_link(self.transformix_seg_nodes_train[label].outputs['image'], self.node_fingerprinters[label].inputs['segmentations_train'])
- self.links_fingerprinting[f'{label}_segmentations'].collapse = 'train'
-
# Save output for the training set
self.sinks_transformations_train[label] =\
self.network.create_sink('ElastixTransformFile',
@@ -2202,7 +2205,7 @@ Source code for WORC.WORC
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/addexceptions.html b/WORC/doc/_build/html/_modules/WORC/addexceptions.html
index dbd4270e..417dd794 100644
--- a/WORC/doc/_build/html/_modules/WORC/addexceptions.html
+++ b/WORC/doc/_build/html/_modules/WORC/addexceptions.html
@@ -8,7 +8,7 @@
- WORC.addexceptions — WORC 3.6.0 documentation
+ WORC.addexceptions — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -257,7 +256,7 @@ Source code for WORC.addexceptions
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/AdvancedSampler.html b/WORC/doc/_build/html/_modules/WORC/classification/AdvancedSampler.html
index 0e554819..cc8f52b1 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/AdvancedSampler.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/AdvancedSampler.html
@@ -8,7 +8,7 @@
- WORC.classification.AdvancedSampler — WORC 3.6.0 documentation
+ WORC.classification.AdvancedSampler — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -393,7 +392,7 @@ Source code for WORC.classification.AdvancedSampler
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/ObjectSampler.html b/WORC/doc/_build/html/_modules/WORC/classification/ObjectSampler.html
index 6c8bf1fa..2d9f2d6e 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/ObjectSampler.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/ObjectSampler.html
@@ -8,7 +8,7 @@
- WORC.classification.ObjectSampler — WORC 3.6.0 documentation
+ WORC.classification.ObjectSampler — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -360,7 +359,7 @@ Source code for WORC.classification.ObjectSampler
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/RankedSVM.html b/WORC/doc/_build/html/_modules/WORC/classification/RankedSVM.html
index 8485a0fd..f352c9d8 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/RankedSVM.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/RankedSVM.html
@@ -8,7 +8,7 @@
- WORC.classification.RankedSVM — WORC 3.6.0 documentation
+ WORC.classification.RankedSVM — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -929,7 +928,7 @@ Source code for WORC.classification.RankedSVM
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/SearchCV.html b/WORC/doc/_build/html/_modules/WORC/classification/SearchCV.html
index 237308ee..1128e17e 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/SearchCV.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/SearchCV.html
@@ -8,7 +8,7 @@
- WORC.classification.SearchCV — WORC 3.6.0 documentation
+ WORC.classification.SearchCV — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -165,7 +164,7 @@
Source code for WORC.classification.SearchCV
#!/usr/bin/env python
-# Copyright 2016-2021 Biomedical Imaging Group Rotterdam, Departments of
+# Copyright 2016-2022 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -529,8 +528,8 @@ Source code for WORC.classification.SearchCV
refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs',
random_state=None, error_score='raise', return_train_score=True,
n_jobspercore=100, maxlen=100, fastr_plugin=None, memory='2G',
- ranking_score='test_score', refit_workflows=False,
- ensemble_validation_score=None):
+ ranking_score='test_score', refit_training_workflows=False,
+ ensemble_validation_score=None, refit_validation_workflows=False):
"""Initialize SearchCV Object."""
# Added for fastr and joblib executions
self.param_distributions = param_distributions
@@ -557,8 +556,10 @@ Source code for WORC.classification.SearchCV
self.return_train_score = return_train_score
self.maxlen = maxlen
self.ranking_score = ranking_score
- self.refit_workflows = refit_workflows
+ self.refit_training_workflows = refit_training_workflows
+ self.refit_validation_workflows = refit_validation_workflows
self.fitted_workflows = list()
+ self.fitted_validation_workflows = list()
# Only for WORC Paper
self.test_RS = True
@@ -804,7 +805,8 @@ Source code for WORC.classification.SearchCV
[docs] def process_fit(self, n_splits, parameters_all,
test_sample_counts, test_score_dicts,
train_score_dicts, fit_time, score_time, cv_iter,
- X, y, fitted_workflows=None, use_smac=False):
+ X, y, fitted_workflows=list(), fitted_validation_workflows=list(),
+ use_smac=False):
"""Process a fit.
Process the outcomes of a SearchCV fit and find the best settings
@@ -983,7 +985,7 @@ Source code for WORC.classification.SearchCV
candidate_params_all = np.asarray(candidate_params_all)[bestindices].tolist()
for k in results.keys():
results[k] = results[k][bestindices]
- n_candidates = len(candidate_params_all)
+
results['params'] = candidate_params_all
# Calculate and store the total_fit_time of this train/test CV
@@ -1010,17 +1012,25 @@ Source code for WORC.classification.SearchCV
self.scorer_ = scorers if self.multimetric_ else scorers['score']
# Refit the top performing workflows on the full training dataset
- if self.refit_workflows:
+ if self.refit_training_workflows and fitted_workflows:
# Select only from one train-val split, as they are identical
fitted_workflows = fitted_workflows[:pipelines_per_split]
# Sort according to best indices
fitted_workflows = [fitted_workflows[i] for i in bestindices]
- # Remove None workflows
- fitted_workflows = [f for f in fitted_workflows if f is not None]
-
self.fitted_workflows = fitted_workflows
+
+ if self.refit_validation_workflows and fitted_validation_workflows:
+ # Select from all train-val splits the best indices
+ bestindices_all = list()
+ for j in range(len(cv_iter)):
+ bestindices_all.extend([i + n_candidates * j for i in bestindices])
+
+ fitted_validation_workflows =\
+ [fitted_validation_workflows[i] for i in bestindices_all]
+
+ self.fitted_validation_workflows = fitted_validation_workflows
return self
@@ -1081,7 +1091,8 @@ Source code for WORC.classification.SearchCV
return_estimator=True,
error_score=self.error_score,
verbose=verbose,
- return_all=True)
+ return_all=True,
+ skip=True)
# Associate best options with new fits
(save_data, GroupSel, VarSel, SelectModel, feature_labels, scalers,
@@ -1171,7 +1182,7 @@ Source code for WORC.classification.SearchCV
if scoring is None:
scoring = self.scoring
- # Get settings for best 100 estimators
+ # Get settings for best estimators
parameters_all = self.cv_results_['params']
n_classifiers = len(parameters_all)
n_iter = len(self.cv_iter)
@@ -1193,8 +1204,16 @@ Source code for WORC.classification.SearchCV
else:
# Refit the models and compute the predictions on the validation sets
if verbose:
- print('Precomputing scores on training and validation set for ensembling.')
+ print('\t - Precomputing scores on training and validation set for ensembling.')
+ if self.fitted_validation_workflows:
+ print('\t - Detected already fitted train-val workflows.')
+
+ # Create the ground truth
Y_valid_truth = list()
+ for it, (train, valid) in enumerate(self.cv_iter):
+ Y_valid_truth.append(Y_train[valid])
+
+ # Precompute the scores of all estimators
performances = list()
all_predictions = list()
ensemble_configurations = list()
@@ -1204,77 +1223,98 @@ Source code for WORC.classification.SearchCV
predictions_iter = np.zeros((n_iter, prediction_length))
for it, (train, valid) in enumerate(self.cv_iter):
- predictions = list()
- # Start with storing the ground truth
- if num == 0:
- Y_valid_truth.append(Y_train[valid])
-
- new_estimator = clone(base_estimator)
-
- # Fit the preprocessors of the pipeline
- out = fit_and_score(X_train, Y_train, scoring,
- train, valid, p_all,
- return_all=True)
- (save_data, GroupSel, VarSel, SelectModel, feature_labels, scalers,
- encoders, Imputers, PCAs, StatisticalSel, ReliefSel, Sampler) = out
- new_estimator.best_groupsel = GroupSel
- new_estimator.best_scaler = scalers
- new_estimator.best_varsel = VarSel
- new_estimator.best_modelsel = SelectModel
- new_estimator.best_preprocessor = None
- new_estimator.best_imputer = Imputers
- new_estimator.best_encoder = encoders
- new_estimator.best_pca = PCAs
- new_estimator.best_featlab = feature_labels
- new_estimator.best_statisticalsel = StatisticalSel
- new_estimator.best_reliefsel = ReliefSel
- new_estimator.best_Sampler = Sampler
-
- # Use the fitted preprocessors to preprocess the features
- X_train_values = np.asarray([x[0] for x in X_train])
- processed_X, processed_y = new_estimator.preprocess(X_train_values[train],
- Y_train[train],
- training=True)
- # Check if there are features left
- (patients, features_left) = np.shape(processed_X)
- if features_left == 0:
- print('no features left' + '\n')
- # No features are left; do not consider this pipeline for the ensemble
- break
- else:
+ def getpredictions():
+ new_estimator = clone(base_estimator)
+
+ # Fit the preprocessors of the pipeline
+ out = fit_and_score(X_train, Y_train, scoring,
+ train, valid, p_all,
+ return_all=True)
+ (save_data, GroupSel, VarSel, SelectModel, feature_labels, scalers,
+ encoders, Imputers, PCAs, StatisticalSel, ReliefSel, Sampler) = out
+ new_estimator.best_groupsel = GroupSel
+ new_estimator.best_scaler = scalers
+ new_estimator.best_varsel = VarSel
+ new_estimator.best_modelsel = SelectModel
+ new_estimator.best_preprocessor = None
+ new_estimator.best_imputer = Imputers
+ new_estimator.best_encoder = encoders
+ new_estimator.best_pca = PCAs
+ new_estimator.best_featlab = feature_labels
+ new_estimator.best_statisticalsel = StatisticalSel
+ new_estimator.best_reliefsel = ReliefSel
+ new_estimator.best_Sampler = Sampler
+
+ # Use the fitted preprocessors to preprocess the features
+ X_train_values = np.asarray([x[0] for x in X_train])
+ processed_X, processed_y = new_estimator.preprocess(X_train_values[train],
+ Y_train[train],
+ training=True)
+
+ # Check if there are features left
+ (patients, features_left) = np.shape(processed_X)
+ if features_left == 0:
+ print('no features left' + '\n')
+ # No features are left; do not consider this pipeline for the ensemble
+ return None
+
# Construct and fit the classifier
best_estimator = cc.construct_classifier(p_all)
best_estimator.fit(processed_X, processed_y)
new_estimator.best_estimator_ = best_estimator
predictions = new_estimator.predict_proba(X_train_values[valid])
+ return predictions
- # Only take the probabilities for the second class
- predictions = predictions[:, 1]
-
- # Store the predictions on this split
- #predictions_iter.append(predictions)
- predictions_iter[it, :] = predictions
-
- # Compute and store the performance on this split
- performances_iter.append(compute_performance(scoring,
- Y_train[valid],
- predictions))
-
- # print('fitandscore: ' + str(out[0][1]) + ' and computed: ' +
- # str(compute_performance(scoring, Y_train[valid], predictions)) + '\n')
+ predictions = list()
+ # Get the validation predictions, reusing an already fitted workflow if available
+ if self.fitted_validation_workflows:
+ # Use already fitted workflow
+ estimator = self.fitted_validation_workflows[num + it * self.maxlen]
+ if estimator is None:
+ # Estimator is none, refit and get predictions
+ predictions = getpredictions()
+ else:
+ X_train_values = np.asarray([x[0] for x in X_train])
+ try:
+ predictions = estimator.predict_proba(X_train_values[valid])
+ except (NotFittedError, ValueError, AttributeError):
+ # Estimator cannot be fitted properly, hence skip it
+ predictions = None
+
+ else:
+ predictions = getpredictions()
- # At the end of the last iteration, store the results of this pipeline
- if it == (n_iter - 1):
- # Add the pipeline to the list
- ensemble_configurations.append(p_all)
- # Store the predictions
- all_predictions.append(predictions_iter)
- # Store the performance
- performances.append(np.mean(performances_iter))
+ if predictions is None:
+ # Estimator cannot be fitted properly, hence skip it
+ break
+
+ # Only take the probabilities for the second class
+ predictions = predictions[:, 1]
+
+ # Store the predictions on this split
+ predictions_iter[it, :] = predictions
+
+ # Compute and store the performance on this split
+ performances_iter.append(compute_performance(scoring,
+ Y_train[valid],
+ predictions))
+
+ # print('fitandscore: ' + str(out[0][1]) + ' and computed: ' +
+ # str(compute_performance(scoring, Y_train[valid], predictions)) + '\n')
+
+ # At the end of the last iteration, store the results of this pipeline
+ if it == (n_iter - 1):
+ # Add the pipeline to the list
+ ensemble_configurations.append(p_all)
+ # Store the predictions
+ all_predictions.append(predictions_iter)
+ # Store the performance
+ performances.append(np.mean(performances_iter))
# Update the parameters
parameters_all = ensemble_configurations
n_classifiers = len(ensemble_configurations)
+
# Construct the array of final predictions
base_Y_valid_score = np.zeros((n_iter, n_classifiers, prediction_length))
for iter in range(n_iter):
@@ -1326,7 +1366,7 @@ Source code for WORC.classification.SearchCV
perf = compute_performance(scoring, Y_valid_truth[n_crossval], y_valid_score_new)
performances_temp[n_crossval] = perf
- # Check which ensemble should be in the ensemble to maximally improve
+ # Check which estimator should be in the ensemble to maximally improve
new_performance = np.mean(performances_temp)
performances_n_class.append(new_performance)
best_index = sortedindices[iteration]
@@ -1337,10 +1377,13 @@ Source code for WORC.classification.SearchCV
N_models = performances_n_class.index(new_performance) + 1 # +1 due to python indexing
ensemble = ensemble[0:N_models]
best_performance = new_performance
+
+ self.ensemble_validation_score = best_performance
- print(f"Ensembling best {scoring}: {best_performance}.")
- print(f"Single estimator best {scoring}: {single_estimator_performance}.")
- print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
+ if verbose:
+ print(f"Ensembling best {scoring}: {best_performance}.")
+ print(f"Single estimator best {scoring}: {single_estimator_performance}.")
+ print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
elif method == 'ForwardSelection':
@@ -1351,6 +1394,7 @@ Source code for WORC.classification.SearchCV
Y_valid_score = copy.deepcopy(base_Y_valid_score)
if verbose:
print(f"Iteration: {iteration}, best {scoring}: {new_performance}.")
+
best_performance = new_performance
if iteration > 1:
@@ -1387,10 +1431,13 @@ Source code for WORC.classification.SearchCV
best_index = performances_temp.index(new_performance)
iteration += 1
- # Print the performance gain
- print(f"Ensembling best {scoring}: {best_performance}.")
- print(f"Single estimator best {scoring}: {single_estimator_performance}.")
- print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
+ self.ensemble_validation_score = best_performance
+
+ if verbose:
+ # Print the performance gain
+ print(f"Ensembling best {scoring}: {best_performance}.")
+ print(f"Single estimator best {scoring}: {single_estimator_performance}.")
+ print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
elif method == 'Caruana':
if verbose:
@@ -1443,11 +1490,13 @@ Source code for WORC.classification.SearchCV
optimal_N_models = best_ensemble_scores.index(optimal_ensemble_performance) + 1
ensemble = ensemble[0:optimal_N_models]
best_performance = optimal_ensemble_performance
+ self.ensemble_validation_score = best_performance
- # Print the performance gain
- print(f"Ensembling best {scoring}: {best_performance}.")
- print(f"Single estimator best {scoring}: {single_estimator_performance}.")
- print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
+ if verbose:
+ # Print the performance gain
+ print(f"Ensembling best {scoring}: {best_performance}.")
+ print(f"Single estimator best {scoring}: {single_estimator_performance}.")
+ print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
elif method == 'Bagging':
if verbose:
@@ -1464,8 +1513,8 @@ Source code for WORC.classification.SearchCV
while iteration < 20:
Y_valid_score = copy.deepcopy(base_Y_valid_score)
- if verbose:
- print(f"Iteration: {iteration}, best {scoring}: {new_performance}.")
+ # if verbose:
+ # print(f"Iteration: {iteration}, best {scoring}: {new_performance}.")
if iteration > 1:
for num in range(0, n_iter):
@@ -1506,14 +1555,21 @@ Source code for WORC.classification.SearchCV
# Select the optimal ensemble size
optimal_ensemble_performance = max(best_ensemble_scores)
optimal_N_models = best_ensemble_scores.index(optimal_ensemble_performance) + 1
+
# Add the best ensemble of this bagging iteration to the final ensemble
bag_ensemble = bag_ensemble[0:optimal_N_models]
for model in bag_ensemble:
ensemble.append(model)
+
best_performance = optimal_ensemble_performance
- # Print the performance gain
- print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
+ self.ensemble_validation_score = best_performance
+
+ if verbose:
+ # Print the performance gain
+ print(f"Ensembling best {scoring}: {best_performance}.")
+ print(f"Single estimator best {scoring}: {single_estimator_performance}.")
+ print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
else:
print(f'[WORC WARNING] No valid ensemble method given: {method}. Not ensembling')
@@ -1523,60 +1579,44 @@ Source code for WORC.classification.SearchCV
# First create and score the ensemble on the validation set
# If we only want the best solution, we use the score from cv_results_
+ # For methods other than Single or top_N, the score has already been computed during fitting
if method == 'Single':
self.ensemble_validation_score = self.cv_results_['mean_test_score'][0]
elif method == 'top_N':
- self.ensemble_validation_score = [self.cv_results_['mean_test_score'][i] for i in ensemble]
- else:
- selected_params = [parameters_all[i] for i in ensemble]
- val_split_scores = []
- for train, valid in self.cv_iter:
- estimators = list()
- for enum, p_all in enumerate(selected_params):
- new_estimator = clone(base_estimator)
-
- new_estimator.refit_and_score(X_train, Y_train, p_all,
- train, valid,
- verbose=False)
-
- estimators.append(new_estimator)
-
- new_estimator = clone(base_estimator)
- new_estimator.ensemble = Ensemble(estimators)
- new_estimator.best_estimator_ = new_estimator.ensemble
- # Calculate and store the final performance of the ensemble
- # on this validation split
- X_train_values = np.asarray([x[0] for x in X_train])
- predictions = new_estimator.predict(X_train_values[valid])
- val_split_scores.append(compute_performance(scoring,
- Y_train[valid],
- predictions))
-
- validation_score = np.mean(val_split_scores)
- self.ensemble_validation_score = validation_score
-
- print('Final ensemble validation score: ' + str(self.ensemble_validation_score))
+ self.ensemble_validation_score = np.mean([self.cv_results_['mean_test_score'][i] for i in ensemble])
+
+ if verbose:
+ print('Final ensemble validation score: ' + str(self.ensemble_validation_score))
# Create the ensemble --------------------------------------------------
train = np.arange(0, len(X_train))
if self.fitted_workflows:
# Simply select the required estimators
- print('\t - Detected already fitted workflows.')
+ print('\t - Detected already fitted train-test workflows.')
estimators = list()
- for i in ensemble:
+ for enum in ensemble:
try:
# Try a prediction to see if estimator is truly fitted
- self.fitted_workflows[i].predict(np.asarray([X_train[0][0], X_train[1][0]]))
- estimators.append(self.fitted_workflows[i])
- except (NotFittedError, ValueError):
- print(f'\t\t - Estimator {i} not fitted (correctly) yet, refit.')
- estimator = self.fitted_workflows[i]
+ self.fitted_workflows[enum].predict(np.asarray([X_train[0][0], X_train[1][0]]))
+ estimators.append(self.fitted_workflows[enum])
+ except (NotFittedError, ValueError, AttributeError):
+ print(f'\t\t - Estimator {enum} not fitted (correctly) yet, refit.')
+ if self.fitted_workflows[enum] is not None:
+ estimator = self.fitted_workflows[enum]
+ else:
+ estimator = clone(base_estimator)
+
estimator.refit_and_score(X_train, Y_train,
- parameters_all[i],
- train, train,
- verbose=False)
-
- estimators.append(estimator)
+ parameters_all[enum],
+ train, train)
+
+ try:
+ # Try a prediction to see if estimator is truly fitted
+ estimator.predict(np.asarray([X_train[0][0], X_train[1][0]]))
+ estimators.append(estimator)
+ except (NotFittedError, ValueError):
+ print(f'\t\t - Estimator {enum} could not be fitted (correctly), do not include in ensemble.')
+
else:
# Create the ensemble trained on the full training set
parameters_all = [parameters_all[i] for i in ensemble]
@@ -1584,7 +1624,8 @@ Source code for WORC.classification.SearchCV
nest = len(ensemble)
for enum, p_all in enumerate(parameters_all):
# Refit a SearchCV object with the provided parameters
- print(f"Refitting estimator {enum + 1} / {nest}.")
+ if verbose:
+ print(f"Refitting estimator {enum + 1} / {nest}.")
base_estimator = clone(base_estimator)
# Check if we need to create a multiclass estimator
@@ -1601,33 +1642,32 @@ Source code for WORC.classification.SearchCV
estimators.append(base_estimator)
except (NotFittedError, ValueError):
print(f'\t\t - Estimator {enum} could not be fitted (correctly), do not include in ensemble.')
- if enum + 1 == nest and not estimators:
- print(f'\t\t - Reached end of ensemble ({enum + 1}), but ensemble is empty, thus go on untill we find an estimator that works')
- while not estimators:
- # We cannot have an empy ensemble, thus go on untill we find an estimator that works
- enum += 1
- p_all = self.cv_results_['params'][enum]
+
+ if not estimators:
+ print(f'\t\t - Ensemble is empty, thus go on untill we find an estimator that works and that is the final ensemble.')
+ while not estimators:
+ # We cannot have an empty ensemble, thus go on until we find an estimator that works
+ enum += 1
+ p_all = self.cv_results_['params'][enum]
- # Refit a SearchCV object with the provided parameters
- base_estimator = clone(base_estimator)
+ # Refit a SearchCV object with the provided parameters
+ base_estimator = clone(base_estimator)
- # Check if we need to create a multiclass estimator
- base_estimator.refit_and_score(X_train, Y_train, p_all,
- train, train,
- verbose=False)
+ # Check if we need to create a multiclass estimator
+ base_estimator.refit_and_score(X_train, Y_train, p_all,
+ train, train,
+ verbose=False)
- # Determine whether to overfit the feature scaling on the test set
- base_estimator.overfit_scaler = overfit_scaler
+ # Determine whether to overfit the feature scaling on the test set
+ base_estimator.overfit_scaler = overfit_scaler
- try:
- # Try a prediction to see if estimator is truly fitted
- base_estimator.predict(np.asarray([X_train[0][0], X_train[1][0]]))
- estimators.append(base_estimator)
- except (NotFittedError, ValueError):
- pass
- print(f'\t\t - Needed estimator {enum}.')
- else:
- pass
+ try:
+ # Try a prediction to see if estimator is truly fitted
+ base_estimator.predict(np.asarray([X_train[0][0], X_train[1][0]]))
+ estimators.append(base_estimator)
+ except (NotFittedError, ValueError):
+ pass
+ print(f'\t\t - Needed estimator {enum}.')
self.ensemble = Ensemble(estimators)
self.best_estimator_ = self.ensemble
@@ -1769,7 +1809,8 @@ Source code for WORC.classification.SearchCV
'return_n_test_samples',
'return_times', 'return_parameters',
'return_estimator',
- 'error_score', 'return_all', 'refit_workflows']
+ 'error_score', 'return_all', 'refit_training_workflows',
+ 'refit_validation_workflows']
verbose = False
return_n_test_samples = True
@@ -1784,7 +1825,8 @@ Source code for WORC.classification.SearchCV
return_parameters,
return_estimator,
self.error_score,
- return_all, self.refit_workflows],
+ return_all, self.refit_training_workflows,
+ self.refit_validation_workflows],
index=estimator_labels,
name='estimator Data')
fname = 'estimatordata.hdf5'
@@ -1795,10 +1837,10 @@ Source code for WORC.classification.SearchCV
# Create the fastr network
network = fastr.create_network('WORC_GridSearch_' + name)
- estimator_data = network.create_source('HDF5', id='estimator_source')
- traintest_data = network.create_source('HDF5', id='traintest')
- parameter_data = network.create_source('JsonFile', id='parameters')
- sink_output = network.create_sink('HDF5', id='output')
+ estimator_data = network.create_source('HDF5', id='estimator_source', resources=ResourceLimit(memory='4G'))
+ traintest_data = network.create_source('HDF5', id='traintest', resources=ResourceLimit(memory='4G'))
+ parameter_data = network.create_source('JsonFile', id='parameters', resources=ResourceLimit(memory='4G'))
+ sink_output = network.create_sink('HDF5', id='output', resources=ResourceLimit(memory='6G'))
fitandscore =\
network.create_node('worc/fitandscore:1.0',
@@ -1848,25 +1890,53 @@ Source code for WORC.classification.SearchCV
# if one choose to see train score, "out" will contain train score info
if self.return_train_score:
- if self.refit_workflows:
- (train_scores, test_scores, test_sample_counts,
- fit_time, score_time, parameters_all, fitted_workflows) =\
- zip(*save_data)
+ if self.refit_training_workflows:
+ if self.refit_validation_workflows:
+ (train_scores, test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all,
+ fitted_workflows, fitted_validation_workflows) =\
+ zip(*save_data)
+ else:
+ fitted_validation_workflows = None
+ (train_scores, test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all,
+ fitted_workflows) =\
+ zip(*save_data)
else:
fitted_workflows = None
- (train_scores, test_scores, test_sample_counts,
- fit_time, score_time, parameters_all) =\
- zip(*save_data)
+ if self.refit_validation_workflows:
+ (train_scores, test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all,
+ fitted_validation_workflows) =\
+ zip(*save_data)
+ else:
+ fitted_validation_workflows = None
+ (train_scores, test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all) =\
+ zip(*save_data)
else:
- if self.refit_workflows:
- (test_scores, test_sample_counts,
- fit_time, score_time, parameters_all, fitted_workflows) =\
- zip(*save_data)
+ if self.refit_training_workflows:
+ if self.refit_validation_workflows:
+ (test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all, fitted_workflows,
+ fitted_validation_workflows) =\
+ zip(*save_data)
+ else:
+ fitted_validation_workflows = None
+ (test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all, fitted_workflows) =\
+ zip(*save_data)
else:
fitted_workflows = None
- (test_scores, test_sample_counts,
- fit_time, score_time, parameters_all) =\
- zip(*save_data)
+ if self.refit_validation_workflows:
+ (test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all,
+ fitted_validation_workflows) =\
+ zip(*save_data)
+ else:
+ (test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all) =\
+ zip(*save_data)
# Remove the temporary folder used
if name != 'DEBUG_0':
@@ -1883,7 +1953,8 @@ Source code for WORC.classification.SearchCV
score_time=score_time,
cv_iter=cv_iter,
X=X, y=y,
- fitted_workflows=fitted_workflows)
+ fitted_workflows=fitted_workflows,
+ fitted_validation_workflows=fitted_validation_workflows)
[docs]class RandomizedSearchCVfastr(BaseSearchCVfastr):
@@ -2100,7 +2171,8 @@ Source code for WORC.classification.SearchCV
verbose=0, pre_dispatch='2*n_jobs', random_state=None,
error_score='raise', return_train_score=True,
n_jobspercore=100, fastr_plugin=None, memory='2G', maxlen=100,
- ranking_score='test_score', refit_workflows=False):
+ ranking_score='test_score', refit_training_workflows=False,
+ refit_validation_workflows=False):
super(RandomizedSearchCVfastr, self).__init__(
param_distributions=param_distributions, scoring=scoring, fit_params=fit_params,
n_iter=n_iter, random_state=random_state, n_jobs=n_jobs, iid=iid, refit=refit, cv=cv, verbose=verbose,
@@ -2108,7 +2180,8 @@ Source code for WORC.classification.SearchCV
return_train_score=return_train_score,
n_jobspercore=n_jobspercore, fastr_plugin=fastr_plugin,
memory=memory, maxlen=maxlen, ranking_score=ranking_score,
- refit_workflows=refit_workflows)
+ refit_training_workflows=refit_training_workflows,
+ refit_validation_workflows=refit_validation_workflows)
[docs] def fit(self, X, y=None, groups=None):
"""Randomized model selection and hyperparameter search.
@@ -2197,13 +2270,53 @@ Source code for WORC.classification.SearchCV
# if one choose to see train score, "out" will contain train score info
if self.return_train_score:
- (train_scores, test_scores, test_sample_counts,
- fit_time, score_time, parameters_all) =\
- save_data
+ if self.refit_training_workflows:
+ if self.refit_validation_workflows:
+ (train_scores, test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all,
+ fitted_workflows, fitted_validation_workflows) =\
+ zip(*save_data)
+ else:
+ fitted_validation_workflows = None
+ (train_scores, test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all,
+ fitted_workflows) =\
+ zip(*save_data)
+ else:
+ fitted_workflows = None
+ if self.refit_validation_workflows:
+ (train_scores, test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all,
+ fitted_validation_workflows) =\
+ zip(*save_data)
+ else:
+ fitted_validation_workflows = None
+ (train_scores, test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all) =\
+ zip(*save_data)
else:
- (test_scores, test_sample_counts,
- fit_time, score_time, parameters_all) =\
- save_data
+ if self.refit_training_workflows:
+ if self.refit_validation_workflows:
+ (test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all, fitted_workflows,
+ fitted_validation_workflows) =\
+ zip(*save_data)
+ else:
+ fitted_validation_workflows = None
+ (test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all, fitted_workflows) =\
+ zip(*save_data)
+ else:
+ fitted_workflows = None
+ if self.refit_validation_workflows:
+ (test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all,
+ fitted_validation_workflows) =\
+ zip(*save_data)
+ else:
+ (test_scores, test_sample_counts,
+ fit_time, score_time, parameters_all) =\
+ zip(*save_data)
self.process_fit(n_splits=n_splits,
parameters_all=parameters_all,
@@ -2213,7 +2326,9 @@ Source code for WORC.classification.SearchCV
fit_time=fit_time,
score_time=score_time,
cv_iter=cv_iter,
- X=X, y=y)
+ X=X, y=y,
+ fitted_workflows=fitted_workflows,
+ fitted_validation_workflows=fitted_validation_workflows)
return self
@@ -3193,15 +3308,15 @@ Source code for WORC.classification.SearchCV
# Process the results of the fitting procedure
self.process_fit(n_splits=n_splits,
- parameters_all=parameters_all,
- test_sample_counts=test_sample_counts,
- test_score_dicts=test_scores,
- train_score_dicts=train_scores,
- fit_time=fit_time,
- score_time=score_time,
- cv_iter=cv_iter,
- X=self.features, y=self.labels,
- use_smac=True)
+ parameters_all=parameters_all,
+ test_sample_counts=test_sample_counts,
+ test_score_dicts=test_scores,
+ train_score_dicts=train_scores,
+ fit_time=fit_time,
+ score_time=score_time,
+ cv_iter=cv_iter,
+ X=self.features, y=self.labels,
+ use_smac=True)
return self
@@ -3411,6 +3526,7 @@ Source code for WORC.classification.SearchCV
error_score='raise', return_train_score=True,
n_jobspercore=100, fastr_plugin=None, maxlen=100,
ranking_score='test_score', features=None, labels=None,
+ refit_training_workflows=False, refit_validation_workflows=False,
smac_result_file=None):
super(GuidedSearchCVSMAC, self).__init__(
param_distributions=param_distributions, scoring=scoring, fit_params=fit_params,
@@ -3418,7 +3534,8 @@ Source code for WORC.classification.SearchCV
pre_dispatch=pre_dispatch, error_score=error_score,
return_train_score=return_train_score,
n_jobspercore=n_jobspercore, fastr_plugin=fastr_plugin,
- maxlen=maxlen, ranking_score=ranking_score)
+ maxlen=maxlen, ranking_score=ranking_score, refit_training_workflows=refit_training_workflows,
+ refit_validation_workflows=refit_validation_workflows)
self.features = features
self.labels = labels
self.smac_result_file = smac_result_file
@@ -3457,7 +3574,7 @@ Source code for WORC.classification.SearchCV
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/construct_classifier.html b/WORC/doc/_build/html/_modules/WORC/classification/construct_classifier.html
index 32253edb..8787c53d 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/construct_classifier.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/construct_classifier.html
@@ -8,7 +8,7 @@
- WORC.classification.construct_classifier — WORC 3.6.0 documentation
+ WORC.classification.construct_classifier — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -258,10 +257,11 @@ Source code for WORC.classification.construct_classifier
min_child_weight=min_child_weight,
n_estimators=boosting_rounds,
colsample_bytree=colsample_bytree,
- random_state=config['random_seed'])
+ random_state=config['random_seed'],
+ n_jobs=1)
elif config['classifiers'] == 'XGBRegressor':
- # XGB Classifier
+ # XGB Regressor
max_depth = config['XGB_max_depth']
learning_rate = config['XGB_learning_rate']
gamma = config['XGB_gamma']
@@ -274,7 +274,8 @@ Source code for WORC.classification.construct_classifier
min_child_weight=min_child_weight,
n_estimators=boosting_rounds,
colsample_bytree=colsample_bytree,
- random_state=config['random_seed'])
+ random_state=config['random_seed'],
+ n_jobs=1)
elif config['classifiers'] == 'LightGBMClassifier':
# LightGBM Classifier
@@ -574,7 +575,7 @@ Source code for WORC.classification.construct_classifier
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/createfixedsplits.html b/WORC/doc/_build/html/_modules/WORC/classification/createfixedsplits.html
index 14a5ef20..2d2f1802 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/createfixedsplits.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/createfixedsplits.html
@@ -8,7 +8,7 @@
- WORC.classification.createfixedsplits — WORC 3.6.0 documentation
+ WORC.classification.createfixedsplits — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -302,7 +301,7 @@ Source code for WORC.classification.createfixedsplits
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/crossval.html b/WORC/doc/_build/html/_modules/WORC/classification/crossval.html
index a13806a8..7bfe3071 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/crossval.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/crossval.html
@@ -8,7 +8,7 @@
- WORC.classification.crossval — WORC 3.6.0 documentation
+ WORC.classification.crossval — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -165,7 +164,7 @@
Source code for WORC.classification.crossval
#!/usr/bin/env python
-# Copyright 2016-2021 Biomedical Imaging Group Rotterdam, Departments of
+# Copyright 2016-2022 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -186,15 +185,17 @@ Source code for WORC.classification.crossval
import os
import time
from time import gmtime, strftime
-from sklearn.model_selection import train_test_split, LeaveOneOut
-from .parameter_optimization import random_search_parameters, guided_search_parameters
-import WORC.addexceptions as ae
-from WORC.classification.regressors import regressors
import glob
import random
import json
-from copy import copy
+import copy
from sklearn.metrics import f1_score, roc_auc_score
+from sklearn.model_selection import train_test_split, LeaveOneOut
+from joblib import Parallel, delayed
+import WORC.addexceptions as ae
+from WORC.classification.parameter_optimization import random_search_parameters, guided_search_parameters
+from WORC.classification.regressors import regressors
+from WORC.classification.SearchCV import RandomizedSearchCVfastr
[docs]def random_split_cross_validation(image_features, feature_labels, classes,
@@ -205,7 +206,6 @@ Source code for WORC.classification.crossval
fixedsplits=None,
fixed_seed=False, use_fastr=None,
fastr_plugin=None,
- do_test_RS_Ensemble=False,
use_SMAC=False, smac_result_file=None):
"""Cross-validation in which data is randomly split in each iteration.
@@ -402,9 +402,9 @@ Source code for WORC.classification.crossval
Y_test, patient_ID_train, patient_ID_test, random_seed)
save_data.append(temp_save_data)
-
+
# Test performance for various RS and ensemble sizes
- if do_test_RS_Ensemble:
+ if config['General']['DoTestNRSNEns']:
output_json = os.path.join(tempfolder, f'performance_RS_Ens_crossval_{i}.json')
test_RS_Ensemble(estimator_input=trained_classifier,
X_train=X_train, Y_train=Y_train,
@@ -415,6 +415,8 @@ Source code for WORC.classification.crossval
# Save memory
delattr(trained_classifier, 'fitted_workflows')
trained_classifier.fitted_workflows = list()
+ delattr(trained_classifier, 'fitted_validation_workflows')
+ trained_classifier.fitted_validation_workflows = list()
# Create a temporary save
if tempsave:
@@ -432,12 +434,13 @@ Source code for WORC.classification.crossval
panda_data = pd.DataFrame(panda_data_temp)
n = 0
- filename = os.path.join(tempfolder, 'tempsave_' + str(i) + '.hdf5')
+ filename = os.path.join(tempfolder, 'tempsave_' + str(i) + '.pkl')
while os.path.exists(filename):
n += 1
- filename = os.path.join(tempfolder, 'tempsave_' + str(i + n) + '.hdf5')
+ filename = os.path.join(tempfolder, 'tempsave_' + str(i + n) + '.pkl')
- panda_data.to_hdf(filename, 'EstimatorData')
+ # panda_data.to_hdf(filename, 'EstimatorData')
+ panda_data.to_pickle(filename)
del panda_data, panda_data_temp
# Print elapsed time
@@ -675,13 +678,13 @@ Source code for WORC.classification.crossval
os.makedirs(tempfolder)
else:
# Previous tempsaves, start where we left of
- tempsaves = glob.glob(os.path.join(tempfolder, 'tempsave_*.hdf5'))
+ tempsaves = glob.glob(os.path.join(tempfolder, 'tempsave_*.pkl'))
start = len(tempsaves)
# Load previous tempsaves and add to save data
tempsaves.sort()
for t in tempsaves:
- t = pd.read_hdf(t)
+ t = pd.read_pickle(t)
t = t['Constructed crossvalidation']
temp_save_data = (t.trained_classifier, t.X_train, t.X_test,
t.Y_train, t.Y_test, t.patient_ID_train,
@@ -820,7 +823,7 @@ Source code for WORC.classification.crossval
[docs]def nocrossval(config, label_data_train, label_data_test, image_features_train,
image_features_test, param_grid=None, use_fastr=False,
fastr_plugin=None, ensemble={'Use': False},
- modus='singlelabel', do_test_RS_Ensemble=False):
+ modus='singlelabel'):
"""Constructs multiple individual classifiers based on the label settings.
Arguments:
@@ -952,7 +955,7 @@ Source code for WORC.classification.crossval
classifier_labelss[i_name] = panda_data_temp
# Test performance for various RS and ensemble sizes
- if do_test_RS_Ensemble:
+ if config['General']['DoTestNRSNEns']:
# FIXME: Use home folder, as this function does not know
# Where final or temporary output is located
output_json = os.path.join(os.path.expanduser("~"),
@@ -967,6 +970,8 @@ Source code for WORC.classification.crossval
# Save memory
delattr(trained_classifier, 'fitted_workflows')
trained_classifier.fitted_workflows = list()
+ delattr(trained_classifier, 'fitted_validation_workflows')
+ trained_classifier.fitted_validation_workflows = list()
panda_data = pd.DataFrame(classifier_labelss)
@@ -974,24 +979,29 @@ Source code for WORC.classification.crossval
[docs]def test_RS_Ensemble(estimator_input, X_train, Y_train, X_test, Y_test,
- feature_labels, output_json):
+ feature_labels, output_json, verbose=False, RSs=None,
+ ensembles=None, maxlen=100):
"""Test performance for different random search and ensemble sizes.
This function is written for conducting a specific experiment from the
WORC paper to test how the performance varies with varying random search
and ensemble sizes. We do not recommend usage in general of this part.
+
+ maxlen = 100 # max ensembles numeric
+
"""
-
# Process some input
- estimator_original = copy(estimator_input)
+ estimator_original = copy.deepcopy(estimator_input)
X_train_temp = [(x, feature_labels) for x in X_train]
- n_workflows = len(estimator_original.fitted_workflows)
-
+ n_workflows = len(estimator_original.cv_results_['mean_test_score'])
+
# Settings
- RSs = [10, 50, 100, 1000, 10000] * 10 + [n_workflows]
- ensembles = [1, 10, 50, 100]
- maxlen = max(ensembles)
-
+ if RSs is None:
+ RSs = [10, 100, 1000, 10000] * 10 + [n_workflows]
+
+ if ensembles is None:
+ ensembles = [1, 10, 100, 'FitNumber', 'Bagging']
+
# Loop over the random searches and ensembles
keys = list()
performances = dict()
@@ -1007,9 +1017,11 @@ Source code for WORC.classification.crossval
# Make a local copy of the estimator and select only subset of workflows
print(f'\t Using RS {RS}.')
- estimator = copy(estimator_original)
+ estimator = copy.deepcopy(estimator_original)
+ # estimator.maxlen = RS # Why is this needed? This will only lead to a lot of extra workflows on top of the top 100 being fitted
+ estimator.maxlen = min(RS, maxlen)
workflow_num = np.arange(n_workflows).tolist()
-
+
# Select only a specific set of workflows
random.shuffle(workflow_num)
selected_workflows = workflow_num[0:RS]
@@ -1018,44 +1030,91 @@ Source code for WORC.classification.crossval
F1_validation = estimator.cv_results_['mean_test_score']
F1_validation = [F1_validation[i] for i in selected_workflows]
workflow_ranking = np.argsort(np.asarray(F1_validation)).tolist()[::-1] # Normally, rank from smallest to largest, so reverse
+ workflow_ranking = workflow_ranking[0:maxlen] # only maxlen estimators needed for ensembling tests
F1_validation = [F1_validation[i] for i in workflow_ranking]
- # Only keep the number of RS required and resort based on ensemble
- estimator.fitted_workflows =\
- [estimator.fitted_workflows[i] for i in selected_workflows]
- estimator.fitted_workflows =\
- [estimator.fitted_workflows[i] for i in workflow_ranking]
-
+ # Only keep the number of RS required and resort based on ranking
+ if estimator.fitted_workflows:
+ estimator.fitted_workflows =\
+ [estimator.fitted_workflows[i] for i in selected_workflows]
+ estimator.fitted_workflows =\
+ [estimator.fitted_workflows[i] for i in workflow_ranking]
+
+ # For advanced ensembling methods, keep only the parameters of the selected RS workflows
+ estimator.cv_results_['params'] =\
+ [estimator.cv_results_['params'][i] for i in selected_workflows]
+ estimator.cv_results_['params'] =\
+ [estimator.cv_results_['params'][i] for i in workflow_ranking]
+
+ # Refit validation estimators if required
+ if not estimator.fitted_validation_workflows and estimator.refit_validation_workflows:
+ print('\t Refit all validation workflows so we dont have to do this for every ensembling method.')
+
+ # Define function to fit a single estimator
+ def fitvalidationestimator(parameters, train, test):
+ new_estimator = RandomizedSearchCVfastr()
+ new_estimator.refit_and_score(X_train_temp, Y_train, parameters,
+ train=train, test=test)
+ return new_estimator
+
+ # Use joblib to parallelize fitting
+ estimators =\
+ Parallel(n_jobs=-1)(delayed(fitvalidationestimator)(
+ parameters, train, test)
+ for parameters in estimator.cv_results_['params']
+ for train, test in estimator.cv_iter)
+ estimator.fitted_validation_workflows = estimators
+
+ elif estimator.fitted_validation_workflows:
+ # Select the required already fitted validation workflows
+ selected_workflows_ranked_all = list()
+ for j in range(len(estimator.cv_iter)):
+ selected_workflows_ranked = [i + n_workflows * j for i in selected_workflows]
+ selected_workflows_ranked = [selected_workflows_ranked[i] for i in workflow_ranking]
+ selected_workflows_ranked_all.extend(selected_workflows_ranked)
+
+ estimator.fitted_validation_workflows =\
+ [estimator.fitted_validation_workflows[i] for i in selected_workflows_ranked_all]
+
# Store train and validation AUC
- mean_val_F1 = F1_validation[0:maxlen]
F1_training = estimator.cv_results_['mean_train_score']
F1_training = [F1_training[i] for i in selected_workflows]
F1_training = [F1_training[i] for i in workflow_ranking]
- mean_train_F1 = F1_training[0:maxlen]
- performances[f'Mean training F1-score {key} top {maxlen}'] = mean_train_F1
- performances[f'Mean validation F1-score {key} top {maxlen}'] = mean_val_F1
+ performances[f'Mean training F1-score {key} top {maxlen}'] = F1_validation
+ performances[f'Mean validation F1-score {key} top {maxlen}'] = F1_training
for ensemble in ensembles:
- if ensemble <= RS:
- print(f'\t Using ensemble {ensemble}.')
+ if isinstance(ensemble, int):
+ if ensemble > RS:
+ continue
+ else:
+ print(f'\t Using ensemble {ensemble}.')
+ # Create the ensemble
+ estimator.create_ensemble(X_train_temp, Y_train, method='top_N',
+ size=ensemble, verbose=verbose)
+ else:
+ print(f'\t Using ensemble {ensemble}.')
# Create the ensemble
- estimator.create_ensemble(X_train_temp, Y_train, method=ensemble)
-
- # Compute performance
- y_prediction = estimator.predict(X_test)
- y_score = estimator.predict_proba(X_test)[:, 1]
- auc = roc_auc_score(Y_test, y_score)
- f1_score_out = f1_score(Y_test, y_prediction, average='weighted')
- performances[f'Test F1-score Ensemble {ensemble} {key}'] = f1_score_out
- performances[f'Test AUC Ensemble {ensemble} {key}'] = auc
-
- y_prediction = estimator.predict(X_train)
- y_score = estimator.predict_proba(X_train)[:, 1]
- auc = roc_auc_score(Y_train, y_score)
- f1_score_out = f1_score(Y_train, y_prediction, average='weighted')
- performances[f'Train F1-score Ensemble {ensemble} {key}'] = f1_score_out
- performances[f'Train AUC Ensemble {ensemble} {key}'] = auc
+ estimator.create_ensemble(X_train_temp, Y_train, method=ensemble,
+ verbose=verbose)
+
+ performances[f'Validation F1-score Ensemble {ensemble} {key}'] = estimator.ensemble_validation_score
+
+ # Compute performance
+ y_prediction = estimator.predict(X_test)
+ y_score = estimator.predict_proba(X_test)[:, 1]
+ auc = roc_auc_score(Y_test, y_score)
+ f1_score_out = f1_score(Y_test, y_prediction, average='weighted')
+ performances[f'Test F1-score Ensemble {ensemble} {key}'] = f1_score_out
+ performances[f'Test AUC Ensemble {ensemble} {key}'] = auc
+
+ y_prediction = estimator.predict(X_train)
+ y_score = estimator.predict_proba(X_train)[:, 1]
+ auc = roc_auc_score(Y_train, y_score)
+ f1_score_out = f1_score(Y_train, y_prediction, average='weighted')
+ performances[f'Train F1-score Ensemble {ensemble} {key}'] = f1_score_out
+ performances[f'Train AUC Ensemble {ensemble} {key}'] = auc
# Write output
with open(output_json, 'w') as fp:
@@ -1072,7 +1131,7 @@ Source code for WORC.classification.crossval
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/estimators.html b/WORC/doc/_build/html/_modules/WORC/classification/estimators.html
index 559879c5..995ab682 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/estimators.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/estimators.html
@@ -8,7 +8,7 @@
- WORC.classification.estimators — WORC 3.6.0 documentation
+ WORC.classification.estimators — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -326,7 +325,7 @@ Source code for WORC.classification.estimators
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/fitandscore.html b/WORC/doc/_build/html/_modules/WORC/classification/fitandscore.html
index 77e5d3fb..61273bd3 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/fitandscore.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/fitandscore.html
@@ -8,7 +8,7 @@
- WORC.classification.fitandscore — WORC 3.6.0 documentation
+ WORC.classification.fitandscore — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -225,8 +224,9 @@ Source code for WORC.classification.fitandscore
<
return_times=True, return_parameters=False,
return_estimator=False,
error_score='raise', verbose=False,
- return_all=True, refit_workflows=False,
- use_smac=False):
+ return_all=True, refit_training_workflows=False,
+ refit_validation_workflows=False,
+ skip=False):
"""Fit an estimator to a dataset and score the performance.
The following
@@ -389,8 +389,8 @@ Source code for WORC.classification.fitandscore
<
# Split in train and testing
X_train, y_train = _safe_split(estimator, feature_values, y, train)
X_test, y_test = _safe_split(estimator, feature_values, y, test, train)
- train = np.arange(0, len(y_train))
- test = np.arange(len(y_train), len(y_train) + len(y_test))
+ new_train = np.arange(0, len(y_train))
+ new_test = np.arange(len(y_train), len(y_train) + len(y_test))
# Set some defaults for if a part fails and we return a dummy
fit_time = np.inf
@@ -429,7 +429,10 @@ Source code for WORC.classification.fitandscore
<
# Additional to sklearn defaults: return all parameters and refitted estimator
ret.append(parameters)
- if refit_workflows:
+ if refit_training_workflows:
+ ret.append(None)
+
+ if refit_validation_workflows:
ret.append(None)
# ------------------------------------------------------------------------
@@ -469,6 +472,7 @@ Source code for WORC.classification.fitandscore
<
imp_type = para_estimator['ImputationMethod']
if verbose:
print(f'Imputing NaN with {imp_type}.')
+
# Only used with KNN in SMAC, otherwise assign default
if 'ImputationNeighbours' in para_estimator.keys():
imp_nn = para_estimator['ImputationNeighbours']
@@ -480,14 +484,34 @@ Source code for WORC.classification.fitandscore
<
imputer.fit(X_train)
original_shape = X_train.shape
- X_train = imputer.transform(X_train)
- imputed_shape = X_train.shape
- X_test = imputer.transform(X_test)
-
+ imputed_shape = imputer.transform(X_train).shape
+
if original_shape != imputed_shape:
removed_features = original_shape[1] - imputed_shape[1]
- raise ae.WORCValueError(f'Several features ({removed_features}) were np.NaN for all objects. Hence, imputation was not possible. Either make sure this is correct and turn of imputation, or correct the feature.')
+ if para_estimator['ImputationSkipAllNaN'] == 'True':
+ print(f"[WARNING]: Several features ({removed_features}) were np.NaN for all objects. config['Imputation']['skipallNaN'] set to True, so simply eliminate these features.")
+ if hasattr(imputer.Imputer, 'statistics_'):
+ X_train = imputer.transform(X_train)
+ X_test = imputer.transform(X_test)
+ feature_labels_zero = [fl for fnum, fl in enumerate(feature_labels[0]) if not np.isnan(imputer.Imputer.statistics_[fnum])]
+ feature_labels = [feature_labels_zero for i in X_train]
+ else:
+ # Fit a mean imputer to transform the labels
+ temp_imputer = Imputer(missing_values=np.nan, strategy='mean')
+ temp_imputer.fit(X_train)
+ X_train = imputer.transform(X_train)
+ X_test = imputer.transform(X_test)
+ feature_labels_zero = [fl for fnum, fl in enumerate(feature_labels[0]) if not np.isnan(temp_imputer.Imputer.statistics_[fnum])]
+ feature_labels = [feature_labels_zero for i in X_train]
+ else:
+ raise ae.WORCValueError(f'Several features ({removed_features}) were np.NaN for all objects. Hence, imputation was not possible. Either make sure this is correct and turn of imputation, or correct the feature.')
+ else:
+ X_train = imputer.transform(X_train)
+ X_test = imputer.transform(X_test)
+ if 'ImputationSkipAllNaN' in para_estimator.keys():
+ del para_estimator['ImputationSkipAllNaN']
+
del para_estimator['Imputation']
del para_estimator['ImputationMethod']
if 'ImputationNeighbours' in para_estimator.keys():
@@ -606,7 +630,34 @@ Source code for WORC.classification.fitandscore
<
X_test = VarSel.transform(X_test)
except ValueError:
if verbose:
- print('[WARNING]: No features meet the selected Variance threshold! Skipping selection.')
+ print('[WARNING]: No features meet the selected variance threshold.')
+
+ VarSel = None
+ if skip:
+ if verbose:
+ print('[WARNING] Refitting, so we need an estimator, thus skipping this step.')
+ parameters['Featsel_Variance'] = 'False'
+ else:
+ if verbose:
+ print('[WARNING] Returning NaN as performance.')
+
+ # return NaN as performance
+ para_estimator = delete_nonestimator_parameters(para_estimator)
+
+ # Update the runtime
+ end_time = time.time()
+ runtime = end_time - start_time
+ if return_train_score:
+ ret[3] = runtime
+ else:
+ ret[2] = runtime
+ if return_all:
+ return ret, GroupSel, VarSel, SelectModel,\
+ feature_labels[0], scaler, encoder, imputer, pca,\
+ StatisticalSel, ReliefSel, Sampler
+ else:
+ return ret
+
if verbose:
print("\t New Length: " + str(len(X_train[0])))
@@ -616,31 +667,10 @@ Source code for WORC.classification.fitandscore
<
if not return_all:
del VarSel
- # Check whether there are any features left
- if len(X_train[0]) == 0:
- # TODO: Make a specific WORC exception for this warning.
- if verbose:
- print('[WARNING]: No features are selected! Probably your features have too little variance. Parameters:')
- print(parameters)
- para_estimator = delete_nonestimator_parameters(para_estimator)
-
- # Update the runtime
- end_time = time.time()
- runtime = end_time - start_time
- if return_train_score:
- ret[3] = runtime
- else:
- ret[2] = runtime
-
- if return_all:
- return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
- else:
- return ret
-
# ------------------------------------------------------------------------
# Feature scaling
if verbose and para_estimator['FeatureScaling'] != 'None':
- print(f'Fitting scaler and transforming features, method ' +
+ print('Fitting scaler and transforming features, method ' +
f'{para_estimator["FeatureScaling"]}.')
scaling_method = para_estimator['FeatureScaling']
@@ -652,7 +682,7 @@ Source code for WORC.classification.fitandscore
<
if n_skip_feat == len(X_train[0]):
# Don't need to scale any features
if verbose:
- print('[WORC Warning] Skipping scaling, only skip features selected.')
+ print('[WARNING] Skipping scaling, only skip features selected.')
scaler = None
else:
scaler = WORCScaler(method=scaling_method, skip_features=skip_features)
@@ -695,12 +725,43 @@ Source code for WORC.classification.fitandscore
<
print("\t Original Length: " + str(len(X_train[0])))
# Transform all objects accordingly
- X_train = ReliefSel.transform(X_train)
- X_test = ReliefSel.transform(X_test)
+ X_train_temp = ReliefSel.transform(X_train)
+ if len(X_train_temp[0]) == 0:
+ if verbose:
+ print('[WARNING]: No features are selected! Probably RELIEF could not properly select features.')
+
+ ReliefSel = None
+ if skip:
+ if verbose:
+ print('[WARNING] Refitting, so we need an estimator, thus skipping this step.')
+ parameters['ReliefUse'] = 'False'
+ else:
+ if verbose:
+ print('[WARNING] Returning NaN as performance.')
+
+ # return NaN as performance
+ para_estimator = delete_nonestimator_parameters(para_estimator)
+
+ # Update the runtime
+ end_time = time.time()
+ runtime = end_time - start_time
+ if return_train_score:
+ ret[3] = runtime
+ else:
+ ret[2] = runtime
+ if return_all:
+ return ret, GroupSel, VarSel, SelectModel,\
+ feature_labels[0], scaler, encoder, imputer, pca,\
+ StatisticalSel, ReliefSel, Sampler
+ else:
+ return ret
+ else:
+ X_train = X_train_temp
+ X_test = ReliefSel.transform(X_test)
- if verbose:
- print("\t New Length: " + str(len(X_train[0])))
- feature_labels = ReliefSel.transform(feature_labels)
+ if verbose:
+ print("\t New Length: " + str(len(X_train[0])))
+ feature_labels = ReliefSel.transform(feature_labels)
del para_estimator['ReliefUse']
del para_estimator['ReliefNN']
@@ -712,27 +773,6 @@ Source code for WORC.classification.fitandscore
<
if not return_all:
del ReliefSel
- # Check whether there are any features left
- if len(X_train[0]) == 0:
- # TODO: Make a specific WORC exception for this warning.
- if verbose:
- print('[WARNING]: No features are selected! Probably RELIEF could not properly select features. Parameters:')
- print(parameters)
- para_estimator = delete_nonestimator_parameters(para_estimator)
-
- # Update the runtime
- end_time = time.time()
- runtime = end_time - start_time
- if return_train_score:
- ret[3] = runtime
- else:
- ret[2] = runtime
-
- if return_all:
- return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
- else:
- return ret
-
# ------------------------------------------------------------------------
# Perform feature selection using a model
if 'SelectFromModel' in para_estimator.keys():
@@ -774,9 +814,34 @@ Source code for WORC.classification.fitandscore
<
X_train_temp = SelectModel.transform(X_train)
if len(X_train_temp[0]) == 0:
if verbose:
- print('[WORC WARNING]: No features are selected! Probably your data is too noisy or the selection too strict. Skipping SelectFromModel.')
+ print('[WARNING]: No features are selected! Probably your data is too noisy or the selection too strict.')
+
SelectModel = None
- parameters['SelectFromModel'] = 'False'
+ if skip:
+ if verbose:
+ print('[WARNING] Refitting, so we need an estimator, thus skipping this step.')
+ parameters['SelectFromModel'] = 'False'
+ else:
+ if verbose:
+ print('[WARNING] Returning NaN as performance.')
+
+ # return NaN as performance
+ para_estimator = delete_nonestimator_parameters(para_estimator)
+
+ # Update the runtime
+ end_time = time.time()
+ runtime = end_time - start_time
+ if return_train_score:
+ ret[3] = runtime
+ else:
+ ret[2] = runtime
+ if return_all:
+ return ret, GroupSel, VarSel, SelectModel,\
+ feature_labels[0], scaler, encoder, imputer, pca,\
+ StatisticalSel, ReliefSel, Sampler
+ else:
+ return ret
+
else:
X_train = SelectModel.transform(X_train)
X_test = SelectModel.transform(X_test)
@@ -797,27 +862,6 @@ Source code for WORC.classification.fitandscore
<
if not return_all:
del SelectModel
- # Check whether there are any features left
- if len(X_train[0]) == 0:
- # TODO: Make a specific WORC exception for this warning.
- if verbose:
- print('[WARNING]: No features are selected! Probably SelectFromModel could not properly select features. Parameters:')
- print(parameters)
- para_estimator = delete_nonestimator_parameters(para_estimator)
-
- # Update the runtime
- end_time = time.time()
- runtime = end_time - start_time
- if return_train_score:
- ret[3] = runtime
- else:
- ret[2] = runtime
-
- if return_all:
- return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
- else:
- return ret
-
# ----------------------------------------------------------------
# PCA dimensionality reduction
# Principle Component Analysis
@@ -832,55 +876,78 @@ Source code for WORC.classification.fitandscore
<
pca.fit(X_train)
except (ValueError, LinAlgError) as e:
if verbose:
- print(f'[WARNING]: skipping this setting due to PCA Error: {e}.')
+ print(f'[WARNING] PCA Error: {e}.')
pca = None
-
- # Update the runtime
- end_time = time.time()
- runtime = end_time - start_time
- if return_train_score:
- ret[3] = runtime
- else:
- ret[2] = runtime
-
- if return_all:
- return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
+ if skip:
+ if verbose:
+ print('[WARNING] Refitting, so we need an estimator, thus skipping this step.')
+ parameters['UsePCA'] = 'False'
else:
- return ret
-
- evariance = pca.explained_variance_ratio_
- num = 0
- sum = 0
- while sum < 0.95:
- sum += evariance[num]
- num += 1
-
- # Make a PCA based on the determined amound of components
- pca = PCA(n_components=num, random_state=random_seed)
- try:
- pca.fit(X_train)
- except (ValueError, LinAlgError) as e:
- if verbose:
- print(f'[WARNING]: skipping this setting due to PCA Error: {e}.')
-
- pca = None
+ if verbose:
+ print('[WARNING] Returning NaN as performance.')
+
+ # return NaN as performance
+ para_estimator = delete_nonestimator_parameters(para_estimator)
+
+ # Update the runtime
+ end_time = time.time()
+ runtime = end_time - start_time
+ if return_train_score:
+ ret[3] = runtime
+ else:
+ ret[2] = runtime
+ if return_all:
+ return ret, GroupSel, VarSel, SelectModel,\
+ feature_labels[0], scaler, encoder, imputer, pca,\
+ StatisticalSel, ReliefSel, Sampler
+ else:
+ return ret
- # Update the runtime
- end_time = time.time()
- runtime = end_time - start_time
- if return_train_score:
- ret[3] = runtime
- else:
- ret[2] = runtime
+ else:
+ evariance = pca.explained_variance_ratio_
+ num = 0
+ sum = 0
+ while sum < 0.95:
+ sum += evariance[num]
+ num += 1
+
+ # Make a PCA based on the determined amount of components
+ pca = PCA(n_components=num, random_state=random_seed)
+ try:
+ pca.fit(X_train)
+ except (ValueError, LinAlgError) as e:
+ if verbose:
+ print(f'[WARNING]: PCA Error: {e}.')
- if return_all:
- return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
+ pca = None
+ if skip:
+ if verbose:
+ print('[WARNING] Refitting, so we need an estimator, thus skipping this step.')
+ parameters['UsePCA'] = 'False'
+ else:
+ if verbose:
+ print('[WARNING] Returning NaN as performance.')
+
+ # return NaN as performance
+ para_estimator = delete_nonestimator_parameters(para_estimator)
+
+ # Update the runtime
+ end_time = time.time()
+ runtime = end_time - start_time
+ if return_train_score:
+ ret[3] = runtime
+ else:
+ ret[2] = runtime
+ if return_all:
+ return ret, GroupSel, VarSel, SelectModel,\
+ feature_labels[0], scaler, encoder, imputer, pca,\
+ StatisticalSel, ReliefSel, Sampler
+ else:
+ return ret
else:
- return ret
-
- X_train = pca.transform(X_train)
- X_test = pca.transform(X_test)
+ X_train = pca.transform(X_train)
+ X_test = pca.transform(X_test)
else:
# Assume a fixed number of components: cannot be larger than
@@ -889,24 +956,43 @@ Source code for WORC.classification.fitandscore
<
if n_components >= len(X_train[0]):
if verbose:
- print(f"[WORC WARNING] PCA n_components ({n_components})> n_features ({len(X_train[0])}): skipping PCA.")
+ print(f"[WARNING] PCA n_components ({n_components})> n_features ({len(X_train[0])}): skipping PCA.")
else:
pca = PCA(n_components=n_components, random_state=random_seed)
try:
pca.fit(X_train)
+ X_train = pca.transform(X_train)
+ X_test = pca.transform(X_test)
except (ValueError, LinAlgError) as e:
if verbose:
- print(f'[WARNING]: skipping this setting due to PCA Error: {e}.')
+ print(f'[WARNING] PCA Error: {e}.')
pca = None
- if return_all:
- return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
+ if skip:
+ if verbose:
+ print('[WARNING] Refitting, so we need an estimator, thus skipping this step.')
+ parameters['UsePCA'] = 'False'
else:
- return ret
-
- X_train = pca.transform(X_train)
- X_test = pca.transform(X_test)
-
+ if verbose:
+ print('[WARNING] Returning NaN as performance.')
+
+ # return NaN as performance
+ para_estimator = delete_nonestimator_parameters(para_estimator)
+
+ # Update the runtime
+ end_time = time.time()
+ runtime = end_time - start_time
+ if return_train_score:
+ ret[3] = runtime
+ else:
+ ret[2] = runtime
+ if return_all:
+ return ret, GroupSel, VarSel, SelectModel,\
+ feature_labels[0], scaler, encoder, imputer, pca,\
+ StatisticalSel, ReliefSel, Sampler
+ else:
+ return ret
+
if verbose:
print("\t New Length: " + str(len(X_train[0])))
@@ -934,24 +1020,36 @@ Source code for WORC.classification.fitandscore
<
StatisticalSel.fit(X_train, y)
X_train_temp = StatisticalSel.transform(X_train)
- if len(X_train_temp[0]) == 0:
+ if len(X_train_temp[0]) == 0:
if verbose:
- print('[WORC WARNING]: No features are selected! Probably your statistical test feature selection was too strict. Skipping thresholding.')
- para_estimator = delete_nonestimator_parameters(para_estimator)
- # Update the runtime
- end_time = time.time()
- runtime = end_time - start_time
- if return_train_score:
- ret[3] = runtime
- else:
- ret[2] = runtime
- if return_all:
- return ret, GroupSel, VarSel, SelectModel,\
- feature_labels[0], scaler, encoder, imputer, pca,\
- StatisticalSel, ReliefSel, Sampler
+ print('[WARNING] No features are selected! Probably your statistical test feature selection was too strict.')
+
+ StatisticalSel = None
+ if skip:
+ if verbose:
+ print('[WARNING] Refitting, so we need an estimator, thus skipping this step.')
+ parameters['StatisticalTestUse'] = 'False'
else:
- return ret
-
+ if verbose:
+ print('[WARNING] Returning NaN as performance.')
+
+ # return NaN as performance
+ para_estimator = delete_nonestimator_parameters(para_estimator)
+
+ # Update the runtime
+ end_time = time.time()
+ runtime = end_time - start_time
+ if return_train_score:
+ ret[3] = runtime
+ else:
+ ret[2] = runtime
+ if return_all:
+ return ret, GroupSel, VarSel, SelectModel,\
+ feature_labels[0], scaler, encoder, imputer, pca,\
+ StatisticalSel, ReliefSel, Sampler
+ else:
+ return ret
+
else:
X_train = StatisticalSel.transform(X_train)
X_test = StatisticalSel.transform(X_test)
@@ -1014,7 +1112,7 @@ Source code for WORC.classification.fitandscore
<
except ae.WORCValueError as e:
message = str(e)
if verbose:
- print('[WORC WARNING] Skipping resampling: ' + message)
+ print('[WARNING] Skipping resampling: ' + message)
Sampler = None
parameters['Resampling_Use'] = 'False'
@@ -1045,7 +1143,8 @@ Source code for WORC.classification.fitandscore
<
neg = int(len(y_train_temp) - pos)
if pos < 10 or neg < 10:
if verbose:
- print(f'[WORC WARNING] Skipping resampling: to few objects returned in one or both classes (pos: {pos}, neg: {neg}).')
+ print(f'[WARNING] Skipping resampling: to few objects returned in one or both classes (pos: {pos}, neg: {neg}).')
+
Sampler = None
parameters['Resampling_Use'] = 'False'
else:
@@ -1061,8 +1160,8 @@ Source code for WORC.classification.fitandscore
<
print(message)
# Also reset train and test indices
- train = np.arange(0, len(y_train))
- test = np.arange(len(y_train), len(y_train) + len(y_test))
+ new_train = np.arange(0, len(y_train))
+ new_test = np.arange(len(y_train), len(y_train) + len(y_test))
# Delete the resampling parameters
del para_estimator['Resampling_Use']
@@ -1122,8 +1221,8 @@ Source code for WORC.classification.fitandscore
<
try:
ret = _fit_and_score(estimator, feature_values, y_all,
- scorers, train,
- test, verbose,
+ scorers, new_train,
+ new_test, verbose,
para_estimator, fit_params,
return_train_score=return_train_score,
return_parameters=return_parameters,
@@ -1154,13 +1253,21 @@ Source code for WORC.classification.fitandscore
<
# Add original parameters to return object
ret.append(parameters)
- if refit_workflows:
+ if refit_training_workflows:
+ # Refit estimator on train-test training dataset
indices = np.arange(0, len(y))
estimator = WORC.classification.SearchCV.RandomizedSearchCVfastr()
estimator.refit_and_score(X, y, parameters,
train=indices, test=indices)
ret.append(estimator)
-
+
+ if refit_validation_workflows:
+ # Refit estimator on train-validation training dataset
+ estimator = WORC.classification.SearchCV.RandomizedSearchCVfastr()
+ estimator.refit_and_score(X, y, parameters,
+ train=train, test=test)
+ ret.append(estimator)
+
# End the timing and store the fit_time
end_time = time.time()
runtime = end_time - start_time
@@ -1193,6 +1300,7 @@ Source code for WORC.classification.fitandscore
<
'Imputation',
'ImputationMethod',
'ImputationNeighbours',
+ 'ImputationSkipAllNaN',
'SelectFromModel',
'SelectFromModel_lasso_alpha',
'SelectFromModel_estimator',
@@ -1230,9 +1338,9 @@ Source code for WORC.classification.fitandscore
<
if np.isnan(value):
if verbose:
if feature_labels is not None:
- print(f"[WORC WARNING] NaN found, patient {pnum}, label {feature_labels[fnum]}. Replacing with zero.")
+ print(f"[WARNING] NaN found, patient {pnum}, label {feature_labels[fnum]}. Replacing with zero.")
else:
- print(f"[WORC WARNING] NaN found, patient {pnum}, label {fnum}. Replacing with zero.")
+ print(f"[WARNING] NaN found, patient {pnum}, label {fnum}. Replacing with zero.")
# Note: X is a list of lists, hence we cannot index the element directly
image_features_temp[pnum, fnum] = 0
@@ -1297,7 +1405,7 @@ Source code for WORC.classification.fitandscore
<
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/metrics.html b/WORC/doc/_build/html/_modules/WORC/classification/metrics.html
index 3a6c66d0..1af35975 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/metrics.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/metrics.html
@@ -8,7 +8,7 @@
- WORC.classification.metrics — WORC 3.6.0 documentation
+ WORC.classification.metrics — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -631,7 +630,7 @@ Source code for WORC.classification.metrics
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/parameter_optimization.html b/WORC/doc/_build/html/_modules/WORC/classification/parameter_optimization.html
index 1315e80f..9d4cf773 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/parameter_optimization.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/parameter_optimization.html
@@ -8,7 +8,7 @@
- WORC.classification.parameter_optimization — WORC 3.6.0 documentation
+ WORC.classification.parameter_optimization — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -192,7 +191,8 @@ Source code for WORC.classification.parameter_optimization
n_cores=1, fastr_plugin=None,
memory='2G', maxlen=100, ranking_score='test_score',
random_seed=None,
- refit_workflows=False):
+ refit_training_workflows=False,
+ refit_validation_workflows=False):
"""
Train a classifier and simultaneously optimizes hyperparameters using a
randomized search.
@@ -245,7 +245,8 @@ Source code for WORC.classification.parameter_optimization
fastr_plugin=fastr_plugin,
memory=memory,
ranking_score=ranking_score,
- refit_workflows=refit_workflows)
+ refit_training_workflows=refit_training_workflows,
+ refit_validation_workflows=refit_validation_workflows)
else:
random_search = RandomizedSearchCVJoblib(param_distributions=param_grid,
n_iter=N_iter,
@@ -257,7 +258,8 @@ Source code for WORC.classification.parameter_optimization
fastr_plugin=fastr_plugin,
memory=memory,
ranking_score=ranking_score,
- refit_workflows=refit_workflows)
+ refit_training_workflows=refit_training_workflows,
+ refit_validation_workflows=refit_validation_workflows)
random_search.fit(features, labels)
print("Best found parameters:")
for i in random_search.best_params_:
@@ -272,7 +274,8 @@ Source code for WORC.classification.parameter_optimization
n_jobspercore=200, use_fastr=False,
n_cores=1, fastr_plugin=None,
memory='2G', maxlen=100, ranking_score='test_score',
- random_seed=None, refit_workflows=False,
+ random_seed=None, refit_training_workflows=False,
+ refit_validation_workflows=False,
smac_result_file=None):
"""
Train a classifier and simultaneously optimizes hyperparameters using a
@@ -326,7 +329,9 @@ Source code for WORC.classification.parameter_optimization
ranking_score=ranking_score,
features=features,
labels=labels,
- smac_result_file=smac_result_file)
+ smac_result_file=smac_result_file,
+ refit_training_workflows=refit_training_workflows,
+ refit_validation_workflows=refit_validation_workflows)
guided_search.fit(features, labels)
print("Best found parameters:")
@@ -348,7 +353,7 @@ Source code for WORC.classification.parameter_optimization
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/trainclassifier.html b/WORC/doc/_build/html/_modules/WORC/classification/trainclassifier.html
index 8f6ae959..7308a4cc 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/trainclassifier.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/trainclassifier.html
@@ -8,7 +8,7 @@
- WORC.classification.trainclassifier — WORC 3.6.0 documentation
+ WORC.classification.trainclassifier — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -391,6 +390,7 @@ Source code for WORC.classification.trainclassifier
param_grid['Imputation'] = config['Imputation']['use']
param_grid['ImputationMethod'] = config['Imputation']['strategy']
+ param_grid['ImputationSkipAllNaN'] = config['Imputation']['skipallNaN']
param_grid['ImputationNeighbours'] =\
discrete_uniform(loc=config['Imputation']['n_neighbors'][0],
scale=config['Imputation']['n_neighbors'][1])
@@ -458,7 +458,7 @@ Source code for WORC.classification.trainclassifier
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/detectors/detectors.html b/WORC/doc/_build/html/_modules/WORC/detectors/detectors.html
index 938d1dd6..7ea5edd4 100644
--- a/WORC/doc/_build/html/_modules/WORC/detectors/detectors.html
+++ b/WORC/doc/_build/html/_modules/WORC/detectors/detectors.html
@@ -8,7 +8,7 @@
- WORC.detectors.detectors — WORC 3.6.0 documentation
+ WORC.detectors.detectors — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -311,7 +310,7 @@ Source code for WORC.detectors.detectors
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/exampledata/datadownloader.html b/WORC/doc/_build/html/_modules/WORC/exampledata/datadownloader.html
index 08383a48..0104c4e2 100644
--- a/WORC/doc/_build/html/_modules/WORC/exampledata/datadownloader.html
+++ b/WORC/doc/_build/html/_modules/WORC/exampledata/datadownloader.html
@@ -8,7 +8,7 @@
- WORC.exampledata.datadownloader — WORC 3.6.0 documentation
+ WORC.exampledata.datadownloader — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -348,7 +347,7 @@ Source code for WORC.exampledata.datadownloader
<
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/featureprocessing/Imputer.html b/WORC/doc/_build/html/_modules/WORC/featureprocessing/Imputer.html
index 1945c0da..f5ab019c 100644
--- a/WORC/doc/_build/html/_modules/WORC/featureprocessing/Imputer.html
+++ b/WORC/doc/_build/html/_modules/WORC/featureprocessing/Imputer.html
@@ -8,7 +8,7 @@
- WORC.featureprocessing.Imputer — WORC 3.6.0 documentation
+ WORC.featureprocessing.Imputer — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -258,7 +257,7 @@ Source code for WORC.featureprocessing.Imputer
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/featureprocessing/Relief.html b/WORC/doc/_build/html/_modules/WORC/featureprocessing/Relief.html
index 2d98c9a6..b0d9e2cb 100644
--- a/WORC/doc/_build/html/_modules/WORC/featureprocessing/Relief.html
+++ b/WORC/doc/_build/html/_modules/WORC/featureprocessing/Relief.html
@@ -8,7 +8,7 @@
- WORC.featureprocessing.Relief — WORC 3.6.0 documentation
+ WORC.featureprocessing.Relief — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -413,7 +412,7 @@ Source code for WORC.featureprocessing.Relief
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/featureprocessing/SelectGroups.html b/WORC/doc/_build/html/_modules/WORC/featureprocessing/SelectGroups.html
index ac0b0219..23886ac2 100644
--- a/WORC/doc/_build/html/_modules/WORC/featureprocessing/SelectGroups.html
+++ b/WORC/doc/_build/html/_modules/WORC/featureprocessing/SelectGroups.html
@@ -8,7 +8,7 @@
- WORC.featureprocessing.SelectGroups — WORC 3.6.0 documentation
+ WORC.featureprocessing.SelectGroups — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -370,7 +369,7 @@ Source code for WORC.featureprocessing.SelectGroups
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/featureprocessing/SelectIndividuals.html b/WORC/doc/_build/html/_modules/WORC/featureprocessing/SelectIndividuals.html
index 9191915d..3d12c4b8 100644
--- a/WORC/doc/_build/html/_modules/WORC/featureprocessing/SelectIndividuals.html
+++ b/WORC/doc/_build/html/_modules/WORC/featureprocessing/SelectIndividuals.html
@@ -8,7 +8,7 @@
- WORC.featureprocessing.SelectIndividuals — WORC 3.6.0 documentation
+ WORC.featureprocessing.SelectIndividuals — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -257,7 +256,7 @@ Source code for WORC.featureprocessing.SelectIndividuals
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/featureprocessing/StatisticalTestFeatures.html b/WORC/doc/_build/html/_modules/WORC/featureprocessing/StatisticalTestFeatures.html
index da8876cd..8510e9d5 100644
--- a/WORC/doc/_build/html/_modules/WORC/featureprocessing/StatisticalTestFeatures.html
+++ b/WORC/doc/_build/html/_modules/WORC/featureprocessing/StatisticalTestFeatures.html
@@ -8,7 +8,7 @@
- WORC.featureprocessing.StatisticalTestFeatures — WORC 3.6.0 documentation
+ WORC.featureprocessing.StatisticalTestFeatures — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -520,7 +519,7 @@ Source code for WORC.featureprocessing.StatisticalTestFeatures
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/featureprocessing/StatisticalTestThreshold.html b/WORC/doc/_build/html/_modules/WORC/featureprocessing/StatisticalTestThreshold.html
index 0e74c9c1..7e2063bd 100644
--- a/WORC/doc/_build/html/_modules/WORC/featureprocessing/StatisticalTestThreshold.html
+++ b/WORC/doc/_build/html/_modules/WORC/featureprocessing/StatisticalTestThreshold.html
@@ -8,7 +8,7 @@
- WORC.featureprocessing.StatisticalTestThreshold — WORC 3.6.0 documentation
+ WORC.featureprocessing.StatisticalTestThreshold — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -303,7 +302,7 @@ Source code for WORC.featureprocessing.StatisticalTestThreshold
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/featureprocessing/VarianceThreshold.html b/WORC/doc/_build/html/_modules/WORC/featureprocessing/VarianceThreshold.html
index 752b689d..cbfc9ad3 100644
--- a/WORC/doc/_build/html/_modules/WORC/featureprocessing/VarianceThreshold.html
+++ b/WORC/doc/_build/html/_modules/WORC/featureprocessing/VarianceThreshold.html
@@ -8,7 +8,7 @@
- WORC.featureprocessing.VarianceThreshold — WORC 3.6.0 documentation
+ WORC.featureprocessing.VarianceThreshold — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -288,7 +287,7 @@ Source code for WORC.featureprocessing.VarianceThreshold
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/plotting/compute_CI.html b/WORC/doc/_build/html/_modules/WORC/plotting/compute_CI.html
index 7cc98b22..93819df5 100644
--- a/WORC/doc/_build/html/_modules/WORC/plotting/compute_CI.html
+++ b/WORC/doc/_build/html/_modules/WORC/plotting/compute_CI.html
@@ -8,7 +8,7 @@
- WORC.plotting.compute_CI — WORC 3.6.0 documentation
+ WORC.plotting.compute_CI — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -284,7 +283,7 @@ Source code for WORC.plotting.compute_CI
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/plotting/linstretch.html b/WORC/doc/_build/html/_modules/WORC/plotting/linstretch.html
index b5f29baa..fe1b76a9 100644
--- a/WORC/doc/_build/html/_modules/WORC/plotting/linstretch.html
+++ b/WORC/doc/_build/html/_modules/WORC/plotting/linstretch.html
@@ -8,7 +8,7 @@
- WORC.plotting.linstretch — WORC 3.6.0 documentation
+ WORC.plotting.linstretch — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -206,7 +205,7 @@ Source code for WORC.plotting.linstretch
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/plotting/plot_ROC.html b/WORC/doc/_build/html/_modules/WORC/plotting/plot_ROC.html
index fc4bb635..46d80245 100644
--- a/WORC/doc/_build/html/_modules/WORC/plotting/plot_ROC.html
+++ b/WORC/doc/_build/html/_modules/WORC/plotting/plot_ROC.html
@@ -8,7 +8,7 @@
- WORC.plotting.plot_ROC — WORC 3.6.0 documentation
+ WORC.plotting.plot_ROC — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -876,7 +875,7 @@ Source code for WORC.plotting.plot_ROC
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/plotting/plot_barchart.html b/WORC/doc/_build/html/_modules/WORC/plotting/plot_barchart.html
index 6f57b26b..a29ce399 100644
--- a/WORC/doc/_build/html/_modules/WORC/plotting/plot_barchart.html
+++ b/WORC/doc/_build/html/_modules/WORC/plotting/plot_barchart.html
@@ -8,7 +8,7 @@
- WORC.plotting.plot_barchart — WORC 3.6.0 documentation
+ WORC.plotting.plot_barchart — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -516,7 +515,7 @@ Source code for WORC.plotting.plot_barchart
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/plotting/plot_images.html b/WORC/doc/_build/html/_modules/WORC/plotting/plot_images.html
index 20e8a132..41258e05 100644
--- a/WORC/doc/_build/html/_modules/WORC/plotting/plot_images.html
+++ b/WORC/doc/_build/html/_modules/WORC/plotting/plot_images.html
@@ -8,7 +8,7 @@
- WORC.plotting.plot_images — WORC 3.6.0 documentation
+ WORC.plotting.plot_images — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -165,7 +164,7 @@
Source code for WORC.plotting.plot_images
#!/usr/bin/env python
-# Copyright 2016-2021 Biomedical Imaging Group Rotterdam, Departments of
+# Copyright 2016-2022 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -190,6 +189,7 @@ Source code for WORC.plotting.plot_images
import SimpleITK as sitk
from skimage import morphology
import WORC.addexceptions as ae
+import scipy.ndimage as nd
[docs]def extract_boundary(contour, radius=2):
@@ -220,7 +220,7 @@ Source code for WORC.plotting.plot_images
thresholds=[-5, 5], zoomfactor=4, dpi=500, normalize=True,
expand=False, boundary=False, square=False, flip=True, rot90=0,
alpha=0.40, axis='axial', index=None, color='cyan', radius=2,
- colormap='gray'):
+ colormap='gray', fill=False):
"""Plot slice of image where mask is largest, with mask as overlay.
image and mask should both be arrays
@@ -306,6 +306,11 @@ Source code for WORC.plotting.plot_images
if mask is not None:
maskslice = np.flipud(maskslice)
+ if fill:
+ print('\t Filling holes.')
+ maskslice = nd.binary_fill_holes(maskslice)
+ maskslice = maskslice.astype(np.uint8)
+
if mask is not None:
if boundary:
print('\t Extracting boundary.')
@@ -476,7 +481,7 @@ Source code for WORC.plotting.plot_images
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/plotting/plot_ranked_scores.html b/WORC/doc/_build/html/_modules/WORC/plotting/plot_ranked_scores.html
index 4961ea82..eac95107 100644
--- a/WORC/doc/_build/html/_modules/WORC/plotting/plot_ranked_scores.html
+++ b/WORC/doc/_build/html/_modules/WORC/plotting/plot_ranked_scores.html
@@ -8,7 +8,7 @@
- WORC.plotting.plot_ranked_scores — WORC 3.6.0 documentation
+ WORC.plotting.plot_ranked_scores — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -702,7 +701,7 @@ Source code for WORC.plotting.plot_ranked_scores
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/plotting/scatterplot.html b/WORC/doc/_build/html/_modules/WORC/plotting/scatterplot.html
index a4e5e48c..716daffb 100644
--- a/WORC/doc/_build/html/_modules/WORC/plotting/scatterplot.html
+++ b/WORC/doc/_build/html/_modules/WORC/plotting/scatterplot.html
@@ -8,7 +8,7 @@
- WORC.plotting.scatterplot — WORC 3.6.0 documentation
+ WORC.plotting.scatterplot — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -292,7 +291,7 @@ Source code for WORC.plotting.scatterplot
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/processing/ExtractNLargestBlobsn.html b/WORC/doc/_build/html/_modules/WORC/processing/ExtractNLargestBlobsn.html
index 85f664f4..f584b176 100644
--- a/WORC/doc/_build/html/_modules/WORC/processing/ExtractNLargestBlobsn.html
+++ b/WORC/doc/_build/html/_modules/WORC/processing/ExtractNLargestBlobsn.html
@@ -8,7 +8,7 @@
- WORC.processing.ExtractNLargestBlobsn — WORC 3.6.0 documentation
+ WORC.processing.ExtractNLargestBlobsn — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -254,7 +253,7 @@ Source code for WORC.processing.ExtractNLargestBlobsn
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/processing/classes.html b/WORC/doc/_build/html/_modules/WORC/processing/classes.html
index ca61bd99..af54c3c0 100644
--- a/WORC/doc/_build/html/_modules/WORC/processing/classes.html
+++ b/WORC/doc/_build/html/_modules/WORC/processing/classes.html
@@ -8,7 +8,7 @@
- WORC.processing.classes — WORC 3.6.0 documentation
+ WORC.processing.classes — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -213,7 +212,7 @@ Source code for WORC.processing.classes
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/processing/label_processing.html b/WORC/doc/_build/html/_modules/WORC/processing/label_processing.html
index f838fdbb..fbba2189 100644
--- a/WORC/doc/_build/html/_modules/WORC/processing/label_processing.html
+++ b/WORC/doc/_build/html/_modules/WORC/processing/label_processing.html
@@ -8,7 +8,7 @@
- WORC.processing.label_processing — WORC 3.6.0 documentation
+ WORC.processing.label_processing — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -472,7 +471,7 @@ Source code for WORC.processing.label_processing
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/resources/fastr_tests/CalcFeatures_test.html b/WORC/doc/_build/html/_modules/WORC/resources/fastr_tests/CalcFeatures_test.html
index 207207a2..485c139b 100644
--- a/WORC/doc/_build/html/_modules/WORC/resources/fastr_tests/CalcFeatures_test.html
+++ b/WORC/doc/_build/html/_modules/WORC/resources/fastr_tests/CalcFeatures_test.html
@@ -8,7 +8,7 @@
- WORC.resources.fastr_tests.CalcFeatures_test — WORC 3.6.0 documentation
+ WORC.resources.fastr_tests.CalcFeatures_test — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -247,7 +246,7 @@ Source code for WORC.resources.fastr_tests.CalcFeatures_test
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/resources/fastr_tests/elastix_test.html b/WORC/doc/_build/html/_modules/WORC/resources/fastr_tests/elastix_test.html
index a1bc5350..e8f1db29 100644
--- a/WORC/doc/_build/html/_modules/WORC/resources/fastr_tests/elastix_test.html
+++ b/WORC/doc/_build/html/_modules/WORC/resources/fastr_tests/elastix_test.html
@@ -8,7 +8,7 @@
- WORC.resources.fastr_tests.elastix_test — WORC 3.6.0 documentation
+ WORC.resources.fastr_tests.elastix_test — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -248,7 +247,7 @@ Source code for WORC.resources.fastr_tests.elastix_test
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/resources/fastr_tests/segmentix_test.html b/WORC/doc/_build/html/_modules/WORC/resources/fastr_tests/segmentix_test.html
index 4b3701bf..0832fdca 100644
--- a/WORC/doc/_build/html/_modules/WORC/resources/fastr_tests/segmentix_test.html
+++ b/WORC/doc/_build/html/_modules/WORC/resources/fastr_tests/segmentix_test.html
@@ -8,7 +8,7 @@
- WORC.resources.fastr_tests.segmentix_test — WORC 3.6.0 documentation
+ WORC.resources.fastr_tests.segmentix_test — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -243,7 +242,7 @@ Source code for WORC.resources.fastr_tests.segmentix_test
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/tools/Elastix.html b/WORC/doc/_build/html/_modules/WORC/tools/Elastix.html
index f7ebc124..3cafabbd 100644
--- a/WORC/doc/_build/html/_modules/WORC/tools/Elastix.html
+++ b/WORC/doc/_build/html/_modules/WORC/tools/Elastix.html
@@ -8,7 +8,7 @@
- WORC.tools.Elastix — WORC 3.6.0 documentation
+ WORC.tools.Elastix — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -488,7 +487,7 @@ Source code for WORC.tools.Elastix
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/tools/Evaluate.html b/WORC/doc/_build/html/_modules/WORC/tools/Evaluate.html
index bc8a8bc4..21c1695a 100644
--- a/WORC/doc/_build/html/_modules/WORC/tools/Evaluate.html
+++ b/WORC/doc/_build/html/_modules/WORC/tools/Evaluate.html
@@ -8,7 +8,7 @@
- WORC.tools.Evaluate — WORC 3.6.0 documentation
+ WORC.tools.Evaluate — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -842,7 +841,7 @@ Source code for WORC.tools.Evaluate
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/tools/Slicer.html b/WORC/doc/_build/html/_modules/WORC/tools/Slicer.html
index d64d783e..c9279754 100644
--- a/WORC/doc/_build/html/_modules/WORC/tools/Slicer.html
+++ b/WORC/doc/_build/html/_modules/WORC/tools/Slicer.html
@@ -8,7 +8,7 @@
- WORC.tools.Slicer — WORC 3.6.0 documentation
+ WORC.tools.Slicer — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -294,7 +293,7 @@ Source code for WORC.tools.Slicer
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/tools/Transformix.html b/WORC/doc/_build/html/_modules/WORC/tools/Transformix.html
index a558fb96..6c12c4b4 100644
--- a/WORC/doc/_build/html/_modules/WORC/tools/Transformix.html
+++ b/WORC/doc/_build/html/_modules/WORC/tools/Transformix.html
@@ -8,7 +8,7 @@
- WORC.tools.Transformix — WORC 3.6.0 documentation
+ WORC.tools.Transformix — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -225,7 +224,7 @@ Source code for WORC.tools.Transformix
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/WORC/tools/createfixedsplits.html b/WORC/doc/_build/html/_modules/WORC/tools/createfixedsplits.html
index 3717fd6f..1d6e71af 100644
--- a/WORC/doc/_build/html/_modules/WORC/tools/createfixedsplits.html
+++ b/WORC/doc/_build/html/_modules/WORC/tools/createfixedsplits.html
@@ -8,7 +8,7 @@
- WORC.tools.createfixedsplits — WORC 3.6.0 documentation
+ WORC.tools.createfixedsplits — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -165,7 +164,7 @@
Source code for WORC.tools.createfixedsplits
#!/usr/bin/env python
-# Copyright 2016-2019 Biomedical Imaging Group Rotterdam, Departments of
+# Copyright 2016-2022 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -185,13 +184,41 @@ Source code for WORC.tools.createfixedsplits
import WORC.addexceptions as ae
from WORC.processing.label_processing import load_labels
import pandas as pd
+import WORC.processing.label_processing as lp
[docs]def createfixedsplits(label_file=None, label_type=None, patient_IDs=None,
- test_size=0.2, N_iterations=1, regression=False,
- stratify=None, modus='singlelabel', output=None):
+ stratify=True, test_size=0.2, N_iterations=1,
+ modus='singlelabel', output=None):
'''
- Create fixed splits for a cross validation.
+ Create fixed splits for a random-split cross-validation.
+
+
+ Parameters
+ ----------
+ label_file : filepath
+ CSV file containing the labels of the patients.
+ label_type: list of strings
+ labels to be extracted from the label file, e.g. ['label1']
+ patient_IDs: list of strings
+ names of patients to take into account. If None, take all
+ stratify: Boolean
+ If True, splits are stratified. In this case, you need to provide
+ label data.
+ test_size: float
+ Percentage of patients in test set per iteration.
+ N_iterations: integer
+ Number of cross-validation iterations
+ modus: str
+ singlelabel or regression. Multilabel not implemented yet.
+ output: filepath
+ csv filename to save output to.
+
+ Returns
+ -------
+ df: pandas Dataframe
+ Fixed splits created.
+
'''
# Check whether input is valid
if patient_IDs is None:
@@ -200,27 +227,37 @@ Source code for WORC.tools.createfixedsplits
label_data = load_labels(label_file, label_type)
patient_IDs = label_data['patient_IDs']
- # Create the stratification object
- if modus == 'singlelabel':
- stratify = label_data['label']
- elif modus == 'multilabel':
- # Create a stratification object from the labels
- # Label = 0 means no label equals one
- # Other label numbers refer to the label name that is 1
- stratify = list()
- labels = label_data['label']
- for pnum in range(0, len(labels[0])):
- plabel = 0
- for lnum, slabel in enumerate(labels):
- if slabel[pnum] == 1:
- plabel = lnum + 1
- stratify.append(plabel)
-
+ else:
+ raise ae.WORCValueError('Either a label file and label type or patient_IDs need to be provided!')
+ else:
+ if stratify is True:
+ if label_file is not None and label_type is not None:
+ # Extract data for specific patients only
+ label_data, _ = lp.findlabeldata(label_file,
+ label_type,
+ pids=patient_IDs)
else:
- raise ae.WORCKeyError('{} is not a valid modus!').format(modus)
+ raise ae.WORCValueError('A label file and label type needs to be provided for stratified splitting!')
+
+ # Create the stratification object
+ if stratify:
+ if modus == 'singlelabel':
+ stratify = label_data['label'][0].tolist()
+ elif modus == 'multilabel':
+ # Create a stratification object from the labels
+ # Label = 0 means no label equals one
+ # Other label numbers refer to the label name that is 1
+ stratify = list()
+ labels = label_data['label']
+ for pnum in range(0, len(labels[0])):
+ plabel = 0
+ for lnum, slabel in enumerate(labels):
+ if slabel[pnum] == 1:
+ plabel = lnum + 1
+ stratify.append(plabel)
else:
- raise ae.WORCIOError('Either a label file and label type or patient_IDs need to be provided!')
-
+ raise ae.WORCKeyError('{} is not a valid modus!').format(modus)
+
pd_dict = dict()
for i in range(N_iterations):
print(f'Splitting iteration {i + 1} / {N_iterations}')
@@ -230,7 +267,8 @@ Source code for WORC.tools.createfixedsplits
# Define stratification
unique_patient_IDs, unique_indices =\
np.unique(np.asarray(patient_IDs), return_index=True)
- if regression:
+
+ if modus == 'regression' or not stratify:
unique_stratify = None
else:
unique_stratify = [stratify[i] for i in unique_indices]
@@ -290,6 +328,22 @@ Source code for WORC.tools.createfixedsplits
df.to_csv(output)
return df
+
+
+[docs]def test():
+ patient_IDs = ['HN1004', 'HN1077', 'HN1088', 'HN1146', 'HN1159', 'HN1192', 'HN1259', 'HN1260',
+ 'HN1323', 'HN1331', 'HN1339', 'HN1342', 'HN1372', 'HN1491', 'HN1501', 'HN1519',
+ 'HN1524', 'HN1554', 'HN1560', 'HN1748']
+ createfixedsplits(label_file=r'C:\Users\Martijn Starmans\Documents\GitHub\WORCTutorial\Data\Examplefiles\pinfo_HN.csv',
+ patient_IDs=patient_IDs, stratify=True,
+ label_type=['imaginary_label_1'], N_iterations=3, output='fixedsplits.csv')
+
+
+if __name__ == "__main__":
+ test()
+
+
+
@@ -302,7 +356,7 @@ Source code for WORC.tools.createfixedsplits
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_modules/index.html b/WORC/doc/_build/html/_modules/index.html
index f97bcc63..6172bc76 100644
--- a/WORC/doc/_build/html/_modules/index.html
+++ b/WORC/doc/_build/html/_modules/index.html
@@ -8,7 +8,7 @@
- Overview: module code — WORC 3.6.0 documentation
+ Overview: module code — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -259,7 +258,7 @@ All modules for which code is available
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/_sources/index.rst.txt b/WORC/doc/_build/html/_sources/index.rst.txt
index 77ec657c..4c699bb4 100644
--- a/WORC/doc/_build/html/_sources/index.rst.txt
+++ b/WORC/doc/_build/html/_sources/index.rst.txt
@@ -35,6 +35,14 @@ The official documentation can be found at `WORC.readthedocs.io `_
+
+ `M. P. A. Starmans, et al. "The WORC database: MRI and CT scans, segmentations, and clinical labels for 930 patients from six radiomics studies." medRxiv preprint https://doi.org/10.1101/2021.08.19.21262238 (2021) `_
+
+ `M. P. A. Starmans "Streamlined Quantitative Imaging Biomarker Development: Generalization of radiomics through automated machine learning ". PhD thesis, 2022, February. Erasmus University Rotterdam. Retrieved from http://hdl.handle.net/1765/137089 `_
+
For more information regarding radiomics, we recommend the following book chapter:
`M. P. A. Starmans, S. R. van der Voort, J. M. Castillo T., J. F. Veenland, S. Klein, W. J. Niessen. "Radiomics: Data mining using quantitative medical image features" Handbook of Medical Image Computing and Computer Assisted Intervention (MICCAI) 2020 `_
@@ -89,7 +97,6 @@ WORC Documentation
static/additionalfunctionality.rst
static/faq.rst
static/developerdocumentation.rst
- static/file_description.rst
static/changelog.rst
WORC User reference
diff --git a/WORC/doc/_build/html/_sources/static/configuration.rst.txt b/WORC/doc/_build/html/_sources/static/configuration.rst.txt
index 0ad52d55..454fd03a 100644
--- a/WORC/doc/_build/html/_sources/static/configuration.rst.txt
+++ b/WORC/doc/_build/html/_sources/static/configuration.rst.txt
@@ -16,7 +16,6 @@ will be ignored. Additionally, .py files from the ``$FASTRHOME/config.d`` folder
as well. You will see that upon installation, WORC has already put a ``WORC_config.py`` file in the
``config.d`` folder.
-% Note: Above was originally from quick start
As ``WORC`` and the default tools used are mostly Python based, we've chosen
to put our configuration in a ``configparser`` object. This has several
advantages:
@@ -90,6 +89,12 @@ WORC on a cluster with nodes supporting only a single core to be used
per node, e.g. the BIGR cluster, use only 1 core and threading as a
backend.
+.. note::
+
+ If you want to override configuration fields that are fingerprinted, e.g.
+ the preprocessing, turn the fingerprinting off.
+
+
**Description:**
.. include:: ../autogen/config/WORC.config_General_description.rst
@@ -99,6 +104,8 @@ backend.
.. include:: ../autogen/config/WORC.config_General_defopts.rst
+
+
.. _config-Labels:
Labels
@@ -142,6 +149,23 @@ set: ``config[Labels][label_names] = Label1, Label2``
.. include:: ../autogen/config/WORC.config_Labels_defopts.rst
+.. _config-Fingerprinting:
+
+Fingerprinting
+~~~~~~~~~~~~~~~
+The fingerprinting nodes are the first computational nodes to create
+a fingerprint of your dataset and accordingly adjust some configuration
+settings, see the `WORC paper `_.
+
+**Description:**
+
+.. include:: ../autogen/config/WORC.config_Fingerprinting_description.rst
+
+**Defaults and Options:**
+
+.. include:: ../autogen/config/WORC.config_Fingerprinting_defopts.rst
+
+
.. _config-Preprocessing:
Preprocessing
@@ -152,6 +176,14 @@ as DICOM are scaled to Hounsfield Units. For more details on the preprocessing
options, please see
:ref:`the additional functionality chapter `.
+
+.. note::
+
+ As several preprocessing functions are fingerprinted, if you want to edit
+ these configuration settings yourself, please turn off the fingerprinting,
+ see the :ref:`General section of the config `.
+
+
**Description:**
.. include:: ../autogen/config/WORC.config_Preprocessing_description.rst
@@ -160,7 +192,6 @@ options, please see
.. include:: ../autogen/config/WORC.config_Preprocessing_defopts.rst
-
.. _config-Segmentix:
Segmentix
diff --git a/WORC/doc/_build/html/_sources/static/quick_start.rst.txt b/WORC/doc/_build/html/_sources/static/quick_start.rst.txt
index 1289e11b..fcbafc19 100644
--- a/WORC/doc/_build/html/_sources/static/quick_start.rst.txt
+++ b/WORC/doc/_build/html/_sources/static/quick_start.rst.txt
@@ -3,9 +3,6 @@
Quick start guide
=================
-This manual will show users how to install WORC, configure WORC and construct and run a simple experiment.
-It's exactly the same as the `WORC Tutorial `_.
-
.. _installation-chapter:
Installation
@@ -49,15 +46,17 @@ Tutorials
---------
To start out using WORC, we strongly recommend you to follow the tutorials located in the
`WORCTutorial Github `_. This repository
-contains tutorials for an introduction to WORC, as well as more advanced workflows.
+contains tutorials for an introduction to WORC, as well as more advanced workflows. We recommend
+starting with the WORCTutorialSimple, of which the part below is an exact copy.
If you run into any issue, you can first debug your network using
`the fastr trace tool `_.
-If you're stuck, feel free to post an issue on the `WORC Github `_.
+If you're stuck, check out the FAQ first at https://worc.readthedocs.io/en/latest/static/faq.html,
+or feel free to post an issue on the `WORC Github `_.
Running an experiment
----------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
We strongly recommend you to follow the tutorials, see the section above. In this section,
a point by point description of the tutorial is given.
@@ -177,9 +176,11 @@ After defining the inputs, the following code can be used to run your first expe
# Set the input data according to the variables we defined earlier
experiment.images_from_this_directory(imagedatadir,
- image_file_name=image_file_name)
+ image_file_name=image_file_name,
+ is_training=True)
experiment.segmentations_from_this_directory(imagedatadir,
- segmentation_file_name=segmentation_file_name)
+ segmentation_file_name=segmentation_file_name,
+ is_training=True)
experiment.labels_from_this_file(label_file)
experiment.predict_labels(label_name)
@@ -187,7 +188,7 @@ After defining the inputs, the following code can be used to run your first expe
# Valid quantitative types are ['CT', 'PET', 'Thermography', 'ADC']
# Valid qualitative types are ['MRI', 'DWI', 'US']
experiment.set_image_types(['CT'])
-
+
# Use the standard workflow for your specific modus
if modus == 'binary_classification':
experiment.binary_classification(coarse=coarse)
@@ -225,12 +226,18 @@ named after your experiment name.
'Features',
'features_*.hdf5'))
+ if len(feature_files) == 0:
+ raise ValueError('No feature files found: your network has failed.')
+
feature_files.sort()
featurefile_p1 = feature_files[0]
features_p1 = pd.read_hdf(featurefile_p1)
# Read the overall peformance
performance_file = os.path.join(experiment_folder, 'performance_all_0.json')
+ if not os.path.exists(performance_file):
+ raise ValueError(f'No performance file {performance_file} found: your network has failed.')
+
with open(performance_file, 'r') as fp:
performance = json.load(fp)
@@ -246,7 +253,7 @@ named after your experiment name.
for k, v in stats.items():
print(f"\t {k} {v}.")
-.. note:: the performance is probably horrible, which is expected as we ran the experiment on coarse settings. These settings are recommended to only use for testing: see also below.
+.. note:: The performance is probably horrible, which is expected as we ran the experiment on coarse settings. These settings are recommended to only use for testing: see also below.
Tips and Tricks
@@ -254,19 +261,31 @@ Tips and Tricks
For tips and tricks on running a full experiment instead of this simple
example, adding more evaluation options, debugging a crashed network etcetera,
-please go to :ref:`User Manual ` chapter.
+please go to
We advice you to look at the docstrings of the SimpleWORC functions
introduced in this tutorial, and explore the other SimpleWORC functions,
s SimpleWORC offers much more functionality than presented here.
+For tips and tricks on running a full experiment instead of this simple
+example, adding more evaluation options, debugging a crashed network etcetera,
+please go to :ref:`User Manual ` chapter or
+the :ref:`Additional functionality ` chapter. If you
+run into any issues, check the :ref:`FAQ `,
+make an issue on the WORC Github, or feel free to mail me.
+
+We advise you to look at the docstrings of the SimpleWORC functions
+introduced in this tutorial, and explore the other SimpleWORC functions,
+as SimpleWORC offers much more functionality than presented here, see
+the documentation: https://worc.readthedocs.io/en/latest/autogen/WORC.facade.html#WORC.facade.simpleworc.SimpleWORC
+
Some things we would advice to always do:
* Run actual experiments on the full settings (coarse=False):
-.. code-block:: python
+ .. code-block:: python
- coarse = False
- experiment.binary_classification(coarse=coarse)
+ coarse = False
+ experiment.binary_classification(coarse=coarse)
.. note:: This will result in more computation time. We therefore recommmend
to run this script on either a cluster or high performance PC. If so,
@@ -275,16 +294,33 @@ Some things we would advice to always do:
.. code-block:: python
- experiment.set_multicore_execution()
+ experiment.set_multicore_execution()
This is not required when running WORC on the BIGR or SURFSara Cartesius cluster,
as automatic detectors for these clusters have been built into SimpleWORC and BasicWORC.
* Add extensive evaluation: ``experiment.add_evaluation()`` before ``experiment.execute()``:
-.. code-block:: python
+ .. code-block:: python
+
+ experiment.add_evaluation()
+
+ See the "Outputs and evaluation of your network" section in the :ref:`User Manual `
+ chapter for more details on the evaluation outputs.
+
+Changing fields in the configuration can be done with the add_config_overrides function, see below.
+We recommend doing this after the modus part, as these also perform config_overrides.
+NOTE: all configuration fields have to be provided as strings.
+
+ .. code-block:: python
+
+ overrides = {
+ 'Classification': {
+ 'classifiers': 'SVM',
+ },
+ }
- experiment.add_evaluation()
+ experiment.add_config_overrides(overrides)
-For a complete overview of all functions, please look at the
-:ref:`Config chapter `.
+ For a complete overview of all configuration functions, please look at the
+ :ref:`Config chapter `.
diff --git a/WORC/doc/_build/html/_sources/static/user_manual.rst.txt b/WORC/doc/_build/html/_sources/static/user_manual.rst.txt
index a8cb3dae..16fd55b2 100644
--- a/WORC/doc/_build/html/_sources/static/user_manual.rst.txt
+++ b/WORC/doc/_build/html/_sources/static/user_manual.rst.txt
@@ -9,70 +9,169 @@ and describe the more advanced features.
.. _tools:
-Interacting with WORC
----------------------
-The WORC toolbox is build around of one main object, the WORC object. This object provides all functionality
+WORC object and facades
+------------------------
+
+The WORC toolbox is built around one main object, the ``WORC`` object. This object provides all functionality
of the toolbox. However, to make certain functionalities easier to use and limit the complexity,
-we have constructed two facades. The ``SimpleWORC`` facade is the simplest to interact with and provides
-all functionality required for conducting basic experiments. The ``BasicWORC`` object is based on the ``SimpleWORC``
-object and provides several more advances functions. The specific functionalities of these two facades and the
-``WORC`` object itself can be found in this section.
+we have constructed two facades: ``SimpleWORC`` and ``BasicWORC``. We advise new users to start with ``SimpleWORC``,
+more advanced users ``BasicWORC``, and only use ``WORC`` for development purposes. Additionally, we advise you to take a look at the :ref:`configuration chapter `
+for all the settings that can be adjusted in WORC.
-For documentation on ``SimpleWORC`` and ``BasicWORC``, please look at the documentation
-within those modules: :py:mod:`WORC.facade.simpleworc` and :py:mod:`WORC.facade.basicworc`. Many of the functions are actually wrappers to interact with the WORC
-object, and therefore use the functionality described below. For basic usage, only using
-``SimpleWORC``, it's respective documentation and the
-`WORCTutorial Github `_ should be sufficient.
+The specific functionalities of these two facades and the ``WORC`` object itself can be found in this section.
-Additionally, we advice you to take a look at the :ref:`configuration chapter `
-for all the settings that can be adjusted in ``WORC``.
+SimpleWORC
+~~~~~~~~~~~~~~~~
+The ``SimpleWORC`` facade is the simplest to interact with and provides
+all functionality required for conducting basic experiments.
+Much of the documentation of ``SimpleWORC`` can be found in its tutorial (https://github.com/MStarmans91/WORCtutorial and
+:ref:`the quick start `) and the docstrings of the functions in the object (:py:mod:`WORC.facade.simpleworc`).
+Many of the functions are wrappers to interact with the ``WORC`` object, and therefore in the background use the functionality described below.
-The WORC Object
+BasicWORC
~~~~~~~~~~~~~~~~
+The ``BasicWORC`` object is based on the ``SimpleWORC`` object, and thus provides exactly the same functionality,
+plus several more advanced functions. Much of the documentation of ``BasicWORC`` can be found in its tutorial (https://github.com/MStarmans91/WORCtutorial)
+and the docstrings of the functions in the object (:py:mod:`WORC.facade.basicworc`).
+
+One of the functionalities that ``BasicWORC`` provides over ``SimpleWORC`` is that you can also directly provide
+your data to ``WORC`` (e.g. ``images_train``) instead of using one of the wrapping functions of
+``SimpleWORC`` (e.g. ``images_from_this_directory``).
+
+.. _WORC:
+
+WORC
+~~~~~~~~~~~~~~~
+The ``WORC`` object can directly be accessed in the following way:
.. code-block:: python
import WORC
network = WORC.WORC('somename')
Its attributes are split in a couple of categories. We will not discuss
-the WORC.defaultconfig() function here, which generates the default
+the ``WORC.defaultconfig()`` function here, which generates the default
configuration, as it is listed in a separate page, see the :ref:`Config chapter `.
More detailed documentation of the various functions can be found in the docstrings of :py:mod:`WORC.WORC`:
we will mostly focus on the attributes, inputs, outputs and workflows here.
+There are numerous ``WORC`` attributes which serve as source nodes (i.e. inputs) for the
+FASTR network. These are:
-Input file definitions
-----------------------
+- ``images_train`` and ``images_test``
+- ``segmentations_train`` and ``segmentations_test``
+- ``semantics_train`` and ``semantics_test``
+- ``labels_train`` and ``labels_test``
+- ``masks_train`` and ``masks_test``
+- ``features_train`` and ``features_test``
+- ``metadata_train`` and ``metadata_test``
+- ``Elastix_Para``
+- ``fastrconfigs``
-Attributes: Sources
-~~~~~~~~~~~~~~~~~~~
+These directly correspond to the :ref:`input file definitions discussed below <inputs>`.
+How to provide your data to ``WORC`` is also described in this section.
-There are numerous WORC attributes which serve as source nodes for the
-FASTR network. These are:
+After supplying your sources as described above, you need to build the FASTR network. This
+can be done through the ``WORC.build()`` command. Depending on your sources,
+several nodes will be added and linked. This creates the ``WORC.network``
+object, which is a ``fastr.network`` object. You can edit this network
+freely, e.g. add another source or node. You can print the network with
+the ``WORC.network.draw_network`` command.
+Next, we have to tell the network which sources should be used in the
+source nodes. This can be done through the ``WORC.set()`` function. This will
+put your supplied sources into the source nodes and also creates the
+needed sink nodes. You can check these by looking at the created
+``WORC.source_data`` and ``WORC.sink_data`` objects.
-- images_train and images_test
-- segmentations_train and segmentations_test
-- semantics_train and semantics_test
-- labels_train and labels_test
-- masks_train and masks_test
-- features_train and features_test
-- metadata_train and metadata_test
-- Elastix_Para
-- fastrconfigs
+Finally, after completing above steps, you can execute the network
+through the ``WORC.execute()`` command.
+Thus a typical experiment in ``WORC`` would follow the following structure,
+assuming you have created the relevant objects as listed above:
-When using a single dataset for both training and evaluation, you should
-supply all sources in train objects. By default, performance on a single
-dataset will be evaluated using cross-validation. Optionally, you can supply
-a separate training and test set.
+.. code-block:: python
+
+ import WORC
+
+ # Create object
+ experiment = WORC.WORC('name')
+
+ # Append sources
+ experiment.images_train.append(images_train)
+ experiment.segmentations_train.append(segmentations_train)
+ experiment.labels_train.append(labels_train)
+
+ # Create a configuration
+ config = experiment.defaultconfig()
+ experiment.configs.append(config)
+
+ # Build, set, and execute
+ experiment.build()
+ experiment.set()
+ experiment.execute()
+
+
+.. _inputs:
+
+Input file definitions and how to provide them to WORC
+-------------------------------------------------------
-Each source should be given as a dictionary of strings corresponding to
-the source filenames. Each element should correspond to a single object for the classification,
-e.g. a patient. The keys are used to match the features to the
-label and semantics sources, so make sure these correspond to the label
-file.
+Providing your inputs to WORC and data flows
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Let's first start on how to provide any of the below mentioned types of input data to ``WORC``.
+``WORC`` facilitates different data flows (or networks or pipelines), which are automatically
+constructed based on the inputs and configuration you provide. We here
+discuss how the data can be set in ``BasicWORC`` and ``WORC``:
+``SimpleWORC`` provides several wrappers to more easily provide data, which interact with
+these objects.
+As an example, we here show how to provide images and segmentations to ``BasicWORC`` and ``WORC``.
+
+.. code-block:: python
+
+ images1 = {'patient1': '/data/Patient1/image_MR.nii.gz', 'patient2': '/data/Patient2/image_MR.nii.gz'}
+ segmentations1 = {'patient1': '/data/Patient1/seg_tumor_MR.nii.gz', 'patient2': '/data/Patient2/seg_tumor_MR.nii.gz'}
+
+ network.images_train.append(images1)
+ network.segmentations_train.append(segmentations1)
+
+Here ``network`` can be a ``BasicWORC`` or ``WORC`` object. Each source is a list, to which you can provide
+dictionaries containing the actual sources. In these dictionaries, each element should correspond to a single
+object for classification, e.g., a patient or a lesion. The keys indicate
+the ID of the element, e.g. the patient name, while the values should be strings corresponding to
+the source filenames. The keys are used to match the images and segmentations to the
+label and semantics sources, so make sure these correspond to the label file.
+
+.. note:: You have to make sure the images and segmentation (and other) sources match in size,
+ i.e., that the same keys are present.
+
+.. note:: You have to supply a configuration file for each image or feature source you append.
+ Thus, in the first example above, you need to append two configurations!
+
+Using multiple sources per patient
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+If you want to provide multiple sources, e.g. images, per patient, simply append another dictionary
+to the source list, e.g.:
+
+.. code-block:: python
+
+ images1 = {'patient1': '/data/Patient1/image_MR.nii.gz', 'patient2': '/data/Patient2/image_MR.nii.gz'}
+ images2 = {'patient1': '/data/Patient1/image_CT.nii.gz', 'patient2': '/data/Patient2/image_CT.nii.gz'}
+ segmentations1 = {'patient1': '/data/Patient1/seg_tumor_MR.nii.gz', 'patient2': '/data/Patient2/seg_tumor_MR.nii.gz'}
+ segmentations2 = {'patient1': '/data/Patient1/seg_tumor_CT.nii.gz', 'patient2': '/data/Patient2/seg_tumor_CT.nii.gz'}
+
+ network.images_train.append(images1)
+ network.images_train.append(images2)
+
+ network.segmentations_train.append(segmentations1)
+ network.segmentations_train.append(segmentations2)
+
+
+``WORC`` will use the keys of the dictionaries to match the features from the same object or patient and combine
+them for the machine learning part.
+
+Mutiple ROIs or segmentations per object/patient
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
You can of course have multiple images or ROIs per object, e.g. a liver
ROI and a tumor ROI. This can be easily done by appending to the
sources. For example:
@@ -89,8 +188,8 @@ sources. For example:
network.segmentations_train.append(segmentations1)
network.segmentations_train.append(segmentations2)
-When using multiple sequences per patients (e.g. T1 and T2), the same
-appending procedure can be used.
+``WORC`` will use the keys of the dictionaries to match the features from the same object or patient and combine
+them for the machine learning part.
If you want to use multiple ROIs independently per patient, e.g. multiple tumors, you can do so
by simply adding them to the dictionary. To make sure the data is still split per patient in the
@@ -104,24 +203,79 @@ cross-validation, please add a sample number after an underscore to the key, e.g
If your label file (see below) contains the label ''patient1'', both samples will get this label
in the classification.
-.. note:: You have to make sure the images and segmentation sources match in size.
+.. note:: ``WORC`` will automatically group all samples from a patient either all in the training
+ or all in the test set.
-.. note:: You have to supply a configuration file for each image or feature source you append.
- Thus, in the first example above, you need to append two configurations!
+Training and test sets
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+When using a single dataset for both training and evaluation, you should
+only supply "training" datasets. By default, performance on a single
+dataset will be evaluated using cross-validation (default random split, but leave-one-out can also be configured).
+Alternatively, you can supply a separate training and test set, by which you tell
+``WORC`` to use this single train-test split. To distinguish between these, for every source, we have a
+train and test object which you can set:
-.. note:: When you use
- multiple image sequences, you can supply a ROI for each sequence by
- appending to to segmentations object. Alternatively, when you do not
- supply a segmentation for a specific sequence, WORC will use Elastix to
- align this sequence to another through image registration. It will then
- warp the segmentation from this sequence to the sequence for which you
- did not supply a segmentation. **WORC will always align these sequences with no segmentations to the first sequence, i.e. the first object in the images_train list.**
- Hence make sure you supply the sequence for which you have a ROI as the first object.
+.. code-block:: python
-Images and segmentations
-^^^^^^^^^^^^^^^^^^^^^^^^
+ images_train = {'patient1': '/data/Patient1/image_MR.nii.gz', 'patient2': '/data/Patient2/image_MR.nii.gz'}
+ segmentations_train = {'patient1': '/data/Patient1/seg_tumor_MR.nii.gz', 'patient2': '/data/Patient2/seg_tumor_MR.nii.gz'}
+
+ network.images_train.append(images_train)
+ network.segmentations_train.append(segmentations_train)
+
+ images_test = {'patient3': '/data/Patient3/image_MR.nii.gz', 'patient4': '/data/Patient4/image_MR.nii.gz'}
+ segmentations_test = {'patient3': '/data/Patient3/seg_tumor_MR.nii.gz', 'patient4': '/data/Patient4/seg_tumor_MR.nii.gz'}
+
+ network.images_test.append(images_test)
+ network.segmentations_test.append(segmentations_test)
-The minimal input for a Radiomics pipeline consists of either images
+Another alternative is to only provide training objects, but also a .csv defining fixed training and test splits to be used for the
+evaluation, e.g. ``network.fixed_splits = '/data/fixedsplits.csv'``. See the https://github.com/MStarmans91/WORCtutorial repository for an example. ``SimpleWORC`` has the ``set_fixed_splits`` function to set this object.
+
+Missing data and dummies
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+Suppose you are missing a specific image for a specific patient. ``WORC`` can impute the features of this patient.
+The underlying package we use for workflow execution (fastr) cannot, however, handle missing data. Therefore, to tell ``WORC`` to
+do so, you still have to provide a source but can add ''Dummy'' to the key:
+
+.. code-block:: python
+
+ images1 = {'patient1': '/data/Patient1/image_MR.nii.gz', 'patient2_Dummy': '/data/Patient1/image_MR.nii.gz'}
+ segmentations1 = {'patient1': '/data/Patient1/seg_tumor_MR.nii.gz', 'patient2_Dummy': '/data/Patient1/seg_tumor_MR.nii.gz'}
+
+ network.images_train.append(images1)
+ network.segmentations_train.append(segmentations1)
+
+``WORC`` will process the sources normally up till the imputation part, so you have to provide valid data. As you see in the example above,
+we simply provided data from another patient.
+
+Segmentation on the first image, but not on the others
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+When you use multiple image sequences, you can supply a ROI for each sequence by
+appending to the segmentations object as above. Alternatively, when you do not
+supply a segmentation for a specific sequence, ``WORC`` will use Elastix to
+align this sequence to another through image registration. It will then
+warp the segmentation from this sequence to the sequence for which you
+did not supply a segmentation. **WORC will always align these sequences with no segmentations to the first sequence, i.e. the first object in the images_train list.**
+Hence make sure you supply the sequence for which you have a ROI as the first object:
+
+.. code-block:: python
+
+ images1 = {'patient1': '/data/Patient1/image_MR.nii.gz', 'patient2': '/data/Patient2/image_MR.nii.gz'}
+ images2 = {'patient1': '/data/Patient1/image_CT.nii.gz', 'patient2': '/data/Patient2/image_CT.nii.gz'}
+ segmentations1 = {'patient1': '/data/Patient1/seg_tumor_MR.nii.gz', 'patient2': '/data/Patient2/seg_tumor_MR.nii.gz'}
+
+ network.images_train.append(images1)
+ network.images_train.append(images2)
+
+ network.segmentations_train.append(segmentations1)
+
+When providing only a segmentation for the first image in this way, ``WORC`` will automatically
+recognize that it needs to use registration.
+
+Images and segmentations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The minimal input for a radiomics pipeline consists of either images
plus segmentations, or features, plus a label file (and a configuration,
but you can just use the default one).
@@ -132,7 +286,7 @@ image formats such as DICOM, NIFTI, TIFF, NRRD and MHD.
.. _um-labels:
Labels
-^^^^^^
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
The labels are predicted in the classification: should be a .txt or .csv file.
The first column should head ``Patient`` and contain the patient ID. The next columns
can contain labels you want to predict, e.g. tumor type, risk, genetics. For example:
@@ -151,30 +305,30 @@ can contain labels you want to predict, e.g. tumor type, risk, genetics. For exa
These labels are matched to the correct image/features by the sample names of the image/features. So in this
case, your sources should look as following:
-
.. code-block:: python
images_train = {'patient1': ..., 'patient2': ..., ...}
segmentations_train = {'patient1': ..., 'patient2': ..., ...}
-Semantics
-^^^^^^^^^
-Semantic features are non-computational features and are extracted using PREDICT. Examples include
+.. note:: ``WORC`` will automatically group all samples from a patient either all in the training
+ or all in the test set.
+
+Semantics or non-radiomics features
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Semantic features are non-computational features, thus features that you supply instead of extract. Examples include
using the age and sex of the patients in the classification. You can
supply these as a .csv listing your features per patient, similar to the :ref:`label file `
-
Masks
-^^^^^
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
WORC contains a segmentation preprocessing tool, called segmentix.
The idea is that you can manipulate
your segmentation, e.g. using dilation, then use a mask to make sure it
is still valid. See the :ref:`config chapter ` for all segmentix options.
-
Features
-^^^^^^^^
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If you already computed your features, e.g. from a previous run, you can
directly supply the features instead of the images and segmentations and
skip the feature computation step. These should be stored in .hdf5 files
@@ -182,7 +336,7 @@ matching the WORC format.
Metadata
-^^^^^^^^
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
This source can be used if you want to use tags from the DICOM header as
features, e.g. patient age and sex. In this case, this source should
contain a single DICOM per patient from which the tags are read.
@@ -190,9 +344,8 @@ Check the PREDICT.imagefeatures.patient_feature module for the currently
implemented tags.
-
Elastix_Para
-^^^^^^^^^^^^
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
If you have multiple images for each patient, e.g. T1 and T2, but only a
single segmentation, you can use image registration to align and
transform the segmentation to the other modality. This is done in WORC
@@ -205,74 +358,34 @@ map and pass this object to ``WORC``.
``WORC.images_train`` (or test) source you supply. The segmentation
will be aligned to all other image sources.
+.. _um-evaluation:
+Outputs and evaluation of your network
+---------------------------------------
+General remark: when we talk about a sample, we mean one sample that has a set of features associated with it and is thus used as such in the model training or evaluation.
+A sample can correspond with a single patient, but if you have multiple tumors per patient for which features are separately extracted per tumor, these can be treated as separate samples.
-Construction and execution commands
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-After supplying your sources as described above, you need to build the FASTR network. This
-can be done through the ``WORC.build()`` command. Depending on your sources,
-several nodes will be added and linked. This creates the ``WORC.network``
-object, which is a ``fastr.network`` object. You can edit this network
-freely, e.g. add another source or node. You can print the network with
-the ``WORC.network.draw_network`` command.
-
-
-Next, we have to tell the network which sources should be used in the
-source nodes. This can be done through the ``WORC.set()`` function. This will
-put your supplied sources into the source nodes and also creates the
-needed sink nodes. You can check these by looking at the created
-``WORC.source_data`` and ``WORC.sink_data`` objects.
-
-Finally, after completing above steps, you can execute the network
-through the ``WORC.execute()`` command.
+The following outputs and evaluation methods are always generated:
-Thus a typical experiment in ``WORC`` would follow the following structure,
-assuming you have created the relevant objects as listed above:
+.. note:: For every output file, fastr generates a provenance file (``...prov.json``) stating how a file was generated, see https://fastr.readthedocs.io/en/stable/static/user_manual.html#provenance.
-.. code-block:: python
-
- import WORC
-
- # Create object
- experiment = WORC.WORC('name')
-
- # Append sources
- experiment.images_train.append(images_train)
- experiment.segmentations_train.append(segmentations_train)
- experiment.labels_train.append(labels_train)
-
- # Create a configuration
- config = experiment.defaultconfig()
- experiment.configs.append(config)
-
- # Build, set, and execute
- network.build()
- network.set()
- network.execute()
-
-
-Evaluation of your network
---------------------------
-
-In WORC, there are two options for testing your fitted models:
-
-1. Single dataset: cross-validation (currently only random-split)
-2. Separate train and test dataset: bootstrapping on test dataset
-
-Within these evaluation settings, the following performance evaluation methods are used:
-
-1. Confidence intervals on several metrics:
+1. Performance of your models (main output).
+ Stored in file ``performance_all_{num}.json``. If you created multiple models to predict multiple labels, or did multilabel classification, the ``{num}`` corresponds
+ to the label. The file consists of three parts.
+
+ **Mean and 95% confidence intervals of several performance metrics.**
+
For classification:
a. Area under the curve (AUC) of the receiver operating characteristic (ROC) curve. In a multiclass setting, we use the multiclass AUC from the `TADPOLE Challenge `_.
b. Accuracy.
- c. Balanced Classification Accuracy (BCA) as defined by the `TADPOLE Challenge `_.
+ c. Balanced Classification Accuracy (BCA), based on Balanced Classification Rate by `Tharwat, A., 2021. Classification assessment methods. Applied Computing and Informatics 17, 168–192.`.
d. F1-score
- e. Sensitivity, aka recall or true positive rate
- f. Specificity, aka true negative rate
+ e. Sensitivity or recall or true positive rate
+ f. Specificity or true negative rate
g. Negative predictive value (NPV)
- h. Precision, aka Positive predictive value (PPV)
+ h. Precision or Positive predictive value (PPV)
For regression:
@@ -294,36 +407,126 @@ Within these evaluation settings, the following performance evaluation methods a
In bootstrapping, 95% confidence intervals are created using the ''standard'' method according to a normal distribution: see Table 6, method 1 in `Efron B., Tibshirani R. Bootstrap Methods for Standard Errors,
Confidence Intervals, and Other Measures of Statistical Accuracy, Statistical Science Vol.1, No,1, 54-77, 1986`.
-2. ROC and PRC curves with 95% confidence intervals using the fixed-width bands method, see `Macskassy S. A., Provost F., Rosset S. ROC Confidence Bands: An Empirical Evaluation. In: Proceedings of the 22nd international conference on Machine learning. 2005.`
+ **Rankings of your samples**
+ In this dictionary, the "Percentages" part shows how often a sample was classified correctly
+ when that sample was in the test set. The number of times the sample was in the test set is also listed.
+ Those samples that were always classified correctly or always classified incorrectly are also named, including their ground truth label.
+
+ **The metric values for each train-test cross-validation iteration**
+ These are the values on which the confidence intervals are based.
+
+2. The configuration used by WORC.
+
+ Stored in files ``config_{type}_{num}.ini``. These are the result of the fingerprinting of your dataset. The ``config_all_{num}.ini`` config is used in classification, the other types
+ are used for feature extraction and are named after the image types you provided. For example, if you provided two image types, ``['MRI', 'CT']``, you will get
+ ``config_MRI_0.ini`` and ``config_CT_0.ini``. If you provide multiple of the same types, the numbers will change. The fields correspond with those from :ref:`configuration chapter `.
+
+3. The fitted models.
+
+ Stored in file ``estimator_all_{num}.hdf5``. Contains a pandas dataframe, with inside a pandas series per label for which WORC fitted a model, commonly just one.
+ The series contains the following attributes:
+
+ - classifiers: a list with per train-test cross-validation, the fitted model on the training set. These are thus the actually fitted models.
+ - X_train: a list with per train-test cross-validation, a list with for each sample in the training set all feature values. These can be used in re-fitting.
+ - Y_train: a list with per train-test cross-validation, a list with for each sample in the training set the ground truth labels. These can be used in re-fitting.
+ - patient_ID_train: a list with per train-test cross-validation, a list with the labels of all samples included in the training set.
+ - X_test: a list with per train-test cross-validation, a list with for each sample in the test set all feature values. These can be used in re-fitting.
+ - Y_test: a list with per train-test cross-validation, a list with for each sample in the test set the ground truth labels. These can be used in re-fitting.
+ - patient_ID_test: a list with per train-test cross-validation, a list with the labels of all samples included in the test set.
+ - config: the WORC config used. Corresponds to the ``config_all_{num}.ini`` file mentioned above.
+ - random-seed: a list with per train-test cross-validation, the random seed used in splitting the train and test dataset.
+ - feature_labels: the names of the features. As these are the same for all samples, only one set is provided.
+
+4. The extracted features.
+
+ Stored in the ``Features`` folder, in the files ``features_{featuretoolboxname}_{image_type}_{num}_{sample_id}.hdf5``. Contains a pandas series with the following attributes:
+
+ - feature_labels: the labels or names of the features.
+ - feature_values: the value of the features. Each element corresponds with the same element from the feature_labels attribute.
+ - parameters: the parameters used in the feature extraction. Originate from the WORC config.
+ - image_type: the type of the image that was used, which you as user provided. Used in the feature labels to distinguish between features extracted from different images.
+
+The following outputs and evaluation methods are only created when ``WORC.add_evaluation()`` is used (similar for ``SimpleWORC`` and ``BasicWORC``),
+and are stored in the ``Evaluation`` folder in the output folder of your experiment.
-3. Univariate statistical testing of the features using:
+1. Receiver Operating Characteristic (ROC) and Precision-Recall (PR) curves.
+
+ Stored in files ``ROC_all_{num}.{ext}`` and ``PRC_all_{num}.{ext}``. For each curve, a ``.png`` is generated for previewing, a ``.tex`` with tikzplotlib
+ which can be used to plot the figure in LaTeX in high quality, and a ``.csv`` with the confidence intervals so you can easily check these.
+
+ 95% confidence bands are constructed using the fixed-width bands method from `Macskassy S. A., Provost F., Rosset S. ROC Confidence Bands: An Empirical Evaluation. In: Proceedings of the 22nd international conference on Machine learning. 2005.`
+
+2. Univariate statistical testing of the features.
+
+ Stored in files ``StatisticalTestFeatures_all_{num}.{ext}``. A ``.png`` is generated for previewing, a ``.tex`` with tikzplotlib
+ which can be used to plot the figure in LaTeX in high quality, and a ``.csv`` with the p-values.
+
+ The following statistical tests are used:
a. A student t-test
b. A Welch test
c. A Wilcoxon test
d. A Mann-Whitney U test
- The uncorrected p-values for all these tests are reported in a single excel sheet. Pick the right test and significance
- level based on your assumptions. Normally, we make use of the Mann-Whitney U test, as our features do not have to be normally
- distributed, it's nonparametric, and assumes independent samples.
+ The uncorrected p-values for all these tests are reported in the .csv. Pick the right test and significance
+ level based on your assumptions.
+
+ Normally, we make use of the Mann-Whitney U test, as our features do not have to be normally
+ distributed, it's nonparametric, and assumes independent samples. Additionally, generally correction should be done
+ for multiple testing, which we always do with Bonferroni correction. Hence, .png and .tex files contain the
+ p-values of the Mann-Whitney U; the p-value of the magenta statistical significance has been corrected with
+ Bonferroni correction.
+
+3. Overview of hyperparameters used in the top ranked models.
+
+ Stored in file ``Hyperparameters_all_{num}.csv``.
+
+ Each row corresponds with the hyperparameters of one workflow. The following information is displayed in the respective columns:
+
+ A. The cross-validation iteration.
+ B. The rank of that workflow in that cross-validation.
+ C. The metric on which the ranking in column B was based.
+ D. The mean score on the validation datasets in the nested cross-validation of the metric in column C.
+ E. The mean score on the training datasets in the nested cross-validation of the metric in column C.
+ F. The mean time it took to fit that workflow in the validation datasets.
+ G. and further: the actual hyperparameters.
+
+ For how many of the top ranked workflows the hyperparameters are included in this file depends on the ``config["Ensemble"]["Size"]``, see :ref:`configuration chapter `.
+
+4. Boxplots of the features.
+
+ Stored in ``BoxplotsFeatures_all_{num}.zip``. The .zip file contains multiple .png files, each with maximum 25 boxplots of features.
+
+ For the full **training** dataset (i.e., if a separate test set is provided, it is not included in these plots), per feature, one boxplot
+ is generated depicting the distribution of features for all samples (blue), and for binary classification, also only for the samples
+ with label 0 (green) and for the samples with label 1 (red). Hence, this gives an impression whether some features show major differences
+ in the distribution among the different classes, and thus could be useful in the classification to separate them.
-4. Ranking patients from typical to atypical as determined by the model, based on either:
+5. Ranking patients from typical to atypical as determined by the model.
+
+ Stored in files ``RankedPosteriors_all_{num}.{ext}`` and ``RankedPercentages_all_{num}.{ext}``.
+
+ Two types of rankings are done:
a. The percentage of times a patient was classified correctly when occurring in the test set. Patients always correctly classified
can be seen as typical examples; patients always classified incorrectly as atypical.
b. The mean posterior of the patient when occurring in the test set.
- These measures can only be used in classification. Besides an Excel with the rankings, snapshots of the middle slice
- of the image + segmentation are saved with the ground truth label and the percentage/posterior in the filename. In
- this way, one can scroll through the patients from typical to atypical to distinguish a pattern.
+ These measures can only be used in classification. Besides a .csv with the rankings, snapshots of the middle slice
+ of the image + segmentation are saved with the ground truth label and the percentage/posterior in the filename in
+ a .zip file. In this way, one can scroll through the patients from typical to atypical to distinguish a pattern.
+
+6. A barchart of how often certain feature groups or feature selection groups were selected in the optimal methods.
-5. A barchart of how often certain features groups were selected in the optimal methods. Only useful when using
- groupwise feature selection.
+ Stored in files ``Barchart_all_{num}.{ext}``. A ``.png`` is generated for previewing, a ``.tex`` with tikzplotlib
+ which can be used to plot the figure in LaTeX in high quality.
- By default, only the first evaluation method, e.g. metric computation, is used. The other methods can simply be added
- to WORC by using the ``add_evaluation()`` function, either directly in WORC or through the facade:
+ Gives an idea of which features are most relevant for the predictions of the model, and which feature methods are often successful.
+ The overview of the hyperparameters, see above, is more quantitative and useful however.
-6. Decomposition of your feature space.
+7. Decomposition of your feature space.
+
+ Stored in file ``Decomposition_all_{num}.png``.
The following decompositions are performed:
@@ -338,6 +541,7 @@ Within these evaluation settings, the following performance evaluation methods a
regular PCA shows good separation of your classes, your classes can be split using linear combinations
of your features.
+
To add the evaluation workflow, simply use the ``add_evaluation`` function:
.. code-block:: python
@@ -348,14 +552,25 @@ To add the evaluation workflow, simply use the ``add_evaluation`` function:
...
experiment.add_evaluation(label_type)
+Or in the ``SimpleWORC`` or ``BasicWORC`` facades:
+
.. code-block:: python
- import WORC
from WORC import SimpleWORC
experiment = SimpleWORC('somename')
...
experiment.add_evaluation()
+The following outputs are only generated if certain configuration settings are used:
+
+1. Adjusted segmentations.
+
+ Stored in the ``Segmentations`` folder, in the files ``seg__{image_type}_{num}_{howsegmentationwasgenerated}_{sample_id}.hdf5``.
+ Only generated when the original segmentations were modified, e.g. using WORC's internal program segmentix
+ (see relevant section of the :ref:`configuration chapter `) or when registration was
+ performed to warp the segmentations from one sequence to another.
+
+
Debugging
---------
diff --git a/WORC/doc/_build/html/_static/documentation_options.js b/WORC/doc/_build/html/_static/documentation_options.js
index 9ddc46e0..18720fa5 100644
--- a/WORC/doc/_build/html/_static/documentation_options.js
+++ b/WORC/doc/_build/html/_static/documentation_options.js
@@ -1,6 +1,6 @@
var DOCUMENTATION_OPTIONS = {
URL_ROOT: document.getElementById("documentation_options").getAttribute('data-url_root'),
- VERSION: '3.6.0',
+ VERSION: '3.6.1',
LANGUAGE: 'None',
COLLAPSE_INDEX: false,
FILE_SUFFIX: '.html',
diff --git a/WORC/doc/_build/html/autogen/WORC.IOparser.html b/WORC/doc/_build/html/autogen/WORC.IOparser.html
index c5c4a3c8..99cfbc81 100644
--- a/WORC/doc/_build/html/autogen/WORC.IOparser.html
+++ b/WORC/doc/_build/html/autogen/WORC.IOparser.html
@@ -8,7 +8,7 @@
- IOparser Package — WORC 3.6.0 documentation
+ IOparser Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -374,7 +373,7 @@ IOparser Package
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.classification.html b/WORC/doc/_build/html/autogen/WORC.classification.html
index 68d92d4e..47fed640 100644
--- a/WORC/doc/_build/html/autogen/WORC.classification.html
+++ b/WORC/doc/_build/html/autogen/WORC.classification.html
@@ -8,7 +8,7 @@
- classification Package — WORC 3.6.0 documentation
+ classification Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -541,7 +540,7 @@
SearchCV
Module¶
-
-class
WORC.classification.SearchCV.
BaseSearchCV
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, maxlen=100, fastr_plugin=None, memory='2G', ranking_score='test_score', refit_workflows=False, ensemble_validation_score=None)[source]¶
+class WORC.classification.SearchCV.
BaseSearchCV
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, maxlen=100, fastr_plugin=None, memory='2G', ranking_score='test_score', refit_training_workflows=False, ensemble_validation_score=None, refit_validation_workflows=False)[source]¶
Bases: sklearn.base.BaseEstimator
, sklearn.base.MetaEstimatorMixin
Base class for hyper parameter search with cross-validation.
@@ -551,7 +550,7 @@
-
-
__init__
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, maxlen=100, fastr_plugin=None, memory='2G', ranking_score='test_score', refit_workflows=False, ensemble_validation_score=None)[source]¶
+__init__
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, maxlen=100, fastr_plugin=None, memory='2G', ranking_score='test_score', refit_training_workflows=False, ensemble_validation_score=None, refit_validation_workflows=False)[source]¶
Initialize SearchCV Object.
@@ -665,7 +664,7 @@
-
-
process_fit
(n_splits, parameters_all, test_sample_counts, test_score_dicts, train_score_dicts, fit_time, score_time, cv_iter, X, y, fitted_workflows=None, use_smac=False)[source]¶
+process_fit
(n_splits, parameters_all, test_sample_counts, test_score_dicts, train_score_dicts, fit_time, score_time, cv_iter, X, y, fitted_workflows=[], fitted_validation_workflows=[], use_smac=False)[source]¶
Process a fit.
Process the outcomes of a SearchCV fit and find the best settings
over all cross validations from all hyperparameters tested
@@ -727,7 +726,7 @@
-
-class
WORC.classification.SearchCV.
BaseSearchCVJoblib
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, maxlen=100, fastr_plugin=None, memory='2G', ranking_score='test_score', refit_workflows=False, ensemble_validation_score=None)[source]¶
+class WORC.classification.SearchCV.
BaseSearchCVJoblib
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, maxlen=100, fastr_plugin=None, memory='2G', ranking_score='test_score', refit_training_workflows=False, ensemble_validation_score=None, refit_validation_workflows=False)[source]¶
Bases: WORC.classification.SearchCV.BaseSearchCV
Base class for hyper parameter search with cross-validation.
@@ -744,7 +743,7 @@
-
-class
WORC.classification.SearchCV.
BaseSearchCVSMAC
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, maxlen=100, fastr_plugin=None, memory='2G', ranking_score='test_score', refit_workflows=False, ensemble_validation_score=None)[source]¶
+class WORC.classification.SearchCV.
BaseSearchCVSMAC
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, maxlen=100, fastr_plugin=None, memory='2G', ranking_score='test_score', refit_training_workflows=False, ensemble_validation_score=None, refit_validation_workflows=False)[source]¶
Bases: WORC.classification.SearchCV.BaseSearchCV
Base class for Bayesian hyper parameter search with cross-validation.
@@ -761,7 +760,7 @@
-
-class
WORC.classification.SearchCV.
BaseSearchCVfastr
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, maxlen=100, fastr_plugin=None, memory='2G', ranking_score='test_score', refit_workflows=False, ensemble_validation_score=None)[source]¶
+class WORC.classification.SearchCV.
BaseSearchCVfastr
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, maxlen=100, fastr_plugin=None, memory='2G', ranking_score='test_score', refit_training_workflows=False, ensemble_validation_score=None, refit_validation_workflows=False)[source]¶
Bases: WORC.classification.SearchCV.BaseSearchCV
Base class for hyper parameter search with cross-validation.
@@ -1417,7 +1416,7 @@
-
-class
WORC.classification.SearchCV.
GuidedSearchCVSMAC
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, fastr_plugin=None, maxlen=100, ranking_score='test_score', features=None, labels=None, smac_result_file=None)[source]¶
+class WORC.classification.SearchCV.
GuidedSearchCVSMAC
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, fastr_plugin=None, maxlen=100, ranking_score='test_score', features=None, labels=None, refit_training_workflows=False, refit_validation_workflows=False, smac_result_file=None)[source]¶
Bases: WORC.classification.SearchCV.BaseSearchCVSMAC
Guided search on hyperparameters.
GuidedSearchCV implements a “fit” and a “score” method.
@@ -1617,7 +1616,7 @@
-
-
__init__
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, fastr_plugin=None, maxlen=100, ranking_score='test_score', features=None, labels=None, smac_result_file=None)[source]¶
+__init__
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, fastr_plugin=None, maxlen=100, ranking_score='test_score', features=None, labels=None, refit_training_workflows=False, refit_validation_workflows=False, smac_result_file=None)[source]¶
Initialize SearchCV Object.
@@ -1885,7 +1884,7 @@
-
-class
WORC.classification.SearchCV.
RandomizedSearchCVfastr
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, fastr_plugin=None, memory='2G', maxlen=100, ranking_score='test_score', refit_workflows=False)[source]¶
+class WORC.classification.SearchCV.
RandomizedSearchCVfastr
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, fastr_plugin=None, memory='2G', maxlen=100, ranking_score='test_score', refit_training_workflows=False, refit_validation_workflows=False)[source]¶
Bases: WORC.classification.SearchCV.BaseSearchCVfastr
Randomized search on hyper parameters.
RandomizedSearchCV implements a “fit” and a “score” method.
@@ -2093,7 +2092,7 @@
-
-
__init__
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, fastr_plugin=None, memory='2G', maxlen=100, ranking_score='test_score', refit_workflows=False)[source]¶
+__init__
(param_distributions={}, n_iter=10, scoring=None, fit_params=None, n_jobs=1, iid=True, refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score='raise', return_train_score=True, n_jobspercore=100, fastr_plugin=None, memory='2G', maxlen=100, ranking_score='test_score', refit_training_workflows=False, refit_validation_workflows=False)[source]¶
Initialize SearchCV Object.
@@ -2273,7 +2272,7 @@
-
-
WORC.classification.crossval.
nocrossval
(config, label_data_train, label_data_test, image_features_train, image_features_test, param_grid=None, use_fastr=False, fastr_plugin=None, ensemble={'Use': False}, modus='singlelabel', do_test_RS_Ensemble=False)[source]¶
+WORC.classification.crossval.
nocrossval
(config, label_data_train, label_data_test, image_features_train, image_features_test, param_grid=None, use_fastr=False, fastr_plugin=None, ensemble={'Use': False}, modus='singlelabel')[source]¶
Constructs multiple individual classifiers based on the label settings.
- Arguments:
config (Dict): Dictionary with config settings
@@ -2305,7 +2304,7 @@
-
-
WORC.classification.crossval.
random_split_cross_validation
(image_features, feature_labels, classes, patient_ids, n_iterations, param_grid, config, modus, test_size, start=0, save_data=None, tempsave=False, tempfolder=None, fixedsplits=None, fixed_seed=False, use_fastr=None, fastr_plugin=None, do_test_RS_Ensemble=False, use_SMAC=False, smac_result_file=None)[source]¶
+WORC.classification.crossval.
random_split_cross_validation
(image_features, feature_labels, classes, patient_ids, n_iterations, param_grid, config, modus, test_size, start=0, save_data=None, tempsave=False, tempfolder=None, fixedsplits=None, fixed_seed=False, use_fastr=None, fastr_plugin=None, use_SMAC=False, smac_result_file=None)[source]¶
Cross-validation in which data is randomly split in each iteration.
Due to options of doing single-label and multi-label classification,
stratified splitting, and regression, we use a manual loop instead
@@ -2314,11 +2313,12 @@
-
-
WORC.classification.crossval.
test_RS_Ensemble
(estimator_input, X_train, Y_train, X_test, Y_test, feature_labels, output_json)[source]¶
+WORC.classification.crossval.
test_RS_Ensemble
(estimator_input, X_train, Y_train, X_test, Y_test, feature_labels, output_json, verbose=False, RSs=None, ensembles=None, maxlen=100)[source]¶
Test performance for different random search and ensemble sizes.
This function is written for conducting a specific experiment from the
WORC paper to test how the performance varies with varying random search
and ensemble sizes. We do not recommend usage in general of this part.
+maxlen = 100 # max ensembles numeric
@@ -2340,7 +2340,7 @@
-
-
WORC.classification.fitandscore.
fit_and_score
(X, y, scoring, train, test, parameters, fit_params=None, return_train_score=True, return_n_test_samples=True, return_times=True, return_parameters=False, return_estimator=False, error_score='raise', verbose=False, return_all=True, refit_workflows=False, use_smac=False)[source]¶
+WORC.classification.fitandscore.
fit_and_score
(X, y, scoring, train, test, parameters, fit_params=None, return_train_score=True, return_n_test_samples=True, return_times=True, return_parameters=False, return_estimator=False, error_score='raise', verbose=False, return_all=True, refit_training_workflows=False, refit_validation_workflows=False, skip=False)[source]¶
Fit an estimator to a dataset and score the performance.
The following
methods can currently be applied as preprocessing before fitting, in
@@ -2562,7 +2562,7 @@
parameter_optimization
Module¶
-
-
WORC.classification.parameter_optimization.
guided_search_parameters
(features, labels, N_iter, test_size, parameters, scoring_method, n_splits=5, n_jobspercore=200, use_fastr=False, n_cores=1, fastr_plugin=None, memory='2G', maxlen=100, ranking_score='test_score', random_seed=None, refit_workflows=False, smac_result_file=None)[source]¶
+WORC.classification.parameter_optimization.
guided_search_parameters
(features, labels, N_iter, test_size, parameters, scoring_method, n_splits=5, n_jobspercore=200, use_fastr=False, n_cores=1, fastr_plugin=None, memory='2G', maxlen=100, ranking_score='test_score', random_seed=None, refit_training_workflows=False, refit_validation_workflows=False, smac_result_file=None)[source]¶
Train a classifier and simultaneously optimizes hyperparameters using a
Bayesian optimization approach.
@@ -2599,7 +2599,7 @@
-
-
WORC.classification.parameter_optimization.
random_search_parameters
(features, labels, N_iter, test_size, param_grid, scoring_method, n_splits=5, n_jobspercore=200, use_fastr=False, n_cores=1, fastr_plugin=None, memory='2G', maxlen=100, ranking_score='test_score', random_seed=None, refit_workflows=False)[source]¶
+WORC.classification.parameter_optimization.
random_search_parameters
(features, labels, N_iter, test_size, param_grid, scoring_method, n_splits=5, n_jobspercore=200, use_fastr=False, n_cores=1, fastr_plugin=None, memory='2G', maxlen=100, ranking_score='test_score', random_seed=None, refit_training_workflows=False, refit_validation_workflows=False)[source]¶
Train a classifier and simultaneously optimizes hyperparameters using a
randomized search.
@@ -2734,7 +2734,7 @@
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.config.html b/WORC/doc/_build/html/autogen/WORC.config.html
index f0c9a3d9..d624870d 100644
--- a/WORC/doc/_build/html/autogen/WORC.config.html
+++ b/WORC/doc/_build/html/autogen/WORC.config.html
@@ -8,7 +8,7 @@
- <no title> — WORC 3.6.0 documentation
+ <no title> — WORC 3.6.1 documentation
@@ -62,7 +62,7 @@
- 3.6.0
+ 3.6.1
@@ -95,7 +95,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -203,7 +202,7 @@
Fingerprinting
-Fingerprinting
+
General
@@ -255,7 +254,7 @@
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.detectors.html b/WORC/doc/_build/html/autogen/WORC.detectors.html
index 3aab24ce..6f9e102c 100644
--- a/WORC/doc/_build/html/autogen/WORC.detectors.html
+++ b/WORC/doc/_build/html/autogen/WORC.detectors.html
@@ -8,7 +8,7 @@
- detectors Package — WORC 3.6.0 documentation
+ detectors Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -377,7 +376,7 @@ detectors Package
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.exampledata.html b/WORC/doc/_build/html/autogen/WORC.exampledata.html
index 3d555247..460623ed 100644
--- a/WORC/doc/_build/html/autogen/WORC.exampledata.html
+++ b/WORC/doc/_build/html/autogen/WORC.exampledata.html
@@ -8,7 +8,7 @@
- exampledata Package — WORC 3.6.0 documentation
+ exampledata Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -247,7 +246,7 @@ exampledata Package
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.facade.html b/WORC/doc/_build/html/autogen/WORC.facade.html
index 685eac7c..8c467d26 100644
--- a/WORC/doc/_build/html/autogen/WORC.facade.html
+++ b/WORC/doc/_build/html/autogen/WORC.facade.html
@@ -8,7 +8,7 @@
- facade Package — WORC 3.6.0 documentation
+ facade Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -282,6 +281,23 @@
Add manual overrides for the WORC configuration.
For a full list of options, see the
WORC Config chapter for allowed options.
+Example usage:
+
+- overrides = {
+- ‘Classification’: {
‘classifiers’: ‘SVM’,
+
+
+},
+‘Featsel’: {
+
+# Other estimators do not support multiclass
+‘SelectFromModel_estimator’: ‘RF’
+
+}
+
+
+}
+self.add_config_overrides(overrides)
- config: dictionary
Determine which options to override with which values.
@@ -607,7 +623,7 @@ Subpackages
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.featureprocessing.html b/WORC/doc/_build/html/autogen/WORC.featureprocessing.html
index a64483c6..47d8fcbd 100644
--- a/WORC/doc/_build/html/autogen/WORC.featureprocessing.html
+++ b/WORC/doc/_build/html/autogen/WORC.featureprocessing.html
@@ -8,7 +8,7 @@
- featureprocessing Package — WORC 3.6.0 documentation
+ featureprocessing Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -1122,7 +1121,7 @@
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.html b/WORC/doc/_build/html/autogen/WORC.html
index 0b86420a..d33e56c7 100644
--- a/WORC/doc/_build/html/autogen/WORC.html
+++ b/WORC/doc/_build/html/autogen/WORC.html
@@ -8,7 +8,7 @@
- WORC Package — WORC 3.6.0 documentation
+ WORC Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -678,6 +677,7 @@ Subpackagestests Package
WORCTutorialSimple_unittest_multiclass
Module
WORCTutorialSimple_unittest_regression
Module
+test_RSEnsemble
Module
test_combat
Module
test_helpers
Module
test_iccthreshold
Module
@@ -724,7 +724,7 @@ Subpackages
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.plotting.html b/WORC/doc/_build/html/autogen/WORC.plotting.html
index dfd60b80..39aa8202 100644
--- a/WORC/doc/_build/html/autogen/WORC.plotting.html
+++ b/WORC/doc/_build/html/autogen/WORC.plotting.html
@@ -8,7 +8,7 @@
- plotting Package — WORC 3.6.0 documentation
+ plotting Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -538,7 +537,7 @@
-
-
WORC.plotting.plot_images.
slicer
(image, mask=None, output_name=None, output_name_zoom=None, thresholds=[-5, 5], zoomfactor=4, dpi=500, normalize=True, expand=False, boundary=False, square=False, flip=True, rot90=0, alpha=0.4, axis='axial', index=None, color='cyan', radius=2, colormap='gray')[source]¶
+WORC.plotting.plot_images.
slicer
(image, mask=None, output_name=None, output_name_zoom=None, thresholds=[-5, 5], zoomfactor=4, dpi=500, normalize=True, expand=False, boundary=False, square=False, flip=True, rot90=0, alpha=0.4, axis='axial', index=None, color='cyan', radius=2, colormap='gray', fill=False)[source]¶
Plot slice of image where mask is largest, with mask as overlay.
image and mask should both be arrays
@@ -650,7 +649,7 @@
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.processing.html b/WORC/doc/_build/html/autogen/WORC.processing.html
index 5cb03459..196868b6 100644
--- a/WORC/doc/_build/html/autogen/WORC.processing.html
+++ b/WORC/doc/_build/html/autogen/WORC.processing.html
@@ -8,7 +8,7 @@
- processing Package — WORC 3.6.0 documentation
+ processing Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -274,7 +273,7 @@ processing Package
-
-
WORC.processing.helpers.
resample_image
(image, new_spacing, interpolator=3)[source]¶
+WORC.processing.helpers.
resample_image
(image, new_spacing=None, new_size=None, interpolator=3)[source]¶
Resample an image to another spacing.
- imageITK Image
Input image.
@@ -559,7 +558,7 @@ processing Package
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.resources.fastr_tests.html b/WORC/doc/_build/html/autogen/WORC.resources.fastr_tests.html
index 52572c85..c1d34734 100644
--- a/WORC/doc/_build/html/autogen/WORC.resources.fastr_tests.html
+++ b/WORC/doc/_build/html/autogen/WORC.resources.fastr_tests.html
@@ -8,7 +8,7 @@
- fastr_tests Package — WORC 3.6.0 documentation
+ fastr_tests Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -289,7 +288,7 @@ fastr_tests Package
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.resources.fastr_tools.html b/WORC/doc/_build/html/autogen/WORC.resources.fastr_tools.html
index b1dae46d..001df8ea 100644
--- a/WORC/doc/_build/html/autogen/WORC.resources.fastr_tools.html
+++ b/WORC/doc/_build/html/autogen/WORC.resources.fastr_tools.html
@@ -8,7 +8,7 @@
- fastr_tools Package — WORC 3.6.0 documentation
+ fastr_tools Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -223,7 +222,7 @@
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.resources.html b/WORC/doc/_build/html/autogen/WORC.resources.html
index 8e8fb6b8..59c44ca7 100644
--- a/WORC/doc/_build/html/autogen/WORC.resources.html
+++ b/WORC/doc/_build/html/autogen/WORC.resources.html
@@ -8,7 +8,7 @@
- resources Package — WORC 3.6.0 documentation
+ resources Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -235,7 +234,7 @@ Subpackages
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/autogen/WORC.tools.html b/WORC/doc/_build/html/autogen/WORC.tools.html
index b1e931f1..b56e59d3 100644
--- a/WORC/doc/_build/html/autogen/WORC.tools.html
+++ b/WORC/doc/_build/html/autogen/WORC.tools.html
@@ -8,7 +8,7 @@
- tools Package — WORC 3.6.0 documentation
+ tools Package — WORC 3.6.1 documentation
@@ -64,7 +64,7 @@
- 3.6.0
+ 3.6.1
@@ -97,7 +97,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -469,10 +468,44 @@ tools Packagecreatefixedsplits
Module¶
-
-
WORC.tools.createfixedsplits.
createfixedsplits
(label_file=None, label_type=None, patient_IDs=None, test_size=0.2, N_iterations=1, regression=False, stratify=None, modus='singlelabel', output=None)[source]¶
-Create fixed splits for a cross validation.
+WORC.tools.createfixedsplits.
createfixedsplits
(label_file=None, label_type=None, patient_IDs=None, stratify=True, test_size=0.2, N_iterations=1, modus='singlelabel', output=None)[source]¶
+Create fixed splits for a random-split cross-validation.
+
+
+
+- label_filefilepath
CSV file containing the labels of the patients.
+
+- label_type: list of strings
labels to extract from the label file, e.g. [‘label1’]
+
+- patient_IDs: list of strings
names of patients to take into account. If None, take all
+
+- stratify: Boolean
If True, splits are stratified. In this case, you need to provide
+label data.
+
+- test_size: float
Percentage of patients in test set per iteration.
+
+- N_iterations: integer
Number of cross-validation iterations
+
+- modus: str
singlelabel or regression. Multilabel not implemented yet.
+
+- output: filepath
csv filename to save output to.
+
+
+
+
+
+- df: pandas Dataframe
Fixed splits created.
+
+
+
+
+
+
fingerprinting
Module¶
@@ -538,7 +571,7 @@ tools Package
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/genindex.html b/WORC/doc/_build/html/genindex.html
index 9c527668..84f09746 100644
--- a/WORC/doc/_build/html/genindex.html
+++ b/WORC/doc/_build/html/genindex.html
@@ -9,7 +9,7 @@
- Index — WORC 3.6.0 documentation
+ Index — WORC 3.6.1 documentation
@@ -63,7 +63,7 @@
- 3.6.0
+ 3.6.1
@@ -96,7 +96,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
- test_combat() (in module WORC.tests.test_combat)
@@ -1675,7 +1676,7 @@
W
- © Copyright 2016 -- 2020, Biomedical Imaging Group Rotterdam, Departments of Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
+ © Copyright 2016 -- 2023, Biomedical Imaging Group Rotterdam, Department of Radiology and Nuclear Medicine, Erasmus University Medical Center, Rotterdam, The Netherlands
diff --git a/WORC/doc/_build/html/index.html b/WORC/doc/_build/html/index.html
index 24807176..56c4ef1a 100644
--- a/WORC/doc/_build/html/index.html
+++ b/WORC/doc/_build/html/index.html
@@ -8,7 +8,7 @@
- WORC: Workflow for Optimal Radiomics Classification — WORC 3.6.0 documentation
+ WORC: Workflow for Optimal Radiomics Classification — WORC 3.6.1 documentation
@@ -63,7 +63,7 @@
- 3.6.0
+ 3.6.1
@@ -96,7 +96,6 @@
- Additional functionality
- FAQ
- Developer documentation
-- Resource File Formats
- Changelog
@@ -194,6 +193,12 @@ Welcome to the WORC documentation!WORC.readthedocs.io.
For Tutorials on WORC, both for beginner and advanced WORCflows, please
see our Tutorial repository https://github.com/MStarmans91/WORCTutorial.
+The preprint of the WORC article, the WORC database, and my PhD thesis in which I developed WORC can be found here:
+
+
+
+
+
For more information regarding radiomics, we recommend the following book chapter:
@@ -232,28 +237,31 @@ WORC DocumentationQuick start guide
- Installation
-- Tutorials
-- Running an experiment
- User Manual
-- Interacting with WORC
-- The WORC Object
+- WORC object and facades
-- Input file definitions
-- Attributes: Sources
-- Construction and execution commands
+- Input file definitions and how to provide them to WORC
-- Evaluation of your network
+- Outputs and evaluation of your network
- Debugging
- Example data
@@ -264,6 +272,7 @@ WORC DocumentationContents
- General
- Labels
+- Fingerprinting
- Preprocessing
- Segmentix
- ImageFeatures
@@ -332,14 +341,19 @@ WORC DocumentationExecution errors
- My experiment crashed, where to begin looking for errors?
+- Error:
File "H5FDsec2.c", line 941, in H5FD_sec2_lock unable to lock file, errno = 37, error message = 'No locks available'
+- Error:
Failed building wheel for cryptography
(occurs often on BIGR cluster)
- Error:
WORC.addexceptions.WORCValueError: First column in the file
given to SimpleWORC().labels_from_this_file(**) needs to be named Patient.
- Error:
WORC.addexceptions.WORCKeyError: 'No entry found in labeling
for feature file .../feat_out_0.hdf5.'
-- Error:
File "...\lib\site-packages\numpy\lib\function_base.py", line 4406,
`` in delete keep[obj,] = False`` IndexError: arrays used as indices must be of integer (or boolean) type
+- Error:
File "...\lib\site-packages\numpy\lib\function_base.py", line 4406, in delete keep[obj,] = False IndexError: arrays used as indices must be of integer (or boolean) type
- Other
- I am working on the BIGR cluster and would like some jobs to be submitted to different queues
- Can I use my own features instead of the standard
WORC
features?
+- How to change the temporary and output folders?
+- How can I get the performance on the validation dataset?
+- My jobs on the BIGR cluster get cancelled due to memory errors
@@ -350,169 +364,174 @@ WORC DocumentationAdding methods to hyperoptimization
-- Resource File Formats
- Changelog
-- 3.6.0 - 2022-04-05
-- Added
+- 3.6.1 - 2023-02-15
-- 3.5.0 - 2021-08-18
-- Fixed
-- Changed
-- Added
+- 3.6.0 - 2022-04-05
-- 3.4.5 - 2021-07-09
+- 3.5.0 - 2021-08-18
-- 3.4.4 - 2021-07-01
-- Fixed
-- Changed
-- Added
+- 3.4.5 - 2021-07-09
-- 3.4.3 - 2021-06-02
+- 3.4.4 - 2021-07-01
-- 3.4.2 - 2021-05-27
-- Fixed
-- Added
+- 3.4.3 - 2021-06-02
-- 3.4.1 - 2021-05-18
-- Fixed
-- Changed
+- 3.4.2 - 2021-05-27
-- 3.4.0 - 2021-02-02
+- 3.4.1 - 2021-05-18
-- 3.3.5 - 2020-10-21