diff --git a/CHANGELOG b/CHANGELOG
index 349e0bb5..9f1e290e 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -6,7 +6,7 @@ All notable changes to this project will be documented in this file.
The format is based on `Keep a Changelog
#!/usr/bin/env python
-# Copyright 2016-2020 Biomedical Imaging Group Rotterdam, Departments of
+# Copyright 2016-2023 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -195,20 +202,21 @@ Source code for WORC.IOparser.config_WORC
settings_dict: dictionary containing all parsed settings.
"""
if not os.path.exists(config_file_path):
e = f'File {config_file_path} does not exist!'
raise ae.WORCKeyError(e)
settings = configparser.ConfigParser()
settings.read(config_file_path)
- settings_dict = {'ImageFeatures': dict(), 'General': dict(),
- 'SVMFeatures': dict()}
+ settings_dict = {'Preprocessing': dict(), 'ImageFeatures': dict(), 'General': dict(),
+ 'SVMFeatures': dict(), 'Ensemble': dict(),
+ 'Labels': dict()}
settings_dict['ImageFeatures']['image_type'] =\
str(settings['ImageFeatures']['image_type'])
settings_dict['General']['FeatureCalculators'] =\
- [str(item).strip() for item in
+ [str(item).strip('[]') for item in
settings['General']['FeatureCalculators'].split(',')]
settings_dict['General']['Preprocessing'] =\
@@ -219,7 +227,31 @@ Source code for WORC.IOparser.config_WORC
settings_dict['General']['Segmentix'] =\
settings['General'].getboolean('Segmentix')
+
+ # Settings for ensembling
+ settings_dict['Ensemble']['Method'] =\
+ str(settings['Ensemble']['Method'])
+ settings_dict['Ensemble']['Size'] =\
+ int(settings['Ensemble']['Size'])
+
+ # Label settings
+ settings_dict['Labels']['label_names'] =\
+ [str(item).strip() for item in
+ settings['Labels']['label_names'].split(',')]
+ settings_dict['Labels']['modus'] =\
+ str(settings['Labels']['modus'])
+
+ # Whether to use some methods or not
+ settings_dict['General']['ComBat'] =\
+ str(settings['General']['ComBat'])
+
+ settings_dict['General']['Fingerprint'] =\
+ str(settings['General']['Fingerprint'])
+
+ settings_dict['Preprocessing']['Resampling'] =\
+ settings['Preprocessing'].getboolean('Resampling')
+
return settings_dict
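
For reference, a minimal, self-contained sketch of the configparser pattern load_config uses above (the section and key names mirror the source; the example values are illustrative, not WORC defaults):

    import configparser
    import textwrap

    raw = textwrap.dedent("""\
        [Ensemble]
        Method = top_N
        Size = 100

        [Labels]
        label_names = Label1, Label2

        [Preprocessing]
        Resampling = False
        """)

    settings = configparser.ConfigParser()
    settings.read_string(raw)

    # Plain values come back as strings and are cast explicitly
    method = str(settings['Ensemble']['Method'])
    size = int(settings['Ensemble']['Size'])

    # Comma-separated values are split into stripped lists
    labels = [s.strip() for s in settings['Labels']['label_names'].split(',')]

    # Booleans use the dedicated getboolean() helper
    resampling = settings['Preprocessing'].getboolean('Resampling')
    print(method, size, labels, resampling)
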
#!/usr/bin/env python
-# Copyright 2016-2022 Biomedical Imaging Group Rotterdam, Departments of
+# Copyright 2016-2023 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -196,7 +203,7 @@ Source code for WORC.IOparser.config_io_classifier
"""
if not os.path.exists(config_file_path):
e = f'File {config_file_path} does not exist!'
raise ae.WORCKeyError(e)
settings = configparser.ConfigParser()
@@ -257,6 +264,29 @@ Source code for WORC.IOparser.config_io_classifier
[int(str(item).strip()) for item in
settings['Featsel']['SelectFromModel_n_trees'].split(',')]
+ settings_dict['Featsel']['RFE'] =\
+ settings['Featsel'].getfloat('RFE')
+
+ settings_dict['Featsel']['RFE_lasso_alpha'] =\
+ [float(str(item).strip()) for item in
+ settings['Featsel']['RFE_lasso_alpha'].split(',')]
+
+ settings_dict['Featsel']['RFE_estimator'] =\
+ [str(item).strip() for item in
+ settings['Featsel']['RFE_estimator'].split(',')]
+
+ settings_dict['Featsel']['RFE_n_trees'] =\
+ [int(str(item).strip()) for item in
+ settings['Featsel']['RFE_n_trees'].split(',')]
+
+ settings_dict['Featsel']['RFE_n_features_to_select'] =\
+ [float(str(item).strip()) for item in
+ settings['Featsel']['RFE_n_features_to_select'].split(',')]
+
+ settings_dict['Featsel']['RFE_step'] =\
+ [int(str(item).strip()) for item in
+ settings['Featsel']['RFE_step'].split(',')]
+
settings_dict['Featsel']['GroupwiseSearch'] =\
[str(item).strip() for item in
settings['Featsel']['GroupwiseSearch'].split(',')]
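
The new RFE options parsed above correspond to recursive feature elimination; a hedged sketch of how parameters like these could drive scikit-learn's RFE (the estimator choice and data are illustrative, not WORC's exact wiring):

    # Hedged sketch: RFE with a lasso estimator, cf. RFE_estimator,
    # RFE_lasso_alpha, RFE_n_features_to_select and RFE_step above.
    from sklearn.datasets import make_classification
    from sklearn.feature_selection import RFE
    from sklearn.linear_model import Lasso

    X, y = make_classification(n_samples=100, n_features=30, random_state=0)

    selector = RFE(estimator=Lasso(alpha=1.0),  # cf. RFE_lasso_alpha
                   n_features_to_select=10,     # cf. RFE_n_features_to_select
                   step=1)                      # features removed per iteration
    X_reduced = selector.fit_transform(X, y)
    print(X_reduced.shape)  # (100, 10)
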
@@ -539,7 +569,7 @@ Source code for WORC.IOparser.config_io_classifier
settings_dict['CrossValidation']['fixed_seed'] =\
settings['CrossValidation'].getboolean('fixed_seed')
- # Genetic settings
+ # Label settings
settings_dict['Labels']['label_names'] =\
[str(item).strip() for item in
settings['Labels']['label_names'].split(',')]
@@ -568,6 +598,8 @@ Source code for WORC.IOparser.config_io_classifier
settings['HyperOptimization'].getboolean('refit_validation_workflows')
settings_dict['HyperOptimization']['memory'] = \
str(settings['HyperOptimization']['memory'])
+ settings_dict['HyperOptimization']['fix_random_seed'] = \
+ settings['HyperOptimization'].getboolean('fix_random_seed')
# Settings for SMAC
settings_dict['SMAC']['use'] =\
diff --git a/WORC/doc/_build/html/_modules/WORC/IOparser/config_preprocessing.html b/WORC/doc/_build/html/_modules/WORC/IOparser/config_preprocessing.html
index 84586b75..897a59f5 100644
--- a/WORC/doc/_build/html/_modules/WORC/IOparser/config_preprocessing.html
+++ b/WORC/doc/_build/html/_modules/WORC/IOparser/config_preprocessing.html
@@ -1,39 +1,42 @@
-      WORC.IOparser.config_preprocessing — WORC 3.6.2 documentation
+      WORC.IOparser.config_preprocessing — WORC 3.6.3 documentation
@@ -164,7 +171,7 @@
Source code for WORC.IOparser.config_preprocessing
#!/usr/bin/env python
-# Copyright 2016-2020 Biomedical Imaging Group Rotterdam, Departments of
+# Copyright 2016-2023 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -195,7 +202,7 @@ Source code for WORC.IOparser.config_preprocessing
settings_dict: dictionary containing all parsed settings.
"""
if not os.path.exists(config_file_path):
e = f'File {config_file_path} does not exist!'
raise ae.WORCKeyError(e)
settings = configparser.ConfigParser()
@@ -225,8 +232,21 @@ Source code for WORC.IOparser.config_preprocessing
settings['Preprocessing']['Clipping_Range'].split(',')]
if len(settings_dict['Preprocessing']['Clipping_Range']) != 2:
- raise ae.WORCValueError(f"Clipping range should be two floats split by a comma, got {settings['Preprocessing']['Clipping_Range']}.")
+ raise ae.WORCValueError(f"Clipping range should be two floats split by a comma, got {settings['Preprocessing']['Clipping_Range']}.")
+
+ # Histogram equalization
+ settings_dict['Preprocessing']['HistogramEqualization'] =\
+ settings['Preprocessing'].getboolean('HistogramEqualization')
+
+ settings_dict['Preprocessing']['HistogramEqualization_Alpha'] =\
+ float(settings['Preprocessing']['HistogramEqualization_Alpha'])
+ settings_dict['Preprocessing']['HistogramEqualization_Beta'] =\
+ float(settings['Preprocessing']['HistogramEqualization_Beta'])
+
+ settings_dict['Preprocessing']['HistogramEqualization_Radius'] =\
+ int(settings['Preprocessing']['HistogramEqualization_Radius'])
+
# Normalization
settings_dict['Preprocessing']['Normalize'] =\
settings['Preprocessing'].getboolean('Normalize')
@@ -270,7 +290,7 @@ Source code for WORC.IOparser.config_preprocessing
if len(settings_dict['Preprocessing']['Resampling_spacing']) != 3:
s = settings_dict['Preprocessing']['Resampling_spacing']
raise ae.WORCValueError(f'Resampling spacing should be three elements, got {s}')
return settings_dict
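
The HistogramEqualization_* keys parsed above plausibly map onto SimpleITK's adaptive histogram equalization filter, which exposes matching alpha/beta/radius parameters; a hedged sketch under that assumption (the input image is synthetic):

    # Assumption: the config keys correspond to SimpleITK's
    # AdaptiveHistogramEqualizationImageFilter parameters.
    import SimpleITK as sitk

    image = sitk.GaussianSource(sitk.sitkFloat32, size=[64, 64])

    f = sitk.AdaptiveHistogramEqualizationImageFilter()
    f.SetAlpha(0.3)   # cf. HistogramEqualization_Alpha
    f.SetBeta(0.3)    # cf. HistogramEqualization_Beta
    f.SetRadius(5)    # cf. HistogramEqualization_Radius
    equalized = f.Execute(image)
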
diff --git a/WORC/doc/_build/html/_modules/WORC/WORC.html b/WORC/doc/_build/html/_modules/WORC/WORC.html
index f65f507d..506d5fb5 100644
--- a/WORC/doc/_build/html/_modules/WORC/WORC.html
+++ b/WORC/doc/_build/html/_modules/WORC/WORC.html
@@ -1,39 +1,42 @@
-      WORC.WORC — WORC 3.6.2 documentation
+      WORC.WORC — WORC 3.6.3 documentation
@@ -164,7 +171,7 @@
Source code for WORC.WORC
#!/usr/bin/env python
-# Copyright 2016-2022 Biomedical Imaging Group Rotterdam, Departments of
+# Copyright 2016-2023 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -184,20 +191,20 @@ Source code for WORC.WORC
import fastr
import graphviz
import configparser
from pathlib import Path
from random import randint
import WORC.IOparser.file_io as io
from fastr.api import ResourceLimit
from WORC.tools.Slicer import Slicer
from WORC.tools.Elastix import Elastix
from WORC.tools.Evaluate import Evaluate
import WORC.addexceptions as WORCexceptions
import WORC.IOparser.config_WORC as config_io
from WORC.detectors.detectors import DebugDetector
from WORC.export.hyper_params_exporter import export_hyper_params_to_latex
from urllib.parse import urlparse
from urllib.request import url2pathname
from WORC.tools.fingerprinting import quantitative_modalities, qualitative_modalities, all_modalities
class WORC(object):
@@ -274,7 +281,7 @@ Source code for WORC.WORC
"""
def __init__(self, name='test'):
"""Initialize WORC object.
Set the initial variables all to None, except for some defaults.
@@ -306,6 +313,8 @@ Source code for WORC.WORC
self.masks_normalize_test = list()
self.features_test = list()
self.metadata_test = list()
+
+ self.trained_model = None
self.Elastix_Para = list()
self.label_names = 'Label1, Label2'
@@ -323,6 +332,7 @@ Source code for WORC.WORC
self.segmode = []
self._add_evaluation = False
self.TrainTest = False
+ self.OnlyTest = False
# Memory settings for all fastr nodes
self.fastr_memory_parameters = dict()
@@ -394,6 +404,11 @@ Source code for WORC.WORC
config['Preprocessing']['BiasCorrection_Mask'] = 'False'
config['Preprocessing']['CheckOrientation'] = 'False'
config['Preprocessing']['OrientationPrimaryAxis'] = 'axial'
+ config['Preprocessing']['HistogramEqualization'] = 'False'
+ config['Preprocessing']['HistogramEqualization_Alpha'] = '0.3'
+ config['Preprocessing']['HistogramEqualization_Beta'] = '0.3'
+ config['Preprocessing']['HistogramEqualization_Radius'] = '5'
+
# Segmentix
config['Segmentix'] = dict()
@@ -548,6 +563,12 @@ Source code for WORC.WORC
config['Featsel']['ReliefSampleSize'] = '0.75, 0.2'
config['Featsel']['ReliefDistanceP'] = '1, 3'
config['Featsel']['ReliefNumFeatures'] = '10, 40'
+ config['Featsel']['RFE'] = '0.0'
+ config['Featsel']['RFE_estimator'] = config['Featsel']['SelectFromModel_estimator']
+ config['Featsel']['RFE_lasso_alpha'] = config['Featsel']['SelectFromModel_lasso_alpha']
+ config['Featsel']['RFE_n_trees'] = config['Featsel']['SelectFromModel_n_trees']
+ config['Featsel']['RFE_n_features_to_select'] = '10, 90'
+ config['Featsel']['RFE_step'] = '1, 9'
# Groupwise Featureselection options
config['SelectFeatGroup'] = dict()
@@ -664,6 +685,7 @@ Source code for WORC.WORC
config['HyperOptimization']['memory'] = '3G'
config['HyperOptimization']['refit_training_workflows'] = 'False'
config['HyperOptimization']['refit_validation_workflows'] = 'False'
+ config['HyperOptimization']['fix_random_seed'] = 'False'
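
A hypothetical sketch (not WORC's implementation) of what honoring a fix_random_seed flag can look like when sampling hyperparameters:

    import random
    import numpy as np

    def search_rng(fix_random_seed, seed=42):  # seed value is hypothetical
        """Return a NumPy RNG, seeded only when reproducibility is requested."""
        if fix_random_seed:
            random.seed(seed)  # also pin the stdlib RNG
            return np.random.default_rng(seed)
        return np.random.default_rng()

    rng = search_rng(fix_random_seed=True)
    print(rng.uniform(0, 1))  # identical across runs when the flag is set
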
# SMAC options
config['SMAC'] = dict()
@@ -695,30 +717,35 @@ Source code for WORC.WORC
"""Add several tools to the WORC object."""
self.Tools = Tools()
- def build(self, wtype='training'):
+ def build(self, buildtype='training'):
"""Build the network based on the given attributes.
Parameters
----------
- wtype: string, default 'training'
+ buildtype: string, default 'training'
Specify the WORC execution type.
- - testing: use if you have a trained classifier and want to
+ - inference: use if you have a trained classifier and want to
apply it to some new images.
- training: use if you want to train a classifier from a dataset.
"""
- self.wtype = wtype
- if wtype == 'training':
+ if buildtype == 'training':
self.build_training()
- elif wtype == 'testing':
- self.build_testing()
-
+ elif buildtype == 'inference':
+ raise WORCexceptions.WORCValueError("Inference workflow is still WIP and does not fully work yet.")
+ self.TrainTest = True
+ self.OnlyTest = True
+ self.build_inference()
+
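
For orientation, a minimal usage sketch of this entry point (assumes a working WORC install and that the package re-exports the WORC class, as in its tutorials; without images or features attached the build raises WORCIOError, as shown below):

    import WORC

    experiment = WORC.WORC(name='example')
    experiment.build(buildtype='training')  # raises WORCIOError without inputs
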
def build_training(self):
"""Build the training network based on the given attributes."""
# We either need images or features for Radiomics
if self.images_test or self.features_test:
+ if not self.labels_test:
+ m = "You provided images and/or features for a test set, but not ground truth labels. Please also provide labels for the test set."
+ raise WORCexceptions.WORCValueError(m)
self.TrainTest = True
-
+
if self.images_train or self.features_train:
print('Building training network...')
# We currently require labels for supervised learning
@@ -735,13 +762,10 @@ Source code for WORC.WORC
# NOTE: We currently use the first configuration as general config
image_types = list()
for c in range(len(self.configs)):
- if type(self.configs[c]) == str:
- # Probably, c is a configuration file
- self.configs[c] = config_io.load_config(self.configs[c])
image_types.append(self.configs[c]['ImageFeatures']['image_type'])
if self.configs[0]['General']['Fingerprint'] == 'True' and any(imt not in all_modalities for imt in image_types):
m = f'One of your image types {image_types} is not one of the valid image types {quantitative_modalities + qualitative_modalities}. This is mandatory to set when performing fingerprinting, see the WORC Documentation (https://worc.readthedocs.io/en/latest/static/configuration.html#imagefeatures).'
raise WORCexceptions.WORCValueError(m)
# Create config source
@@ -904,9 +928,9 @@ Source code for WORC.WORC
else:
nseg = len(self.segmentations_train)
nim = len(image_types)
m = 'Length of segmentations for training is ' +\
    f'{nseg}: should be equal to number of images' +\
    f' ({nim}) or 1 when using registration.'
raise WORCexceptions.WORCValueError(m)
# BUG: We assume that first type defines if we use segmentix
@@ -954,8 +978,8 @@ Source code for WORC.WORC
self.modlabels.append(label)
# Create required sources and sinks
self.sources_parameters[label] = self.network.create_source('ParameterFile', id=f'config_{label}', step_id='general_sources')
self.sinks_configs[label] = self.network.create_sink('ParameterFile', id=f'config_{label}_sink', node_group='conf', step_id='general_sinks')
self.sources_images_train[label] = self.network.create_source('ITKImageFile', id='images_train_' + label, node_group='train', step_id='train_sources')
if self.TrainTest:
@@ -1025,8 +1049,8 @@ Source code for WORC.WORC
# Add fingerprinting
if self.configs[0]['General']['Fingerprint'] == 'True':
self.add_fingerprinter(id=label, type='images', config_source=self.sources_parameters[label].output)
self.links_fingerprinting[f'{label}_images'] = self.network.create_link(self.converters_im_train[label].outputs['image'], self.node_fingerprinters[label].inputs['images_train'])
self.links_fingerprinting[f'{label}_images'].collapse = 'train'
self.sinks_configs[label].input = self.node_fingerprinters[label].outputs['config']
@@ -1059,7 +1083,7 @@ Source code for WORC.WORC
self.featureconverter_test[label] = list()
for f in feature_calculators:
print(f'\t - Adding feature calculation node: {f}.')
self.add_feature_calculator(f, label, nmod)
# -----------------------------------------------------
@@ -1103,8 +1127,8 @@ Source code for WORC.WORC
# Add to fingerprinting if required
if self.configs[0]['General']['Fingerprint'] == 'True':
self.links_fingerprinting[f'{label}_segmentations'] = self.network.create_link(self.converters_seg_train[label].outputs['image'], self.node_fingerprinters[label].inputs['segmentations_train'])
self.links_fingerprinting[f'{label}_segmentations'].collapse = 'train'
elif self.segmode == 'Register':
# ---------------------------------------------
@@ -1115,8 +1139,8 @@ Source code for WORC.WORC
# Add to fingerprinting if required
if self.configs[0]['General']['Fingerprint'] == 'True':
# Since there are no segmentations yet of this modality, just use those of the first, provided modality
self.links_fingerprinting[f'{label}_segmentations'] = self.network.create_link(self.converters_seg_train[self.modlabels[0]].outputs['image'], self.node_fingerprinters[label].inputs['segmentations_train'])
self.links_fingerprinting[f'{label}_segmentations'].collapse = 'train'
# -----------------------------------------------------
# Optionally, add segmentix, the in-house segmentation
@@ -1165,13 +1189,13 @@ Source code for WORC.WORC
# Link features to ComBat
self.links_Combat1_train[label] = list()
for i_node, fname in enumerate(self.featurecalculators[label]):
self.links_Combat1_train[label].append(self.ComBat.inputs['features_train'][f'{label}_{self.featurecalculators[label][i_node]}'] << self.featureconverter_train[label][i_node].outputs['feat_out'])
self.links_Combat1_train[label][i_node].collapse = 'train'
if self.TrainTest:
self.links_Combat1_test[label] = list()
for i_node, fname in enumerate(self.featurecalculators[label]):
self.links_Combat1_test[label].append(self.ComBat.inputs['features_test'][f'{label}_{self.featurecalculators[label][i_node]}'] << self.featureconverter_test[label][i_node].outputs['feat_out'])
self.links_Combat1_test[label][i_node].collapse = 'test'
# -----------------------------------------------------
@@ -1189,7 +1213,7 @@ Source code for WORC.WORC
# Append features to the classification
if not self.configs[0]['General']['ComBat'] == 'True':
self.links_C1_train[label].append(self.classify.inputs['features_train'][f'{label}_{self.featurecalculators[label][i_node]}'] << self.featureconverter_train[label][i_node].outputs['feat_out'])
self.links_C1_train[label][i_node].collapse = 'train'
# Save output
@@ -1202,7 +1226,7 @@ Source code for WORC.WORC
# Append features to the classification
if not self.configs[0]['General']['ComBat'] == 'True':
self.links_C1_test[label].append(self.classify.inputs['features_test'][f'{label}_{self.featurecalculators[label][i_node]}'] << self.featureconverter_test[label][i_node].outputs['feat_out'])
self.links_C1_test[label][i_node].collapse = 'test'
# Save output
@@ -1254,6 +1278,358 @@ Source code for WORC.WORC
else:
raise WORCexceptions.WORCIOError("Please provide either images or features.")
+ def build_inference(self):
+ """Build a network to test an already trained model on a test dataset based on the given attributes."""
+ #FIXME WIP
+ if self.images_test or self.features_test:
+ if not self.labels_test:
+ m = "You provided images and/or features for a test set, but not ground truth labels. Please also provide labels for the test set."
+ raise WORCexceptions.WORCValueError(m)
+ else:
+ m = "Please provide either images and/or features for your test set."
+ raise WORCexceptions.WORCValueError(m)
+
+ if not self.configs:
+ m = 'For a testing workflow, you need to provide a WORC config.ini file'
+ raise WORCexceptions.WORCValueError(m)
+
+ self.network = fastr.create_network(self.name)
+
+ # Add trained model node
+ memory = self.fastr_memory_parameters['Classification']
+ self.source_trained_model = self.network.create_source('HDF5',
+ id='trained_model',
+ node_group='trained_model', step_id='general_sources')
+
+ if self.images_test or self.features_test:
+ print('Building testing network...')
+ # We currently require labels for supervised learning
+ if self.labels_test:
+ self.network = fastr.create_network(self.name)
+
+ # Extract some information from the configs
+ image_types = list()
+ for conf_it in range(len(self.configs)):
+ if type(self.configs[conf_it]) == str:
+ # Config is a .ini file, load
+ temp_conf = config_io.load_config(self.configs[conf_it])
+ else:
+ temp_conf = self.configs[conf_it]
+
+ image_type = temp_conf['ImageFeatures']['image_type']
+ image_types.append(image_type)
+
+ # NOTE: We currently use the first configuration as general config
+ if conf_it == 0:
+ print(temp_conf)
+ ensemble_method = [temp_conf['Ensemble']['Method']]
+ ensemble_size = [temp_conf['Ensemble']['Size']]
+ label_names = [temp_conf['Labels']['label_names']]
+ use_ComBat = temp_conf['General']['ComBat']
+ use_segmentix = temp_conf['General']['Segmentix']
+
+ # Create various input sources
+ self.source_patientclass_test =\
+ self.network.create_source('PatientInfoFile',
+ id='patientclass_test',
+ node_group='pctest', step_id='test_sources')
+
+ self.source_ensemble_method =\
+ self.network.create_constant('String', ensemble_method,
+ id='ensemble_method',
+ step_id='Evaluation')
+
+ self.source_ensemble_size =\
+ self.network.create_constant('String', ensemble_size,
+ id='ensemble_size',
+ step_id='Evaluation')
+
+ self.source_LabelType =\
+ self.network.create_constant('String', label_names,
+ id='LabelType',
+ step_id='Evaluation')
+
+ memory = self.fastr_memory_parameters['PlotEstimator']
+ self.plot_estimator =\
+ self.network.create_node('worc/PlotEstimator:1.0', tool_version='1.0',
+ id='plot_Estimator',
+ resources=ResourceLimit(memory=memory),
+ step_id='Evaluation')
+
+ # Links to performance creator
+ self.plot_estimator.inputs['ensemble_method'] = self.source_ensemble_method.output
+ self.plot_estimator.inputs['ensemble_size'] = self.source_ensemble_size.output
+ self.plot_estimator.inputs['label_type'] = self.source_LabelType.output
+ pinfo = self.source_patientclass_test.output
+ self.plot_estimator.inputs['prediction'] = self.source_trained_model.output
+ self.plot_estimator.inputs['pinfo'] = pinfo
+
+ # Performance output
+ self.sink_performance = self.network.create_sink('JsonFile', id='performance', step_id='general_sinks')
+ self.sink_performance.input = self.plot_estimator.outputs['output_json']
+
+ if self.masks_normalize_test:
+ self.sources_masks_normalize_test = dict()
+
+ # -----------------------------------------------------
+ # Optionally, add ComBat Harmonization. Currently done
+ # on full dataset, not in a cross-validation
+ if use_ComBat == 'True':
+ message = '[ERROR] If you want to use ComBat, you need to provide training images or features as well.'
+ raise WORCexceptions.WORCNotImplementedError(message)
+
+ if not self.features_test:
+ # Create nodes to compute features
+ # General
+ self.sources_parameters = dict()
+ self.source_config_pyradiomics = dict()
+ self.source_toolbox_name = dict()
+
+ # testing only
+ self.calcfeatures_test = dict()
+ self.featureconverter_test = dict()
+ self.preprocessing_test = dict()
+ self.sources_images_test = dict()
+ self.sinks_features_test = dict()
+ self.sinks_configs = dict()
+ self.converters_im_test = dict()
+ self.converters_seg_test = dict()
+ self.links_C1_test = dict()
+
+ self.featurecalculators = dict()
+
+ # Check which nodes are necessary
+ if not self.segmentations_test:
+ message = "No automatic segmentation method is yet implemented."
+ raise WORCexceptions.WORCNotImplementedError(message)
+
+ elif len(self.segmentations_test) == len(image_types):
+ # Segmentations provided
+ self.sources_segmentations_test = dict()
+ self.segmode = 'Provided'
+
+ elif len(self.segmentations_test) == 1:
+ # Assume segmentations need to be registered to other modalities
+ print('\t - Adding Elastix node for image registration.')
+ self.add_elastix_sourcesandsinks()
+ pass
+
+ else:
+ nseg = len(self.segmentations_test)
+ nim = len(image_types)
+ m = 'Length of segmentations for testing is ' +\
+ f'{nseg}: should be equal to number of images' +\
+ f' ({nim}) or 1 when using registration.'
+ raise WORCexceptions.WORCValueError(m)
+
+ if use_segmentix == 'True':
+ # Use the segmentix toolbox for segmentation processing
+ print('\t - Adding segmentix node for segmentation preprocessing.')
+ self.sinks_segmentations_segmentix_test = dict()
+ self.sources_masks_test = dict()
+ self.converters_masks_test = dict()
+ self.nodes_segmentix_test = dict()
+
+ if self.semantics_test:
+ # Semantic features are supplied
+ self.sources_semantics_test = dict()
+
+ if self.metadata_test:
+ # Metadata to extract patient features from is supplied
+ self.sources_metadata_test = dict()
+
+ # Create a part of the pipeline for each modality
+ self.modlabels = list()
+ for nmod, mod in enumerate(image_types):
+ # Extract some modality specific config info
+ if type(self.configs[nmod]) == str:
+ # Config is a .ini file, load
+ temp_conf = config_io.load_config(self.configs[nmod])
+ else:
+ temp_conf = self.configs[nmod]
+
+ # Create label for each modality/image
+ num = 0
+ label = mod + '_' + str(num)
+ while label in self.calcfeatures_test.keys():
+ # if label already exists, add number to label
+ num += 1
+ label = mod + '_' + str(num)
+ self.modlabels.append(label)
+
+ # Create required sources and sinks
+ self.sources_parameters[label] = self.network.create_source('ParameterFile', id=f'config_{label}', step_id='general_sources')
+ self.sources_images_test[label] = self.network.create_source('ITKImageFile', id='images_test_' + label, node_group='test', step_id='test_sources')
+
+ if self.metadata_test and len(self.metadata_test) >= nmod + 1:
+ self.sources_metadata_test[label] = self.network.create_source('DicomImageFile', id='metadata_test_' + label, node_group='test', step_id='test_sources')
+
+ if self.masks_test and len(self.masks_test) >= nmod + 1:
+ # Create mask source and convert
+ self.sources_masks_test[label] = self.network.create_source('ITKImageFile', id='mask_test_' + label, node_group='test', step_id='test_sources')
+ memory = self.fastr_memory_parameters['WORCCastConvert']
+ self.converters_masks_test[label] =\
+ self.network.create_node('worc/WORCCastConvert:0.3.2',
+ tool_version='0.1',
+ id='convert_mask_test_' + label,
+ node_group='test',
+ resources=ResourceLimit(memory=memory),
+ step_id='FileConversion')
+
+ self.converters_masks_test[label].inputs['image'] = self.sources_masks_test[label].output
+
+ # First convert the images
+ if any(modality in mod for modality in all_modalities):
+ # Use WORC WORCCastConvert for converting image formats
+ memory = self.fastr_memory_parameters['WORCCastConvert']
+ self.converters_im_test[label] =\
+ self.network.create_node('worc/WORCCastConvert:0.3.2',
+ tool_version='0.1',
+ id='convert_im_test_' + label,
+ resources=ResourceLimit(memory=memory),
+ step_id='FileConversion')
+
+ else:
+ raise WORCexceptions.WORCTypeError(('No valid image type for modality {}: {} provided.').format(str(nmod), mod))
+
+ # Create required links
+ self.converters_im_test[label].inputs['image'] = self.sources_images_test[label].output
+
+ # -----------------------------------------------------
+ # Preprocessing
+ preprocess_node = str(temp_conf['General']['Preprocessing'])
+ print('\t - Adding preprocessing node for image preprocessing.')
+ self.add_preprocessing(preprocess_node, label, nmod)
+
+ # -----------------------------------------------------
+ # Feature calculation
+ feature_calculators =\
+ temp_conf['General']['FeatureCalculators']
+ if not isinstance(feature_calculators, list):
+ # Configparser object, need to split string
+ feature_calculators = feature_calculators.strip('][').split(', ')
+ self.featurecalculators[label] = [f.split('/')[0] for f in feature_calculators]
+ else:
+ self.featurecalculators[label] = feature_calculators
+
+
+ # Add lists for feature calculation and converter objects
+ self.calcfeatures_test[label] = list()
+ self.featureconverter_test[label] = list()
+
+ for f in feature_calculators:
+ print(f'\t - Adding feature calculation node: {f}.')
+ self.add_feature_calculator(f, label, nmod)
+
+ # -----------------------------------------------------
+ # Create the necessary nodes for the segmentation
+ if self.segmode == 'Provided':
+ # Segmentation ----------------------------------------------------
+ # Use the provided segmentations for each modality
+ memory = self.fastr_memory_parameters['WORCCastConvert']
+ self.sources_segmentations_test[label] =\
+ self.network.create_source('ITKImageFile',
+ id='segmentations_test_' + label,
+ node_group='test',
+ step_id='test_sources')
+
+ self.converters_seg_test[label] =\
+ self.network.create_node('worc/WORCCastConvert:0.3.2',
+ tool_version='0.1',
+ id='convert_seg_test_' + label,
+ resources=ResourceLimit(memory=memory),
+ step_id='FileConversion')
+
+ self.converters_seg_test[label].inputs['image'] =\
+ self.sources_segmentations_test[label].output
+
+ elif self.segmode == 'Register':
+ # ---------------------------------------------
+ # Registration nodes: Align segmentation of first
+ # modality to others using registration with Elastix
+ self.add_elastix(label, nmod)
+
+ # -----------------------------------------------------
+ # Optionally, add segmentix, the in-house segmentation
+ # processor of WORC
+ if temp_conf['General']['Segmentix'] == 'True':
+ self.add_segmentix(label, nmod)
+ elif temp_conf['Preprocessing']['Resampling'] == 'True':
+ raise WORCexceptions.WORCValueError('If you use resampling, ' +
+ 'you have to use segmentix to ' +
+ 'make sure the mask is ' +
+ 'also resampled. Please set ' +
+ 'config["General"]["Segmentix"] ' +
+ 'to "True".')
+
+ else:
+ # Provide source or elastix segmentations to
+ # feature calculator
+ for i_node in range(len(self.calcfeatures_test[label])):
+ if self.segmode == 'Provided':
+ self.calcfeatures_test[label][i_node].inputs['segmentation'] =\
+ self.converters_seg_test[label].outputs['image']
+ elif self.segmode == 'Register':
+ if nmod > 0:
+ self.calcfeatures_test[label][i_node].inputs['segmentation'] =\
+ self.transformix_seg_nodes_test[label].outputs['image']
+ else:
+ self.calcfeatures_test[label][i_node].inputs['segmentation'] =\
+ self.converters_seg_test[label].outputs['image']
+
+ # -----------------------------------------------------
+ # Optionally, add ComBat Harmonization
+ if use_ComBat == 'True':
+ # Link features to ComBat
+ self.links_Combat1_test[label] = list()
+ for i_node, fname in enumerate(self.featurecalculators[label]):
+ self.links_Combat1_test[label].append(self.ComBat.inputs['features_test'][f'{label}_{self.featurecalculators[label][i_node]}'] << self.featureconverter_test[label][i_node].outputs['feat_out'])
+ self.links_Combat1_test[label][i_node].collapse = 'test'
+
+ # -----------------------------------------------------
+ # Output the features
+ # Add the features from this modality to the classifier node input
+ self.links_C1_test[label] = list()
+ self.sinks_features_test[label] = list()
+
+ for i_node, fname in enumerate(self.featurecalculators[label]):
+ # Create sink for feature outputs
+ node_id = 'features_test_' + label + '_' + fname
+ node_id = node_id.replace(':', '_').replace('.', '_').replace('/', '_')
+ self.sinks_features_test[label].append(self.network.create_sink('HDF5', id=node_id, step_id='test_sinks'))
+
+ # Save output
+ self.sinks_features_test[label][i_node].input = self.featureconverter_test[label][i_node].outputs['feat_out']
+
+ else:
+ # Features already provided: hence we can skip numerous nodes
+ self.sources_features_train = dict()
+ self.links_C1_train = dict()
+
+ if self.features_test:
+ self.sources_features_test = dict()
+ self.links_C1_test = dict()
+
+ # Create label for each modality/image
+ self.modlabels = list()
+ for num, mod in enumerate(image_types):
+ num = 0
+ label = mod + str(num)
+ while label in self.sources_features_train.keys():
+ # if label exists, add number to label
+ num += 1
+ label = mod + str(num)
+ self.modlabels.append(label)
+
+ # Create a node for the features
+ self.sources_features_test[label] = self.network.create_source('HDF5', id='features_test_' + label, node_group='test', step_id='test_sources')
+
+ else:
+ raise WORCexceptions.WORCIOError("Please provide labels for training, i.e., WORC.labels_train or SimpleWORC.labels_from_this_file.")
+ else:
+ raise WORCexceptions.WORCIOError("Please provide either images or features.")
+
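
The inference network above follows the same fastr source -> node -> sink pattern used throughout this file; a condensed sketch using only calls that appear above (the ids are illustrative, and the WORC fastr tools are assumed to be installed):

    import fastr
    from fastr.api import ResourceLimit

    network = fastr.create_network('sketch')
    source = network.create_source('ITKImageFile', id='images_in',
                                   step_id='sources')
    node = network.create_node('worc/WORCCastConvert:0.3.2', tool_version='0.1',
                               id='convert', resources=ResourceLimit(memory='4G'),
                               step_id='FileConversion')
    node.inputs['image'] = source.output
    sink = network.create_sink('ITKImageFile', id='images_out', step_id='sinks')
    sink.input = node.outputs['image']
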
def add_fingerprinter(self, id, type, config_source):
"""Add WORC Fingerprinter to the network.
@@ -1264,7 +1640,7 @@ Source code for WORC.WORC
memory = self.fastr_memory_parameters['Fingerprinter']
fingerprinter_node = self.network.create_node('worc/Fingerprinter:1.0',
tool_version='1.0',
id=f'fingerprinter_{id}',
resources=ResourceLimit(memory=memory),
step_id='FingerPrinting')
@@ -1275,10 +1651,10 @@ Source code for WORC.WORC
# Add type input
valid_types = ['classification', 'images']
if type not in valid_types:
raise WORCexceptions.WORCValueError(f'Type {type} is not valid for fingerprinting. Should be one of {valid_types}.')
type_node = self.network.create_constant('String', type,
id=f'type_fingerprint_{id}',
node_group='train',
step_id='FingerPrinting')
fingerprinter_node.inputs['type'] = type_node.output
@@ -1319,7 +1695,7 @@ Source code for WORC.WORC
self.links_Combat_out_train.collapse = 'ComBat'
self.sinks_features_train_ComBat.input = self.ComBat.outputs['features_train_out']
- if self.TrainTest:
+ if self.TrainTest or self.OnlyTest:
# Create sink for ComBat output
self.sinks_features_test_ComBat = self.network.create_sink('HDF5', id='features_test_ComBat', step_id='ComBat')
@@ -1334,21 +1710,32 @@ Source code for WORC.WORC
def add_preprocessing(self, preprocess_node, label, nmod):
"""Add nodes required for preprocessing of images."""
+
+ # Extract some general information on the setup
+ if type(self.configs[0]) == str:
+ # Config is a .ini file, load
+ temp_conf = config_io.load_config(self.configs[nmod])
+ else:
+ temp_conf = self.configs[nmod]
+
memory = self.fastr_memory_parameters['Preprocessing']
- self.preprocessing_train[label] = self.network.create_node(preprocess_node, tool_version='1.0', id='preprocessing_train_' + label, resources=ResourceLimit(memory=memory), step_id='Preprocessing')
+ if not self.OnlyTest:
+ self.preprocessing_train[label] = self.network.create_node(preprocess_node, tool_version='1.0', id='preprocessing_train_' + label, resources=ResourceLimit(memory=memory), step_id='Preprocessing')
+
if self.TrainTest:
self.preprocessing_test[label] = self.network.create_node(preprocess_node, tool_version='1.0', id='preprocessing_test_' + label, resources=ResourceLimit(memory=memory), step_id='Preprocessing')
# Create required links
- if self.configs[0]['General']['Fingerprint'] == 'True':
- self.preprocessing_train[label].inputs['parameters'] = self.node_fingerprinters[label].outputs['config']
- else:
- self.preprocessing_train[label].inputs['parameters'] = self.sources_parameters[label].output
+ if not self.OnlyTest:
+ if temp_conf['General']['Fingerprint'] == 'True':
+ self.preprocessing_train[label].inputs['parameters'] = self.node_fingerprinters[label].outputs['config']
+ else:
+ self.preprocessing_train[label].inputs['parameters'] = self.sources_parameters[label].output
- self.preprocessing_train[label].inputs['image'] = self.converters_im_train[label].outputs['image']
+ self.preprocessing_train[label].inputs['image'] = self.converters_im_train[label].outputs['image']
if self.TrainTest:
- if self.configs[0]['General']['Fingerprint'] == 'True':
+ if temp_conf['General']['Fingerprint'] == 'True' and not self.OnlyTest:
self.preprocessing_test[label].inputs['parameters'] = self.node_fingerprinters[label].outputs['config']
else:
self.preprocessing_test[label].inputs['parameters'] = self.sources_parameters[label].output
@@ -1378,12 +1765,13 @@ Source code for WORC.WORC
label])
memory = self.fastr_memory_parameters['FeatureCalculator']
- node_train =\
- self.network.create_node(calcfeat_node,
- tool_version='1.0',
- id='calcfeatures_train_' + node_ID,
- resources=ResourceLimit(memory=memory),
- step_id='Feature_Extraction')
+ if not self.OnlyTest:
+ node_train =\
+ self.network.create_node(calcfeat_node,
+ tool_version='1.0',
+ id='calcfeatures_train_' + node_ID,
+ resources=ResourceLimit(memory=memory),
+ step_id='Feature_Extraction')
if self.TrainTest:
node_test =\
@@ -1410,8 +1798,9 @@ Source code for WORC.WORC
id='format_pyradiomics_' + label,
node_group='train',
step_id='Feature_Extraction')
- node_train.inputs['format'] =\
- self.source_format_pyradiomics.output
+ if not self.OnlyTest:
+ node_train.inputs['format'] =\
+ self.source_format_pyradiomics.output
if self.TrainTest:
node_test.inputs['format'] =\
@@ -1419,25 +1808,37 @@ Source code for WORC.WORC
# Create required links
# We can have a different config for different tools
- if 'pyradiomics' in calcfeat_node.lower():
- if self.configs[0]['General']['Fingerprint'] != 'True':
- node_train.inputs['parameters'] =\
- self.source_config_pyradiomics[label].output
+ if not self.OnlyTest:
+ if 'pyradiomics' in calcfeat_node.lower():
+ if self.configs[0]['General']['Fingerprint'] != 'True':
+ node_train.inputs['parameters'] =\
+ self.source_config_pyradiomics[label].output
+ else:
+ node_train.inputs['parameters'] =\
+ self.node_fingerprinters[label].outputs['config_pyradiomics']
else:
- node_train.inputs['parameters'] =\
- self.node_fingerprinters[label].outputs['config_pyradiomics']
- else:
- if self.configs[0]['General']['Fingerprint'] == 'True':
- node_train.inputs['parameters'] =\
- self.node_fingerprinters[label].outputs['config']
+ if self.configs[0]['General']['Fingerprint'] == 'True':
+ node_train.inputs['parameters'] =\
+ self.node_fingerprinters[label].outputs['config']
+ else:
+ node_train.inputs['parameters'] =\
+ self.sources_parameters[label].output
+
+ node_train.inputs['image'] =\
+ self.preprocessing_train[label].outputs['image']
+
+ if self.OnlyTest:
+ if 'pyradiomics' in calcfeat_node.lower():
+ node_test.inputs['parameters'] =\
+ self.source_config_pyradiomics[label].output
else:
- node_train.inputs['parameters'] =\
+ node_test.inputs['parameters'] =\
self.sources_parameters[label].output
- node_train.inputs['image'] =\
- self.preprocessing_train[label].outputs['image']
-
- if self.TrainTest:
+ node_test.inputs['image'] =\
+ self.preprocessing_test[label].outputs['image']
+
+ elif self.TrainTest:
if 'pyradiomics' in calcfeat_node.lower():
if self.configs[0]['General']['Fingerprint'] != 'True':
node_test.inputs['parameters'] =\
@@ -1485,14 +1886,15 @@ Source code for WORC.WORC
self.sources_semantics_test[label].output
# Add feature converter to make features WORC compatible
- conv_train =\
- self.network.create_node('worc/FeatureConverter:1.0',
- tool_version='1.0',
- id='featureconverter_train_' + node_ID,
- resources=ResourceLimit(memory='4G'),
- step_id='Feature_Extraction')
+ if not self.OnlyTest:
+ conv_train =\
+ self.network.create_node('worc/FeatureConverter:1.0',
+ tool_version='1.0',
+ id='featureconverter_train_' + node_ID,
+ resources=ResourceLimit(memory='4G'),
+ step_id='Feature_Extraction')
- conv_train.inputs['feat_in'] = node_train.outputs['features']
+ conv_train.inputs['feat_in'] = node_train.outputs['features']
# Add source to tell converter which toolbox we use
if 'pyradiomics' in calcfeat_node.lower():
@@ -1500,20 +1902,21 @@ Source code for WORC.WORC
elif 'predict' in calcfeat_node.lower():
toolbox = 'PREDICT'
else:
message = f'Toolbox {calcfeat_node} not recognized!'
raise WORCexceptions.WORCKeyError(message)
self.source_toolbox_name[label] =\
self.network.create_constant('String', toolbox,
id=f'toolbox_name_{toolbox}_{label}',
step_id='Feature_Extraction')
- conv_train.inputs['toolbox'] = self.source_toolbox_name[label].output
- if self.configs[0]['General']['Fingerprint'] == 'True':
- conv_train.inputs['config'] =\
- self.node_fingerprinters[label].outputs['config']
- else:
- conv_train.inputs['config'] = self.sources_parameters[label].output
+ if not self.OnlyTest:
+ conv_train.inputs['toolbox'] = self.source_toolbox_name[label].output
+ if self.configs[0]['General']['Fingerprint'] == 'True':
+ conv_train.inputs['config'] =\
+ self.node_fingerprinters[label].outputs['config']
+ else:
+ conv_train.inputs['config'] = self.sources_parameters[label].output
if self.TrainTest:
conv_test =\
@@ -1525,7 +1928,10 @@ Source code for WORC.WORC
conv_test.inputs['feat_in'] = node_test.outputs['features']
conv_test.inputs['toolbox'] = self.source_toolbox_name[label].output
- if self.configs[0]['General']['Fingerprint'] == 'True':
+ if self.OnlyTest:
+ conv_test.inputs['config'] =\
+ self.sources_parameters[label].output
+ elif self.configs[0]['General']['Fingerprint'] == 'True':
conv_test.inputs['config'] =\
self.node_fingerprinters[label].outputs['config']
else:
@@ -1533,8 +1939,10 @@ Source code for WORC.WORC
self.sources_parameters[label].output
# Append to nodes to list
- self.calcfeatures_train[label].append(node_train)
- self.featureconverter_train[label].append(conv_train)
+ if not self.OnlyTest:
+ self.calcfeatures_train[label].append(node_train)
+ self.featureconverter_train[label].append(conv_train)
+
if self.TrainTest:
self.calcfeatures_test[label].append(node_test)
self.featureconverter_test[label].append(conv_test)
@@ -1545,25 +1953,28 @@ Source code for WORC.WORC
self.segmode = 'Register'
self.source_Elastix_Parameters = dict()
- self.elastix_nodes_train = dict()
- self.transformix_seg_nodes_train = dict()
- self.sources_segmentations_train = dict()
- self.sinks_transformations_train = dict()
- self.sinks_segmentations_elastix_train = dict()
- self.sinks_images_elastix_train = dict()
- self.converters_seg_train = dict()
- self.edittransformfile_nodes_train = dict()
- self.transformix_im_nodes_train = dict()
-
- self.elastix_nodes_test = dict()
- self.transformix_seg_nodes_test = dict()
- self.sources_segmentations_test = dict()
- self.sinks_transformations_test = dict()
- self.sinks_segmentations_elastix_test = dict()
- self.sinks_images_elastix_test = dict()
- self.converters_seg_test = dict()
- self.edittransformfile_nodes_test = dict()
- self.transformix_im_nodes_test = dict()
+
+ if not self.OnlyTest:
+ self.elastix_nodes_train = dict()
+ self.transformix_seg_nodes_train = dict()
+ self.sources_segmentations_train = dict()
+ self.sinks_transformations_train = dict()
+ self.sinks_segmentations_elastix_train = dict()
+ self.sinks_images_elastix_train = dict()
+ self.converters_seg_train = dict()
+ self.edittransformfile_nodes_train = dict()
+ self.transformix_im_nodes_train = dict()
+
+ if self.TrainTest:
+ self.elastix_nodes_test = dict()
+ self.transformix_seg_nodes_test = dict()
+ self.sources_segmentations_test = dict()
+ self.sinks_transformations_test = dict()
+ self.sinks_segmentations_elastix_test = dict()
+ self.sinks_images_elastix_test = dict()
+ self.converters_seg_test = dict()
+ self.edittransformfile_nodes_test = dict()
+ self.transformix_im_nodes_test = dict()
def add_elastix(self, label, nmod):
""" Add image registration through elastix to network."""
@@ -1571,21 +1982,22 @@ Source code for WORC.WORC
# which should be on the first modality
if nmod == 0:
memory = self.fastr_memory_parameters['WORCCastConvert']
- self.sources_segmentations_train[label] =\
- self.network.create_source('ITKImageFile',
- id='segmentations_train_' + label,
- node_group='train',
- step_id='train_sources')
-
- self.converters_seg_train[label] =\
- self.network.create_node('worc/WORCCastConvert:0.3.2',
- tool_version='0.1',
- id='convert_seg_train_' + label,
- resources=ResourceLimit(memory=memory),
- step_id='FileConversion')
+ if not self.OnlyTest:
+ self.sources_segmentations_train[label] =\
+ self.network.create_source('ITKImageFile',
+ id='segmentations_train_' + label,
+ node_group='train',
+ step_id='train_sources')
- self.converters_seg_train[label].inputs['image'] =\
- self.sources_segmentations_train[label].output
+ self.converters_seg_train[label] =\
+ self.network.create_node('worc/WORCCastConvert:0.3.2',
+ tool_version='0.1',
+ id='convert_seg_train_' + label,
+ resources=ResourceLimit(memory=memory),
+ step_id='FileConversion')
+
+ self.converters_seg_train[label].inputs['image'] =\
+ self.sources_segmentations_train[label].output
if self.TrainTest:
self.sources_segmentations_test[label] =\
@@ -1615,27 +2027,28 @@ Source code for WORC.WORC
str(self.configs[0]['General']['TransformationNode'])
memory_elastix = self.fastr_memory_parameters['Elastix']
- self.elastix_nodes_train[label] =\
- self.network.create_node(elastix_node,
- tool_version='0.2',
- id='elastix_train_' + label,
- resources=ResourceLimit(memory=memory_elastix),
- step_id='Image_Registration')
-
- memory_transformix = self.fastr_memory_parameters['Elastix']
- self.transformix_seg_nodes_train[label] =\
- self.network.create_node(transformix_node,
- tool_version='0.2',
- id='transformix_seg_train_' + label,
- resources=ResourceLimit(memory=memory_transformix),
- step_id='Image_Registration')
-
- self.transformix_im_nodes_train[label] =\
- self.network.create_node(transformix_node,
- tool_version='0.2',
- id='transformix_im_train_' + label,
- resources=ResourceLimit(memory=memory_transformix),
- step_id='Image_Registration')
+ if not self.OnlyTest:
+ self.elastix_nodes_train[label] =\
+ self.network.create_node(elastix_node,
+ tool_version='0.2',
+ id='elastix_train_' + label,
+ resources=ResourceLimit(memory=memory_elastix),
+ step_id='Image_Registration')
+
+ memory_transformix = self.fastr_memory_parameters['Elastix']
+ self.transformix_seg_nodes_train[label] =\
+ self.network.create_node(transformix_node,
+ tool_version='0.2',
+ id='transformix_seg_train_' + label,
+ resources=ResourceLimit(memory=memory_transformix),
+ step_id='Image_Registration')
+
+ self.transformix_im_nodes_train[label] =\
+ self.network.create_node(transformix_node,
+ tool_version='0.2',
+ id='transformix_im_train_' + label,
+ resources=ResourceLimit(memory=memory_transformix),
+ step_id='Image_Registration')
if self.TrainTest:
self.elastix_nodes_test[label] =\
@@ -1661,15 +2074,16 @@ Source code for WORC.WORC
# Create sources_segmentation
# M1 = moving, others = fixed
- self.elastix_nodes_train[label].inputs['fixed_image'] =\
- self.converters_im_train[label].outputs['image']
+ if not self.OnlyTest:
+ self.elastix_nodes_train[label].inputs['fixed_image'] =\
+ self.converters_im_train[label].outputs['image']
- self.elastix_nodes_train[label].inputs['moving_image'] =\
- self.converters_im_train[self.modlabels[0]].outputs['image']
+ self.elastix_nodes_train[label].inputs['moving_image'] =\
+ self.converters_im_train[self.modlabels[0]].outputs['image']
# Add node that copies metadata from the image to the
# segmentation if required
- if self.CopyMetadata:
+ if self.CopyMetadata and not self.OnlyTest:
# Copy metadata from the image which was registered to
# the segmentation, if it is not created yet
if not hasattr(self, "copymetadata_nodes_train"):
@@ -1731,12 +2145,12 @@ Source code for WORC.WORC
id='Elastix_Para_' + label,
node_group='elpara',
step_id='Image_Registration')
+ if not self.OnlyTest:
+ self.link_elparam_train =\
+ self.network.create_link(self.source_Elastix_Parameters[label].output,
+ self.elastix_nodes_train[label].inputs['parameters'])
- self.link_elparam_train =\
- self.network.create_link(self.source_Elastix_Parameters[label].output,
- self.elastix_nodes_train[label].inputs['parameters'])
-
- self.link_elparam_train.collapse = 'elpara'
+ self.link_elparam_train.collapse = 'elpara'
if self.TrainTest:
self.link_elparam_test =\
@@ -1760,17 +2174,18 @@ Source code for WORC.WORC
self.converters_masks_test[self.modlabels[0]].outputs['image']
# Change the FinalBSpline Interpolation order to 0 as required for binary images: see https://github.com/SuperElastix/elastix/wiki/FAQ
- self.edittransformfile_nodes_train[label] =\
- self.network.create_node('elastixtools/EditElastixTransformFile:0.1',
- tool_version='0.1',
- id='EditElastixTransformFile_train_' + label,
- step_id='Image_Registration')
+ if not self.OnlyTest:
+ self.edittransformfile_nodes_train[label] =\
+ self.network.create_node('elastixtools/EditElastixTransformFile:0.1',
+ tool_version='0.1',
+ id='EditElastixTransformFile_train_' + label,
+ step_id='Image_Registration')
- self.edittransformfile_nodes_train[label].inputs['set'] =\
- ["FinalBSplineInterpolationOrder=0"]
+ self.edittransformfile_nodes_train[label].inputs['set'] =\
+ ["FinalBSplineInterpolationOrder=0"]
- self.edittransformfile_nodes_train[label].inputs['transform'] =\
- self.elastix_nodes_train[label].outputs['transform'][-1]
+ self.edittransformfile_nodes_train[label].inputs['transform'] =\
+ self.elastix_nodes_train[label].outputs['transform'][-1]
if self.TrainTest:
self.edittransformfile_nodes_test[label] =\
@@ -1786,14 +2201,15 @@ Source code for WORC.WORC
self.elastix_nodes_test[label].outputs['transform'][-1]
# Link data and transformation to transformix and source
- self.transformix_seg_nodes_train[label].inputs['transform'] =\
- self.edittransformfile_nodes_train[label].outputs['transform']
+ if not self.OnlyTest:
+ self.transformix_seg_nodes_train[label].inputs['transform'] =\
+ self.edittransformfile_nodes_train[label].outputs['transform']
- self.transformix_im_nodes_train[label].inputs['transform'] =\
- self.elastix_nodes_train[label].outputs['transform'][-1]
+ self.transformix_im_nodes_train[label].inputs['transform'] =\
+ self.elastix_nodes_train[label].outputs['transform'][-1]
- self.transformix_im_nodes_train[label].inputs['image'] =\
- self.converters_im_train[self.modlabels[0]].outputs['image']
+ self.transformix_im_nodes_train[label].inputs['image'] =\
+ self.converters_im_train[self.modlabels[0]].outputs['image']
if self.TrainTest:
self.transformix_seg_nodes_test[label].inputs['transform'] =\
@@ -1806,38 +2222,44 @@ Source code for WORC.WORC
self.converters_im_test[self.modlabels[0]].outputs['image']
if self.configs[nmod]['General']['Segmentix'] != 'True':
- # These segmentations serve as input for the feature calculation
- for i_node in range(len(self.calcfeatures_train[label])):
- self.calcfeatures_train[label][i_node].inputs['segmentation'] =\
- self.transformix_seg_nodes_train[label].outputs['image']
- if self.TrainTest:
+ if not self.OnlyTest:
+ # These segmentations serve as input for the feature calculation
+ for i_node in range(len(self.calcfeatures_train[label])):
+ self.calcfeatures_train[label][i_node].inputs['segmentation'] =\
+ self.transformix_seg_nodes_train[label].outputs['image']
+ if self.TrainTest:
+ self.calcfeatures_test[label][i_node].inputs['segmentation'] =\
+ self.transformix_seg_nodes_test[label].outputs['image']
+ else:
+ for i_node in range(len(self.calcfeatures_test[label])):
self.calcfeatures_test[label][i_node].inputs['segmentation'] =\
self.transformix_seg_nodes_test[label].outputs['image']
# Save outputfor the training set
- self.sinks_transformations_train[label] =\
- self.network.create_sink('ElastixTransformFile',
- id='transformations_train_' + label,
- step_id='train_sinks')
+ if not self.OnlyTest:
+ self.sinks_transformations_train[label] =\
+ self.network.create_sink('ElastixTransformFile',
+ id='transformations_train_' + label,
+ step_id='train_sinks')
- self.sinks_segmentations_elastix_train[label] =\
- self.network.create_sink('ITKImageFile',
- id='segmentations_out_elastix_train_' + label,
- step_id='train_sinks')
+ self.sinks_segmentations_elastix_train[label] =\
+ self.network.create_sink('ITKImageFile',
+ id='segmentations_out_elastix_train_' + label,
+ step_id='train_sinks')
- self.sinks_images_elastix_train[label] =\
- self.network.create_sink('ITKImageFile',
- id='images_out_elastix_train_' + label,
- step_id='train_sinks')
+ self.sinks_images_elastix_train[label] =\
+ self.network.create_sink('ITKImageFile',
+ id='images_out_elastix_train_' + label,
+ step_id='train_sinks')
- self.sinks_transformations_train[label].input =\
- self.elastix_nodes_train[label].outputs['transform']
+ self.sinks_transformations_train[label].input =\
+ self.elastix_nodes_train[label].outputs['transform']
- self.sinks_segmentations_elastix_train[label].input =\
- self.transformix_seg_nodes_train[label].outputs['image']
+ self.sinks_segmentations_elastix_train[label].input =\
+ self.transformix_seg_nodes_train[label].outputs['image']
- self.sinks_images_elastix_train[label].input =\
- self.transformix_im_nodes_train[label].outputs['image']
+ self.sinks_images_elastix_train[label].input =\
+ self.transformix_im_nodes_train[label].outputs['image']
# Save output for the test set
if self.TrainTest:
@@ -1866,53 +2288,56 @@ Source code for WORC.WORC
# Segmentix nodes -------------------------------------------------
# Use segmentix node to convert input segmentation into
# correct contour
- if label not in self.sinks_segmentations_segmentix_train:
- self.sinks_segmentations_segmentix_train[label] =\
- self.network.create_sink('ITKImageFile',
- id='segmentations_out_segmentix_train_' + label,
- step_id='train_sinks')
+ if not self.OnlyTest:
+ if label not in self.sinks_segmentations_segmentix_train:
+ self.sinks_segmentations_segmentix_train[label] =\
+ self.network.create_sink('ITKImageFile',
+ id='segmentations_out_segmentix_train_' + label,
+ step_id='train_sinks')
- memory = self.fastr_memory_parameters['Segmentix']
- self.nodes_segmentix_train[label] =\
- self.network.create_node('segmentix/Segmentix:1.0',
- tool_version='1.0',
- id='segmentix_train_' + label,
- resources=ResourceLimit(memory=memory),
- step_id='Preprocessing')
+ memory = self.fastr_memory_parameters['Segmentix']
+ self.nodes_segmentix_train[label] =\
+ self.network.create_node('segmentix/Segmentix:1.0',
+ tool_version='1.0',
+ id='segmentix_train_' + label,
+ resources=ResourceLimit(memory=memory),
+ step_id='Preprocessing')
- # Input the image
- self.nodes_segmentix_train[label].inputs['image'] =\
- self.converters_im_train[label].outputs['image']
+ # Input the image
+ self.nodes_segmentix_train[label].inputs['image'] =\
+ self.converters_im_train[label].outputs['image']
# Input the metadata
if self.metadata_train and len(self.metadata_train) >= nmod + 1:
self.nodes_segmentix_train[label].inputs['metadata'] = self.sources_metadata_train[label].output
# Input the segmentation
- if hasattr(self, 'transformix_seg_nodes_train'):
- if label in self.transformix_seg_nodes_train.keys():
- # Use output of registration in segmentix
- self.nodes_segmentix_train[label].inputs['segmentation_in'] =\
- self.transformix_seg_nodes_train[label].outputs['image']
+ if not self.OnlyTest:
+ if hasattr(self, 'transformix_seg_nodes_train'):
+ if label in self.transformix_seg_nodes_train.keys():
+ # Use output of registration in segmentix
+ self.nodes_segmentix_train[label].inputs['segmentation_in'] =\
+ self.transformix_seg_nodes_train[label].outputs['image']
+ else:
+ # Use original segmentation
+ self.nodes_segmentix_train[label].inputs['segmentation_in'] =\
+ self.converters_seg_train[label].outputs['image']
else:
# Use original segmentation
self.nodes_segmentix_train[label].inputs['segmentation_in'] =\
self.converters_seg_train[label].outputs['image']
- else:
- # Use original segmentation
- self.nodes_segmentix_train[label].inputs['segmentation_in'] =\
- self.converters_seg_train[label].outputs['image']
# Input the parameters
- if self.configs[0]['General']['Fingerprint'] == 'True':
- self.nodes_segmentix_train[label].inputs['parameters'] =\
- self.node_fingerprinters[label].outputs['config']
- else:
- self.nodes_segmentix_train[label].inputs['parameters'] =\
- self.sources_parameters[label].output
+ if not self.OnlyTest:
+ if self.configs[0]['General']['Fingerprint'] == 'True':
+ self.nodes_segmentix_train[label].inputs['parameters'] =\
+ self.node_fingerprinters[label].outputs['config']
+ else:
+ self.nodes_segmentix_train[label].inputs['parameters'] =\
+ self.sources_parameters[label].output
- self.sinks_segmentations_segmentix_train[label].input =\
- self.nodes_segmentix_train[label].outputs['segmentation_out']
+ self.sinks_segmentations_segmentix_train[label].input =\
+ self.nodes_segmentix_train[label].outputs['segmentation_out']
if self.TrainTest:
self.sinks_segmentations_segmentix_test[label] =\
@@ -1949,7 +2374,7 @@ Source code for WORC.WORC
self.nodes_segmentix_test[label].inputs['segmentation_in'] =\
self.converters_seg_test[label].outputs['image']
- if self.configs[0]['General']['Fingerprint'] == 'True':
+ if self.configs[0]['General']['Fingerprint'] == 'True' and not self.OnlyTest:
self.nodes_segmentix_test[label].inputs['parameters'] =\
self.node_fingerprinters[label].outputs['config']
else:
@@ -1959,14 +2384,19 @@ Source code for WORC.WORC
self.sinks_segmentations_segmentix_test[label].input =\
self.nodes_segmentix_test[label].outputs['segmentation_out']
- for i_node in range(len(self.calcfeatures_train[label])):
- self.calcfeatures_train[label][i_node].inputs['segmentation'] =\
- self.nodes_segmentix_train[label].outputs['segmentation_out']
+ if not self.OnlyTest:
+ for i_node in range(len(self.calcfeatures_train[label])):
+ self.calcfeatures_train[label][i_node].inputs['segmentation'] =\
+ self.nodes_segmentix_train[label].outputs['segmentation_out']
- if self.TrainTest:
+ if self.TrainTest:
+ self.calcfeatures_test[label][i_node].inputs['segmentation'] =\
+ self.nodes_segmentix_test[label].outputs['segmentation_out']
+ else:
+ for i_node in range(len(self.calcfeatures_test[label])):
self.calcfeatures_test[label][i_node].inputs['segmentation'] =\
self.nodes_segmentix_test[label].outputs['segmentation_out']
-
+
if self.masks_train and len(self.masks_train) >= nmod + 1:
# Use masks
self.nodes_segmentix_train[label].inputs['mask'] =\
@@ -1984,7 +2414,10 @@ Source code for WORC.WORC
self.sink_data = dict()
# Save the configurations as files
- self.save_config()
+ if not self.OnlyTest:
+ self.save_config()
+ else:
+ self.fastrconfigs = self.configs
# fixed splits
if self.fixedsplits:
@@ -1993,6 +2426,7 @@ Source code for WORC.WORC
# Set source and sink data
self.source_data['patientclass_train'] = self.labels_train
self.source_data['patientclass_test'] = self.labels_test
+ self.source_data['trained_model'] = self.trained_model
self.sink_data['classification'] = ("vfs://output/{}/estimator_{{sample_id}}_{{cardinality}}{{ext}}").format(self.name)
self.sink_data['performance'] = ("vfs://output/{}/performance_{{sample_id}}_{{cardinality}}{{ext}}").format(self.name)
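A note on the sink URL templates above: str.format fills the single-brace {} with the experiment name, while the doubled braces escape to literal {sample_id}, {cardinality} and {ext} placeholders that fastr substitutes per sample. A quick illustration (the experiment name is arbitrary):

    name = 'WORC_example'
    url = ("vfs://output/{}/performance_{{sample_id}}_{{cardinality}}{{ext}}").format(name)
    print(url)
    # vfs://output/WORC_example/performance_{sample_id}_{cardinality}{ext}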
@@ -2001,12 +2435,19 @@ Source code for WORC.WORC
self.sink_data['features_train_ComBat'] = ("vfs://output/{}/ComBat/features_ComBat_{{sample_id}}_{{cardinality}}{{ext}}").format(self.name)
self.sink_data['features_test_ComBat'] = ("vfs://output/{}/ComBat/features_ComBat_{{sample_id}}_{{cardinality}}{{ext}}").format(self.name)
+ # Get info from the first config file
+ if type(self.configs[0]) == str:
+ # Config is a .ini file, load
+ temp_conf = config_io.load_config(self.configs[0])
+ else:
+ temp_conf = self.configs[0]
+
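The branch above lets the later pyradiomics check index temp_conf uniformly, whether self.configs[0] arrived as a parsed object or as a path to an .ini file. A minimal sketch of that normalisation pattern (the helper below is illustrative, not the actual config_io.load_config):

    import configparser

    def load_config_like(config):
        # Accept either a path to an .ini file or an already-parsed mapping.
        if isinstance(config, str):
            parser = configparser.ConfigParser()
            parser.read(config)
            # Expose each section as a plain dict of strings.
            return {section: dict(parser[section]) for section in parser.sections()}
        return config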
# Set the source data from the WORC objects you created
for num, label in enumerate(self.modlabels):
self.source_data['config_' + label] = self.fastrconfigs[num]
- self.sink_data[f'config_{label}_sink'] = f"vfs://output/{self.name}/config_{label}_{{sample_id}}_{{cardinality}}{{ext}}"
+ self.sink_data[f'config_{label}_sink'] = f"vfs://output/{self.name}/config_{label}_{{sample_id}}_{{cardinality}}{{ext}}"
- if 'pyradiomics' in self.configs[0]['General']['FeatureCalculators'] and self.configs[0]['General']['Fingerprint'] != 'True':
+ if 'pyradiomics' in temp_conf['General']['FeatureCalculators'] and temp_conf['General']['Fingerprint'] != 'True':
self.source_data['config_pyradiomics_' + label] = self.pyradiomics_configs[num]
# Add train data sources
@@ -2076,6 +2517,7 @@ Source code for WORC.WORC
self.sink_data['images_out_elastix_test_' + label] = ("vfs://output/{}/Images/im_{}_elastix_{{sample_id}}_{{cardinality}}{{ext}}").format(self.name, label)
if hasattr(self, 'featurecalculators'):
for f in self.featurecalculators[label]:
+ f = f.replace(':', '_').replace('.', '_').replace('/', '_')
self.sink_data['features_test_' + label + '_' + f] = ("vfs://output/{}/Features/features_{}_{}_{{sample_id}}_{{cardinality}}{{ext}}").format(self.name, f, label)
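The added replace chain sanitises the feature-calculator name before embedding it in a sink id, since tool references contain ':', '.' and '/' characters that are not valid there. For a hypothetical calculator name:

    f = 'predict/CalcFeatures:1.0'                  # illustrative tool reference
    f = f.replace(':', '_').replace('.', '_').replace('/', '_')
    print(f)                                        # predict_CalcFeatures_1_0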
# Add elastix sinks if used
@@ -2104,21 +2546,22 @@ Source code for WORC.WORC
except graphviz.backend.ExecutableNotFound:
             print('[WORC WARNING] Graphviz executable not found: not drawing network diagram. Make sure the Graphviz executables are on your system's PATH.')
except graphviz.backend.CalledProcessError as e:
- print(f'[WORC WARNING] Graphviz executable gave an error: not drawing network diagram. Original error: {e}')
+ print(f'[WORC WARNING] Graphviz executable gave an error: not drawing network diagram. Original error: {e}')
- # export hyper param. search space to LaTeX table
- for config in self.fastrconfigs:
- config_path = Path(url2pathname(urlparse(config).path))
- tex_path = f'{config_path.parent.absolute() / config_path.stem}_hyperparams_space.tex'
- export_hyper_params_to_latex(config_path, tex_path)
+ # export hyper param. search space to LaTeX table. Only for training models.
+ if not self.OnlyTest:
+ for config in self.fastrconfigs:
+ config_path = Path(url2pathname(urlparse(config).path))
+ tex_path = f'{config_path.parent.absolute() / config_path.stem}_hyperparams_space.tex'
+ export_hyper_params_to_latex(config_path, tex_path)
if DebugDetector().do_detection():
print("Source Data:")
for k in self.source_data.keys():
- print(f"\t {k}: {self.source_data[k]}.")
+ print(f"\t {k}: {self.source_data[k]}.")
print("\n Sink Data:")
for k in self.sink_data.keys():
- print(f"\t {k}: {self.sink_data[k]}.")
+ print(f"\t {k}: {self.sink_data[k]}.")
# When debugging, set the tempdir to the default of fastr + name
self.fastr_tmpdir = os.path.join(fastr.config.mounts['tmp'],
@@ -2157,7 +2600,7 @@ Source code for WORC.WORC
config.read(c)
c = config
- cfile = os.path.join(self.fastr_tmpdir, f"config_{self.name}_{num}.ini")
+ cfile = os.path.join(self.fastr_tmpdir, f"config_{self.name}_{num}.ini")
if not os.path.exists(os.path.dirname(cfile)):
os.makedirs(os.path.dirname(cfile))
@@ -2166,15 +2609,15 @@ Source code for WORC.WORC
         # If PyRadiomics is used and there is no fingerprinting, also write a config for PyRadiomics
if 'pyradiomics' in c['General']['FeatureCalculators'] and self.configs[0]['General']['Fingerprint'] != 'True':
- cfile_pyradiomics = os.path.join(self.fastr_tmpdir, f"config_pyradiomics_{self.name}_{num}.yaml")
+ cfile_pyradiomics = os.path.join(self.fastr_tmpdir, f"config_pyradiomics_{self.name}_{num}.yaml")
config_pyradiomics = io.convert_config_pyradiomics(c)
with open(cfile_pyradiomics, 'w') as file:
yaml.safe_dump(config_pyradiomics, file)
- cfile_pyradiomics = Path(self.fastr_tmpdir) / f"config_pyradiomics_{self.name}_{num}.yaml"
+ cfile_pyradiomics = Path(self.fastr_tmpdir) / f"config_pyradiomics_{self.name}_{num}.yaml"
self.pyradiomics_configs.append(cfile_pyradiomics.as_uri().replace('%20', ' '))
# BUG: Make path with pathlib to create windows double slashes
- cfile = Path(self.fastr_tmpdir) / f"config_{self.name}_{num}.ini"
+ cfile = Path(self.fastr_tmpdir) / f"config_{self.name}_{num}.ini"
self.fastrconfigs.append(cfile.as_uri().replace('%20', ' '))
@@ -2188,7 +2631,7 @@ Source code for WORC.WORC
3. Slicer pipeline, to create pngs of middle slice of images.
"""
-[docs] def __init__(self):
+[docs] def __init__(self):
"""Initialize object with all pipelines."""
self.Elastix = Elastix()
self.Evaluate = Evaluate()
@@ -180,49 +187,49 @@ Source code for WORC.classification.SearchCV
# limitations under the License.
import os
-from abc import ABCMeta, abstractmethod
-from collections.abc import Sized
+from abc import ABCMeta, abstractmethod
+from collections.abc import Sized
import numpy as np
import warnings
import numbers
import random
import string
import fastr
-from fastr.api import ResourceLimit
-from joblib import Parallel, delayed
-from scipy.stats import rankdata
+from fastr.api import ResourceLimit
+from joblib import Parallel, delayed
+from scipy.stats import rankdata
import six
import pandas as pd
import json
import glob
-from itertools import islice
+from itertools import islice
import shutil
-from sklearn.model_selection._search import ParameterSampler
-from sklearn.model_selection._search import ParameterGrid, _check_param_grid
-from sklearn.preprocessing import StandardScaler
-from sklearn.base import BaseEstimator, is_classifier, clone
-from sklearn.base import MetaEstimatorMixin
-from sklearn.exceptions import NotFittedError
-from sklearn.utils.metaestimators import if_delegate_has_method
-from sklearn.utils.validation import indexable, check_is_fitted
-from sklearn.model_selection._split import check_cv
-from sklearn.metrics import f1_score, roc_auc_score, mean_squared_error
-from sklearn.metrics import accuracy_score
-from sklearn.multiclass import OneVsRestClassifier
-from sklearn.utils.validation import _check_fit_params
-from sklearn.model_selection._validation import _aggregate_score_dicts
-
-from WORC.classification.fitandscore import fit_and_score, replacenan
-from WORC.classification.metrics import check_multimetric_scoring
-from WORC.classification import construct_classifier as cc
-from WORC.featureprocessing.Preprocessor import Preprocessor
-from WORC.detectors.detectors import DebugDetector
+from sklearn.model_selection._search import ParameterSampler
+from sklearn.model_selection._search import ParameterGrid, _check_param_grid
+from sklearn.preprocessing import StandardScaler
+from sklearn.base import BaseEstimator, is_classifier, clone
+from sklearn.base import MetaEstimatorMixin
+from sklearn.exceptions import NotFittedError
+from sklearn.utils.metaestimators import if_delegate_has_method
+from sklearn.utils.validation import indexable, check_is_fitted
+from sklearn.model_selection._split import check_cv
+from sklearn.metrics import f1_score, roc_auc_score, mean_squared_error
+from sklearn.metrics import accuracy_score
+from sklearn.multiclass import OneVsRestClassifier
+from sklearn.utils.validation import _check_fit_params
+from sklearn.model_selection._validation import _aggregate_score_dicts
+
+from WORC.classification.fitandscore import fit_and_score, replacenan
+from WORC.classification.metrics import check_multimetric_scoring
+from WORC.classification import construct_classifier as cc
+from WORC.featureprocessing.Preprocessor import Preprocessor
+from WORC.detectors.detectors import DebugDetector
import WORC.addexceptions as WORCexceptions
# Imports used in the Bayesian optimization
-from WORC.classification.smac import build_smac_config
-from datetime import datetime
+from WORC.classification.smac import build_smac_config
+from datetime import datetime
import copy
@@ -265,7 +272,7 @@ Source code for WORC.classification.SearchCV
"""Ensemble of BaseSearchCV Estimators."""
# @abstractmethod
-[docs] def __init__(self, estimators):
+[docs] def __init__(self, estimators):
"""Initialize object with list of estimators."""
if not estimators:
message = 'You supplied an empty list of estimators: No ensemble creation possible.'
@@ -523,7 +530,7 @@ Source code for WORC.classification.SearchCV
"""Base class for hyper parameter search with cross-validation."""
[docs] @abstractmethod
- def __init__(self, param_distributions={}, n_iter=10, scoring=None,
+ def __init__(self, param_distributions={}, n_iter=10, scoring=None,
fit_params=None, n_jobs=1, iid=True,
refit=True, cv=None, verbose=0, pre_dispatch='2*n_jobs',
random_state=None, error_score='raise', return_train_score=True,
@@ -600,7 +607,7 @@ Source code for WORC.classification.SearchCV
def _check_is_fitted(self, method_name):
if not self.refit:
- raise NotFittedError(('This GridSearchCV instance was initialized '
+ raise NotFittedError(('This SearchCV instance was initialized '
'with refit=False. %s is '
'available only after refitting on the best '
'parameters. ') % method_name)
@@ -789,12 +796,15 @@ Source code for WORC.classification.SearchCV
if self.best_modelsel is not None:
X = self.best_modelsel.transform(X)
- if self.best_pca is not None:
- X = self.best_pca.transform(X)
-
if self.best_statisticalsel is not None:
X = self.best_statisticalsel.transform(X)
+ if self.best_rfesel is not None:
+ X = self.best_rfesel.transform(X)
+
+ if self.best_pca is not None:
+ X = self.best_pca.transform(X)
+
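The hunk above reorders the preprocessing chain so that PCA runs after the statistical-test and new RFE selectors, i.e. the decomposition is fitted on the already-reduced feature set. Condensed to its essence (an illustrative helper, omitting the scaling and imputation steps that precede it):

    def _apply_feature_steps(self, X):
        # Selectors first, PCA last, mirroring the order in the hunk above.
        for step in (self.best_modelsel, self.best_statisticalsel,
                     self.best_rfesel, self.best_pca):
            if step is not None:
                X = step.transform(X)
        return X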
# Only resampling in training phase, i.e. if we have the labels
if y is not None:
if self.best_Sampler is not None:
@@ -853,7 +863,7 @@ Source code for WORC.classification.SearchCV
try:
array_means = np.average(array, axis=1, weights=weights)
except ZeroDivisionError as e:
- e = f'[WORC Warning] {e}. Setting {key_name} to unweighted.'
+ e = f'[WORC Warning] {e}. Setting {key_name} to unweighted.'
print(e)
array_means = np.average(array, axis=1)
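np.average raises ZeroDivisionError when the weights sum to zero, which is exactly the degenerate case the fallback above guards against. A toy reproduction:

    import numpy as np

    scores = np.ones((3, 4))
    weights = np.zeros(4)                    # degenerate: weights sum to zero
    try:
        means = np.average(scores, axis=1, weights=weights)
    except ZeroDivisionError:
        means = np.average(scores, axis=1)   # unweighted fallback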
@@ -868,7 +878,7 @@ Source code for WORC.classification.SearchCV
array_means[:, np.newaxis]) ** 2,
axis=1, weights=weights))
except ZeroDivisionError as e:
- e = f'[WORC Warning] {e}. Setting {key_name} to unweighted.'
+ e = f'[WORC Warning] {e}. Setting {key_name} to unweighted.'
print(e)
array_stds = np.sqrt(np.average((array -
array_means[:, np.newaxis]) ** 2,
@@ -1096,7 +1106,7 @@ Source code for WORC.classification.SearchCV
# Associate best options with new fits
(save_data, GroupSel, VarSel, SelectModel, feature_labels, scalers,
- encoders, Imputers, PCAs, StatisticalSel, ReliefSel, Sampler) = out
+ encoders, Imputers, PCAs, StatisticalSel, RFESel, ReliefSel, Sampler) = out
fitted_estimator = save_data[-2]
self.best_groupsel = GroupSel
self.best_scaler = scalers
@@ -1108,6 +1118,7 @@ Source code for WORC.classification.SearchCV
self.best_pca = PCAs
self.best_featlab = feature_labels
self.best_statisticalsel = StatisticalSel
+ self.best_rfesel = RFESel
self.best_reliefsel = ReliefSel
self.best_Sampler = Sampler
self.best_estimator_ = fitted_estimator
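The RFE selector travels through the positional tuple returned by fit_and_score with return_all=True, so the unpacking here must stay in lockstep with the return statements changed further below in fitandscore. The contract, spelled out:

    # Positional contract with fit_and_score(..., return_all=True);
    # RFESel sits between StatisticalSel and ReliefSel on both sides.
    (save_data, GroupSel, VarSel, SelectModel, feature_labels, scalers,
     encoders, Imputers, PCAs, StatisticalSel, RFESel, ReliefSel,
     Sampler) = out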
@@ -1231,7 +1242,7 @@ Source code for WORC.classification.SearchCV
train, valid, p_all,
return_all=True)
(save_data, GroupSel, VarSel, SelectModel, feature_labels, scalers,
- encoders, Imputers, PCAs, StatisticalSel, ReliefSel, Sampler) = out
+ encoders, Imputers, PCAs, StatisticalSel, RFESel, ReliefSel, Sampler) = out
new_estimator.best_groupsel = GroupSel
new_estimator.best_scaler = scalers
new_estimator.best_varsel = VarSel
@@ -1242,6 +1253,7 @@ Source code for WORC.classification.SearchCV
new_estimator.best_pca = PCAs
new_estimator.best_featlab = feature_labels
new_estimator.best_statisticalsel = StatisticalSel
+ new_estimator.best_rfesel = RFESel
new_estimator.best_reliefsel = ReliefSel
new_estimator.best_Sampler = Sampler
@@ -1381,9 +1393,9 @@ Source code for WORC.classification.SearchCV
self.ensemble_validation_score = best_performance
if verbose:
- print(f"Ensembling best {scoring}: {best_performance}.")
- print(f"Single estimator best {scoring}: {single_estimator_performance}.")
- print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
+ print(f"Ensembling best {scoring}: {best_performance}.")
+ print(f"Single estimator best {scoring}: {single_estimator_performance}.")
+ print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
elif method == 'ForwardSelection':
@@ -1393,7 +1405,7 @@ Source code for WORC.classification.SearchCV
while new_performance > best_performance:
Y_valid_score = copy.deepcopy(base_Y_valid_score)
if verbose:
- print(f"Iteration: {iteration}, best {scoring}: {new_performance}.")
+ print(f"Iteration: {iteration}, best {scoring}: {new_performance}.")
best_performance = new_performance
@@ -1435,9 +1447,9 @@ Source code for WORC.classification.SearchCV
if verbose:
# Print the performance gain
- print(f"Ensembling best {scoring}: {best_performance}.")
- print(f"Single estimator best {scoring}: {single_estimator_performance}.")
- print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
+ print(f"Ensembling best {scoring}: {best_performance}.")
+ print(f"Single estimator best {scoring}: {single_estimator_performance}.")
+ print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
elif method == 'Caruana':
if verbose:
@@ -1448,7 +1460,7 @@ Source code for WORC.classification.SearchCV
while iteration < 20:
Y_valid_score = copy.deepcopy(base_Y_valid_score)
if verbose:
- print(f"Iteration: {iteration}, best {scoring}: {new_performance}.")
+ print(f"Iteration: {iteration}, best {scoring}: {new_performance}.")
if iteration > 1:
# Stack scores: not needed for first iteration
@@ -1494,9 +1506,9 @@ Source code for WORC.classification.SearchCV
if verbose:
# Print the performance gain
- print(f"Ensembling best {scoring}: {best_performance}.")
- print(f"Single estimator best {scoring}: {single_estimator_performance}.")
- print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
+ print(f"Ensembling best {scoring}: {best_performance}.")
+ print(f"Single estimator best {scoring}: {single_estimator_performance}.")
+ print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
elif method == 'Bagging':
if verbose:
@@ -1567,12 +1579,12 @@ Source code for WORC.classification.SearchCV
if verbose:
# Print the performance gain
- print(f"Ensembling best {scoring}: {best_performance}.")
- print(f"Single estimator best {scoring}: {single_estimator_performance}.")
- print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
+ print(f"Ensembling best {scoring}: {best_performance}.")
+ print(f"Single estimator best {scoring}: {single_estimator_performance}.")
+ print(f'Ensemble consists of {len(ensemble)} estimators {ensemble}.')
else:
- print(f'[WORC WARNING] No valid ensemble method given: {method}. Not ensembling')
+ print(f'[WORC WARNING] No valid ensemble method given: {method}. Not ensembling')
return self
# Create the ensemble --------------------------------------------------
@@ -1600,7 +1612,7 @@ Source code for WORC.classification.SearchCV
self.fitted_workflows[enum].predict(np.asarray([X_train[0][0], X_train[1][0]]))
estimators.append(self.fitted_workflows[enum])
except (NotFittedError, ValueError, AttributeError):
- print(f'\t\t - Estimator {enum} not fitted (correctly) yet, refit.')
+ print(f'\t\t - Estimator {enum} not fitted (correctly) yet, refit.')
if self.fitted_workflows[enum] is not None:
estimator = self.fitted_workflows[enum]
else:
@@ -1615,7 +1627,7 @@ Source code for WORC.classification.SearchCV
estimator.predict(np.asarray([X_train[0][0], X_train[1][0]]))
estimators.append(estimator)
except (NotFittedError, ValueError):
- print(f'\t\t - Estimator {enum} could not be fitted (correctly), do not include in ensemble.')
+ print(f'\t\t - Estimator {enum} could not be fitted (correctly), do not include in ensemble.')
else:
# Create the ensemble trained on the full training set
@@ -1625,7 +1637,7 @@ Source code for WORC.classification.SearchCV
for enum, p_all in enumerate(parameters_all):
# Refit a SearchCV object with the provided parameters
if verbose:
- print(f"Refitting estimator {enum + 1} / {nest}.")
+ print(f"Refitting estimator {enum + 1} / {nest}.")
base_estimator = clone(base_estimator)
# Check if we need to create a multiclass estimator
@@ -1641,10 +1653,10 @@ Source code for WORC.classification.SearchCV
base_estimator.predict(np.asarray([X_train[0][0], X_train[1][0]]))
estimators.append(base_estimator)
except (NotFittedError, ValueError):
- print(f'\t\t - Estimator {enum} could not be fitted (correctly), do not include in ensemble.')
+ print(f'\t\t - Estimator {enum} could not be fitted (correctly), do not include in ensemble.')
if not estimators:
-            print(f'\t\t - Ensemble is empty, thus go on until we find an estimator that works and that is the final ensemble.')
+            print(f'\t\t - Ensemble is empty, thus go on until we find an estimator that works and that is the final ensemble.')
             while not estimators:
                 # We cannot have an empty ensemble, thus go on until we find an estimator that works
enum += 1
@@ -1667,7 +1679,7 @@ Source code for WORC.classification.SearchCV
estimators.append(base_estimator)
except (NotFittedError, ValueError):
pass
- print(f'\t\t - Needed estimator {enum}.')
+ print(f'\t\t - Needed estimator {enum}.')
self.ensemble = Ensemble(estimators)
self.best_estimator_ = self.ensemble
@@ -1691,7 +1703,7 @@ Source code for WORC.classification.SearchCV
n_splits = cv.get_n_splits(X, y, groups)
if self.verbose > 0 and isinstance(parameter_iterable, Sized):
n_candidates = len(parameter_iterable)
- print(f"Fitting {n_splits} folds for each of {n_candidates} candidates, totalling {n_candidates * n_splits} fits.")
+ print(f"Fitting {n_splits} folds for each of {n_candidates} candidates, totalling {n_candidates * n_splits} fits.")
cv_iter = list(cv.split(X, y, groups))
@@ -1759,7 +1771,7 @@ Source code for WORC.classification.SearchCV
message = 'One or more of the values in your parameter sampler ' +\
'is either not iterable, or the distribution cannot ' +\
'generate valid samples. Please check your ' +\
-                      f'parameters. At least {k} gives an error.'
+                      f'parameters. At least {k} gives an error.'
raise WORCexceptions.WORCValueError(message)
# Split the parameters files in equal parts
@@ -1771,7 +1783,7 @@ Source code for WORC.classification.SearchCV
for number in k:
temp_dict[number] = parameters_temp[number]
- fname = f'settings_{num}.json'
+ fname = f'settings_{num}.json'
sourcename = os.path.join(tempfolder, 'parameters', fname)
if not os.path.exists(os.path.dirname(sourcename)):
os.makedirs(os.path.dirname(sourcename))
@@ -1779,7 +1791,7 @@ Source code for WORC.classification.SearchCV
json.dump(temp_dict, fp, indent=4)
parameter_files[str(num).zfill(4)] =\
- f'vfs://tmp/GS/{name}/parameters/{fname}'
+ f'vfs://tmp/GS/{name}/parameters/{fname}'
# Create test-train splits
traintest_files = dict()
@@ -1792,13 +1804,13 @@ Source code for WORC.classification.SearchCV
index=source_labels,
name='Train-test data')
- fname = f'traintest_{num}.hdf5'
+ fname = f'traintest_{num}.hdf5'
sourcename = os.path.join(tempfolder, 'traintest', fname)
if not os.path.exists(os.path.dirname(sourcename)):
os.makedirs(os.path.dirname(sourcename))
- traintest_files[str(num).zfill(4)] = f'vfs://tmp/GS/{name}/traintest/{fname}'
+ traintest_files[str(num).zfill(4)] = f'vfs://tmp/GS/{name}/traintest/{fname}'
- sourcelabel = f"Source Data Iteration {num}"
+ sourcelabel = f"Source Data Iteration {num}"
source_data.to_hdf(sourcename, sourcelabel)
num += 1
@@ -1833,10 +1845,10 @@ Source code for WORC.classification.SearchCV
estimatorname = os.path.join(tempfolder, fname)
estimator_data.to_hdf(estimatorname, 'Estimator Data')
- estimatordata = f"vfs://tmp/GS/{name}/{fname}"
+ estimatordata = f"vfs://tmp/GS/{name}/{fname}"
# Create the fastr network
- network = fastr.create_network('WORC_GridSearch_' + name)
+ network = fastr.create_network('WORC_CASH_' + name)
estimator_data = network.create_source('HDF5', id='estimator_source', resources=ResourceLimit(memory='4G'))
traintest_data = network.create_source('HDF5', id='traintest', resources=ResourceLimit(memory='4G'))
parameter_data = network.create_source('JsonFile', id='parameters', resources=ResourceLimit(memory='4G'))
@@ -1860,7 +1872,7 @@ Source code for WORC.classification.SearchCV
source_data = {'estimator_source': estimatordata,
'traintest': traintest_files,
'parameters': parameter_files}
- sink_data = {'output': f"vfs://tmp/GS/{name}/output_{{sample_id}}_{{cardinality}}{{ext}}"}
+ sink_data = {'output': f"vfs://tmp/GS/{name}/output_{{sample_id}}_{{cardinality}}{{ext}}"}
network.execute(source_data, sink_data,
tmpdir=os.path.join(tempfolder, 'tmp'),
@@ -1874,12 +1886,12 @@ Source code for WORC.classification.SearchCV
difference = expected_no_files - len(sink_files)
fname = os.path.join(tempfolder, 'tmp')
message = ('Fitting classifiers has failed for ' +
-                       f'{difference} / {expected_no_files} files. The temporary ' +
-                       f'results were not deleted and can be found in {tempfolder}. ' +
+                       f'{difference} / {expected_no_files} files. The temporary ' +
+                       f'results were not deleted and can be found in {tempfolder}. ' +
'Probably your fitting and scoring failed: check out ' +
'the tmp/fitandscore folder within the tempfolder for ' +
'the fastr job temporary results or run: fastr trace ' +
- f'"{fname}{os.path.sep}__sink_data__.json" --samples.')
+ f'"{fname}{os.path.sep}__sink_data__.json" --samples.')
raise WORCexceptions.WORCValueError(message)
# Read in the output data once finished
@@ -2166,7 +2178,7 @@ Source code for WORC.classification.SearchCV
"""
-[docs] def __init__(self, param_distributions={}, n_iter=10, scoring=None,
+[docs] def __init__(self, param_distributions={}, n_iter=10, scoring=None,
fit_params=None, n_jobs=1, iid=True, refit=True, cv=None,
verbose=0, pre_dispatch='2*n_jobs', random_state=None,
error_score='raise', return_train_score=True,
@@ -2224,7 +2236,7 @@ Source code for WORC.classification.SearchCV
n_splits = cv.get_n_splits(X, y, groups)
if self.verbose > 0 and isinstance(parameter_iterable, Sized):
n_candidates = len(parameter_iterable)
- print(f"Fitting {n_splits} folds for each of {n_candidates}" +\
+ print(f"Fitting {n_splits} folds for each of {n_candidates}" +\
" candidates, totalling" +\
" {n_candidates * n_splits} fits")
@@ -2563,7 +2575,7 @@ Source code for WORC.classification.SearchCV
"""
-[docs] def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
+[docs] def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
n_jobs=1, iid=True, refit=True, cv=None, verbose=0,
pre_dispatch='2*n_jobs', error_score='raise',
return_train_score=True):
@@ -2806,7 +2818,7 @@ Source code for WORC.classification.SearchCV
"""
-[docs] def __init__(self, param_distributions={}, n_iter=10, scoring=None,
+[docs] def __init__(self, param_distributions={}, n_iter=10, scoring=None,
fit_params=None, n_jobs=1, iid=True, refit=True, cv=None,
verbose=0, pre_dispatch='2*n_jobs', random_state=None,
error_score='raise', return_train_score=True,
@@ -3073,7 +3085,7 @@ Source code for WORC.classification.SearchCV
"""
-[docs] def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
+[docs] def __init__(self, estimator, param_grid, scoring=None, fit_params=None,
n_jobs=1, iid=True, refit=True, cv=None, verbose=0,
pre_dispatch='2*n_jobs', error_score='raise',
return_train_score=True):
@@ -3176,7 +3188,7 @@ Source code for WORC.classification.SearchCV
estimatorname = os.path.join(tempfolder, fname)
estimator_data.to_hdf(estimatorname, 'Estimator Data')
- estimatordata = f"vfs://tmp/GS/{name}/{fname}"
+ estimatordata = f"vfs://tmp/GS/{name}/{fname}"
# Create the files containing the instance data
instance_labels = ['run_id', 'run_rng', 'run_name', 'tempfolder']
@@ -3188,14 +3200,14 @@ Source code for WORC.classification.SearchCV
instance_info = [i, random.randint(0, 2 ** 32 - 1), run_name, tempfolder]
instance_data = pd.Series(instance_info,
index=instance_labels,
- name=f'instance data {i}')
- fname = f'instancedata_{i}.hdf5'
+ name=f'instance data {i}')
+ fname = f'instancedata_{i}.hdf5'
instancefolder = os.path.join(tempfolder, 'instances', fname)
if not os.path.exists(os.path.dirname(instancefolder)):
os.makedirs(os.path.dirname(instancefolder))
instance_data.to_hdf(instancefolder, 'Instance Data')
- instancedata = f'vfs://tmp/GS/{name}/instances/{fname}'
- instance_files[f'{i}'] = instancedata
+ instancedata = f'vfs://tmp/GS/{name}/instances/{fname}'
+ instance_files[f'{i}'] = instancedata
# Create the fastr network
network = fastr.create_network('WORC_SMAC_' + name)
@@ -3213,7 +3225,7 @@ Source code for WORC.classification.SearchCV
source_data = {'estimator_source': estimatordata,
'instance_source': instance_files}
- sink_data = {'output': f"vfs://tmp/GS/{name}/output_{{sample_id}}_{{cardinality}}{{ext}}"}
+ sink_data = {'output': f"vfs://tmp/GS/{name}/output_{{sample_id}}_{{cardinality}}{{ext}}"}
network.execute(source_data, sink_data,
tmpdir=os.path.join(tempfolder, 'tmp'),
@@ -3226,12 +3238,12 @@ Source code for WORC.classification.SearchCV
difference = expected_no_files - len(sink_files)
fname = os.path.join(tempfolder, 'tmp')
message = ('Fitting classifiers has failed for ' +
-                       f'{difference} / {expected_no_files} files. The temporary ' +
-                       f'results were not deleted and can be found in {tempfolder}. ' +
+                       f'{difference} / {expected_no_files} files. The temporary ' +
+                       f'results were not deleted and can be found in {tempfolder}. ' +
'Probably your fitting and scoring failed: check out ' +
'the tmp/smac folder within the tempfolder for ' +
'the fastr job temporary results or run: fastr trace ' +
- f'"{fname}{os.path.sep}__sink_data__.json" --samples.')
+ f'"{fname}{os.path.sep}__sink_data__.json" --samples.')
raise WORCexceptions.WORCValueError(message)
# Read in the output data once finished
@@ -3520,7 +3532,7 @@ Source code for WORC.classification.SearchCV
"""
-[docs] def __init__(self, param_distributions={}, n_iter=10, scoring=None,
+[docs] def __init__(self, param_distributions={}, n_iter=10, scoring=None,
fit_params=None, n_jobs=1, iid=True, refit=True, cv=None,
verbose=0, pre_dispatch='2*n_jobs', random_state=None,
error_score='raise', return_train_score=True,
@@ -184,18 +191,18 @@ Source code for WORC.classification.crossval
import logging
import os
import time
-from time import gmtime, strftime
+from time import gmtime, strftime
import glob
import random
import json
import copy
-from sklearn.metrics import f1_score, roc_auc_score
-from sklearn.model_selection import train_test_split, LeaveOneOut
-from joblib import Parallel, delayed
+from sklearn.metrics import f1_score, roc_auc_score
+from sklearn.model_selection import train_test_split, LeaveOneOut
+from joblib import Parallel, delayed
import WORC.addexceptions as ae
-from WORC.classification.parameter_optimization import random_search_parameters, guided_search_parameters
-from WORC.classification.regressors import regressors
-from WORC.classification.SearchCV import RandomizedSearchCVfastr
+from WORC.classification.parameter_optimization import random_search_parameters, guided_search_parameters
+from WORC.classification.regressors import regressors
+from WORC.classification.SearchCV import RandomizedSearchCVfastr
[docs]def random_split_cross_validation(image_features, feature_labels, classes,
@@ -229,15 +236,15 @@ Source code for WORC.classification.crossval
# If we are using fixed splits, set the n_iterations to the number of splits
if fixedsplits is not None:
n_iterations = int(fixedsplits.columns.shape[0] / 2)
- print(f'Fixedsplits detected, adjusting n_iterations to {n_iterations}')
- logging.debug(f'Fixedsplits detected, adjusting n_iterations to {n_iterations}')
+ print(f'Fixedsplits detected, adjusting n_iterations to {n_iterations}')
+ logging.debug(f'Fixedsplits detected, adjusting n_iterations to {n_iterations}')
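n_iterations is derived from the column count because the fixed-splits table is assumed to hold one train column and one test column per cross-validation iteration. An illustrative layout (column names and patient ids are hypothetical):

    import pandas as pd

    fixedsplits = pd.DataFrame({
        '0_train': ['pat1', 'pat2'], '0_test': ['pat3', 'pat4'],
        '1_train': ['pat2', 'pat3'], '1_test': ['pat1', 'pat4'],
    })
    n_iterations = int(fixedsplits.columns.shape[0] / 2)   # -> 2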
for i in range(start, n_iterations):
         print(('Cross-validation iteration {} / {}.').format(str(i + 1), str(n_iterations)))
         logging.debug(('Cross-validation iteration {} / {}.').format(str(i + 1), str(n_iterations)))
timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
- print(f'\t Time: {timestamp}.')
- logging.debug(f'\t Time: {timestamp}.')
+ print(f'\t Time: {timestamp}.')
+ logging.debug(f'\t Time: {timestamp}.')
if fixed_seed:
random_seed = i**2
else:
@@ -405,7 +412,7 @@ Source code for WORC.classification.crossval
# Test performance for various RS and ensemble sizes
if config['General']['DoTestNRSNEns']:
- output_json = os.path.join(tempfolder, f'performance_RS_Ens_crossval_{i}.json')
+ output_json = os.path.join(tempfolder, f'performance_RS_Ens_crossval_{i}.json')
test_RS_Ensemble(estimator_input=trained_classifier,
X_train=X_train, Y_train=Y_train,
X_test=X_test, Y_test=Y_test,
@@ -445,8 +452,8 @@ Source code for WORC.classification.crossval
# Print elapsed time
elapsed = int((time.time() - t) / 60.0)
- print(f'\t Fitting took {elapsed} minutes.')
- logging.debug(f'\t Fitting took {elapsed} minutes.')
+ print(f'\t Fitting took {elapsed} minutes.')
+ logging.debug(f'\t Fitting took {elapsed} minutes.')
return save_data
@@ -485,8 +492,8 @@ Source code for WORC.classification.crossval
         print(('Cross-validation iteration {} / {}.').format(str(i + 1), str(n_splits)))
         logging.debug(('Cross-validation iteration {} / {}.').format(str(i + 1), str(n_splits)))
timestamp = strftime("%Y-%m-%d %H:%M:%S", gmtime())
- print(f'\t Time: {timestamp}.')
- logging.debug(f'\t Time: {timestamp}.')
+ print(f'\t Time: {timestamp}.')
+ logging.debug(f'\t Time: {timestamp}.')
if fixed_seed:
random_seed = i**2
else:
@@ -577,8 +584,8 @@ Source code for WORC.classification.crossval
# Print elapsed time
elapsed = int((time.time() - t) / 60.0)
- print(f'\t Fitting took {elapsed} minutes.')
- logging.debug(f'\t Fitting took {elapsed} minutes.')
+ print(f'\t Fitting took {elapsed} minutes.')
+ logging.debug(f'\t Fitting took {elapsed} minutes.')
return save_data
@@ -781,7 +788,7 @@ Source code for WORC.classification.crossval
use_SMAC=use_SMAC,
smac_result_file=smac_result_file)
else:
- raise ae.WORCKeyError(f'{crossval_type} is not a recognized cross-validation type.')
+ raise ae.WORCKeyError(f'{crossval_type} is not a recognized cross-validation type.')
[classifiers, X_train_set, X_test_set, Y_train_set, Y_test_set,
patient_ID_train_set, patient_ID_test_set, seed_set] =\
@@ -959,7 +966,7 @@ Source code for WORC.classification.crossval
# FIXME: Use home folder, as this function does not know
# Where final or temporary output is located
output_json = os.path.join(os.path.expanduser("~"),
- f'performance_RS_Ens.json')
+ f'performance_RS_Ens.json')
test_RS_Ensemble(estimator_input=trained_classifier,
X_train=X_train, Y_train=Y_train,
@@ -1009,14 +1016,14 @@ Source code for WORC.classification.crossval
if RS <= n_workflows:
# Make a key for saving the score
num = 0
- key = f'RS {RS} try {str(num).zfill(2)}'
+ key = f'RS {RS} try {str(num).zfill(2)}'
while key in keys:
num += 1
- key = f'RS {RS} try {str(num).zfill(2)}'
+ key = f'RS {RS} try {str(num).zfill(2)}'
keys.append(key)
# Make a local copy of the estimator and select only subset of workflows
- print(f'\t Using RS {RS}.')
+ print(f'\t Using RS {RS}.')
estimator = copy.deepcopy(estimator_original)
# estimator.maxlen = RS # Why is this needed? This will only lead to a lot of extra workflows on top of the top 100 being fitted
estimator.maxlen = min(RS, maxlen)
@@ -1081,40 +1088,40 @@ Source code for WORC.classification.crossval
F1_training = [F1_training[i] for i in selected_workflows]
F1_training = [F1_training[i] for i in workflow_ranking]
- performances[f'Mean training F1-score {key} top {maxlen}'] = F1_validation
- performances[f'Mean validation F1-score {key} top {maxlen}'] = F1_training
+ performances[f'Mean training F1-score {key} top {maxlen}'] = F1_validation
+ performances[f'Mean validation F1-score {key} top {maxlen}'] = F1_training
for ensemble in ensembles:
if isinstance(ensemble, int):
if ensemble > RS:
continue
else:
- print(f'\t Using ensemble {ensemble}.')
+ print(f'\t Using ensemble {ensemble}.')
# Create the ensemble
estimator.create_ensemble(X_train_temp, Y_train, method='top_N',
size=ensemble, verbose=verbose)
else:
- print(f'\t Using ensemble {ensemble}.')
+ print(f'\t Using ensemble {ensemble}.')
# Create the ensemble
estimator.create_ensemble(X_train_temp, Y_train, method=ensemble,
verbose=verbose)
- performances[f'Validation F1-score Ensemble {ensemble} {key}'] = estimator.ensemble_validation_score
+ performances[f'Validation F1-score Ensemble {ensemble} {key}'] = estimator.ensemble_validation_score
# Compute performance
y_prediction = estimator.predict(X_test)
y_score = estimator.predict_proba(X_test)[:, 1]
auc = roc_auc_score(Y_test, y_score)
f1_score_out = f1_score(Y_test, y_prediction, average='weighted')
- performances[f'Test F1-score Ensemble {ensemble} {key}'] = f1_score_out
- performances[f'Test AUC Ensemble {ensemble} {key}'] = auc
+ performances[f'Test F1-score Ensemble {ensemble} {key}'] = f1_score_out
+ performances[f'Test AUC Ensemble {ensemble} {key}'] = auc
y_prediction = estimator.predict(X_train)
y_score = estimator.predict_proba(X_train)[:, 1]
auc = roc_auc_score(Y_train, y_score)
f1_score_out = f1_score(Y_train, y_prediction, average='weighted')
- performances[f'Train F1-score Ensemble {ensemble} {key}'] = f1_score_out
- performances[f'Train AUC Ensemble {ensemble} {key}'] = auc
+ performances[f'Train F1-score Ensemble {ensemble} {key}'] = f1_score_out
+ performances[f'Train AUC Ensemble {ensemble} {key}'] = auc
# Write output
with open(output_json, 'w') as fp:
@@ -164,7 +171,7 @@
Source code for WORC.classification.fitandscore
#!/usr/bin/env python
-# Copyright 2016-2022 Biomedical Imaging Group Rotterdam, Departments of
+# Copyright 2016-2023 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -179,38 +186,39 @@ Source code for WORC.classification.fitandscore
# See the License for the specific language governing permissions and
# limitations under the License.
-from sklearn.model_selection._validation import _fit_and_score
+from sklearn.model_selection._validation import _fit_and_score
import numpy as np
-from sklearn.linear_model import Lasso, LogisticRegression
-from sklearn.feature_selection import SelectFromModel
-from sklearn.decomposition import PCA
-from sklearn.multiclass import OneVsRestClassifier
-from sklearn.ensemble import RandomForestClassifier
-from WORC.classification.ObjectSampler import ObjectSampler
-from sklearn.utils.metaestimators import _safe_split
-from sklearn.utils.validation import _num_samples
-from WORC.classification import construct_classifier as cc
-from WORC.classification.metrics import check_multimetric_scoring
-from WORC.featureprocessing.Relief import SelectMulticlassRelief
-from WORC.featureprocessing.Imputer import Imputer
-from WORC.featureprocessing.Scalers import WORCScaler
-from WORC.featureprocessing.VarianceThreshold import selfeat_variance
-from WORC.featureprocessing.StatisticalTestThreshold import StatisticalTestThreshold
-from WORC.featureprocessing.SelectGroups import SelectGroups
-from WORC.featureprocessing.OneHotEncoderWrapper import OneHotEncoderWrapper
+from sklearn.linear_model import Lasso, LogisticRegression
+from sklearn.feature_selection import SelectFromModel, RFE
+from sklearn.decomposition import PCA
+from sklearn.multiclass import OneVsRestClassifier
+from sklearn.ensemble import RandomForestClassifier
+from WORC.classification.ObjectSampler import ObjectSampler
+from sklearn.utils.metaestimators import _safe_split
+from sklearn.utils.validation import _num_samples
+from WORC.classification import construct_classifier as cc
+from WORC.classification.metrics import check_multimetric_scoring
+from WORC.featureprocessing.Relief import SelectMulticlassRelief
+from WORC.featureprocessing.Imputer import Imputer
+from WORC.featureprocessing.Scalers import WORCScaler
+from WORC.featureprocessing.VarianceThreshold import selfeat_variance
+from WORC.featureprocessing.StatisticalTestThreshold import StatisticalTestThreshold
+from WORC.featureprocessing.SelectGroups import SelectGroups
+from WORC.featureprocessing.OneHotEncoderWrapper import OneHotEncoderWrapper
import WORC
import WORC.addexceptions as ae
import time
+from xgboost.sklearn import XGBRegressor
# Specific imports for error management
-from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
-from numpy.linalg import LinAlgError
+from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
+from numpy.linalg import LinAlgError
# Suppress some sklearn warnings. These occur when unused hyperparameters are
# supplied, when estimators that are refitted do not converge, or parts
# are deprecated
import warnings
-from sklearn.exceptions import ConvergenceWarning
+from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=ConvergenceWarning)
@@ -355,6 +363,10 @@ Source code for WORC.classification.fitandscore
Either None if the statistical test feature selection is not used, or
the fitted object.
+ RFESel: WORC RFESel Object
+        Either None if recursive feature elimination (RFE) feature
+        selection is not used, or the fitted object.
+
ReliefSel: WORC ReliefSel Object
Either None if the RELIEF feature selection is not used, or
the fitted object.
@@ -403,6 +415,7 @@ Source code for WORC.classification.fitandscore
SelectModel = None
pca = None
StatisticalSel = None
+ RFESel = None
VarSel = None
ReliefSel = None
if isinstance(scorers, dict):
@@ -440,7 +453,7 @@ Source code for WORC.classification.fitandscore
if 'OneHotEncoding' in para_estimator.keys():
if para_estimator['OneHotEncoding'] == 'True':
if verbose:
- print(f'Applying OneHotEncoding, will ignore unknowns.')
+ print(f'Applying OneHotEncoding, will ignore unknowns.')
feature_labels_tofit =\
para_estimator['OneHotEncoding_feature_labels_tofit']
encoder =\
@@ -471,7 +484,7 @@ Source code for WORC.classification.fitandscore
if para_estimator['Imputation'] == 'True':
imp_type = para_estimator['ImputationMethod']
if verbose:
- print(f'Imputing NaN with {imp_type}.')
+ print(f'Imputing NaN with {imp_type}.')
# Only used with KNN in SMAC, otherwise assign default
if 'ImputationNeighbours' in para_estimator.keys():
@@ -489,7 +502,7 @@ Source code for WORC.classification.fitandscore
if original_shape != imputed_shape:
removed_features = original_shape[1] - imputed_shape[1]
if para_estimator['ImputationSkipAllNaN'] == 'True':
-                    print(f"[WARNING]: Several features ({removed_features}) were np.NaN for all objects. config['Imputation']['skipallNaN'] set to True, so these features are simply eliminated.")
+                    print(f"[WARNING]: Several features ({removed_features}) were np.NaN for all objects. config['Imputation']['skipallNaN'] set to True, so these features are simply eliminated.")
if hasattr(imputer.Imputer, 'statistics_'):
X_train = imputer.transform(X_train)
X_test = imputer.transform(X_test)
@@ -504,7 +517,7 @@ Source code for WORC.classification.fitandscore
feature_labels_zero = [fl for fnum, fl in enumerate(feature_labels[0]) if not np.isnan(temp_imputer.Imputer.statistics_[fnum])]
feature_labels = [feature_labels_zero for i in X_train]
else:
-                    raise ae.WORCValueError(f'Several features ({removed_features}) were np.NaN for all objects. Hence, imputation was not possible. Either make sure this is correct and turn off imputation, or correct the feature.')
+                    raise ae.WORCValueError(f'Several features ({removed_features}) were np.NaN for all objects. Hence, imputation was not possible. Either make sure this is correct and turn off imputation, or correct the feature.')
else:
X_train = imputer.transform(X_train)
X_test = imputer.transform(X_test)
@@ -613,7 +626,7 @@ Source code for WORC.classification.fitandscore
ret[2] = runtime
if return_all:
- return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
+ return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, RFESel, ReliefSel, Sampler
else:
return ret
@@ -654,7 +667,7 @@ Source code for WORC.classification.fitandscore
if return_all:
return ret, GroupSel, VarSel, SelectModel,\
feature_labels[0], scaler, encoder, imputer, pca,\
- StatisticalSel, ReliefSel, Sampler
+ StatisticalSel, RFESel, ReliefSel, Sampler
else:
return ret
@@ -671,7 +684,7 @@ Source code for WORC.classification.fitandscore
# Feature scaling
if verbose and para_estimator['FeatureScaling'] != 'None':
print('Fitting scaler and transforming features, method ' +
- f'{para_estimator["FeatureScaling"]}.')
+ f'{para_estimator["FeatureScaling"]}.')
scaling_method = para_estimator['FeatureScaling']
if scaling_method == 'None':
@@ -752,7 +765,7 @@ Source code for WORC.classification.fitandscore
if return_all:
return ret, GroupSel, VarSel, SelectModel,\
feature_labels[0], scaler, encoder, imputer, pca,\
- StatisticalSel, ReliefSel, Sampler
+ StatisticalSel, RFESel, ReliefSel, Sampler
else:
return ret
else:
@@ -779,7 +792,7 @@ Source code for WORC.classification.fitandscore
if para_estimator['SelectFromModel'] == 'True':
model = para_estimator['SelectFromModel_estimator']
if verbose:
- print(f"Selecting features using model {model}.")
+ print(f"Selecting features using model {model}.")
if model == 'Lasso':
# Use lasso model for feature selection
@@ -796,12 +809,12 @@ Source code for WORC.classification.fitandscore
selectestimator = RandomForestClassifier(n_estimators=n_estimators,
random_state=random_seed)
else:
- raise ae.WORCKeyError(f'Model {model} is not known for SelectFromModel. Use Lasso, LR, or RF.')
+ raise ae.WORCKeyError(f'Model {model} is not known for SelectFromModel. Use Lasso, LR, or RF.')
if len(y_train.shape) >= 2:
# Multilabel or regression. Regression: second dimension has length 1
if y_train.shape[1] > 1 and model != 'RF':
- raise ae.WORCValueError(f'Model {model} is not suitable for multiclass classification. Please use RF or do not use SelectFromModel.')
+ raise ae.WORCValueError(f'Model {model} is not suitable for multiclass classification. Please use RF or do not use SelectFromModel.')
# Prefit model
selectestimator.fit(X_train, y_train)
@@ -838,7 +851,7 @@ Source code for WORC.classification.fitandscore
if return_all:
return ret, GroupSel, VarSel, SelectModel,\
feature_labels[0], scaler, encoder, imputer, pca,\
- StatisticalSel, ReliefSel, Sampler
+ StatisticalSel, RFESel, ReliefSel, Sampler
else:
return ret
@@ -862,6 +875,198 @@ Source code for WORC.classification.fitandscore
if not return_all:
del SelectModel
+ # --------------------------------------------------------------------
+ # Feature selection based on a statistical test
+ if 'StatisticalTestUse' in para_estimator.keys():
+ if para_estimator['StatisticalTestUse'] == 'True':
+ metric = para_estimator['StatisticalTestMetric']
+ threshold = para_estimator['StatisticalTestThreshold']
+ if verbose:
+ print(f"Selecting features based on statistical test. Method {metric}, threshold {round(threshold, 5)}.")
+ print("\t Original Length: " + str(len(X_train[0])))
+
+ StatisticalSel = StatisticalTestThreshold(metric=metric,
+ threshold=threshold)
+
+ StatisticalSel.fit(X_train, y)
+ X_train_temp = StatisticalSel.transform(X_train)
+ if len(X_train_temp[0]) == 0:
+ if verbose:
+ print('[WARNING] No features are selected! Probably your statistical test feature selection was too strict.')
+
+ StatisticalSel = None
+ if skip:
+ if verbose:
+ print('[WARNING] Refitting, so we need an estimator, thus skipping this step.')
+ parameters['StatisticalTestUse'] = 'False'
+ else:
+ if verbose:
+ print('[WARNING] Returning NaN as performance.')
+
+ # return NaN as performance
+ para_estimator = delete_nonestimator_parameters(para_estimator)
+
+ # Update the runtime
+ end_time = time.time()
+ runtime = end_time - start_time
+ if return_train_score:
+ ret[3] = runtime
+ else:
+ ret[2] = runtime
+ if return_all:
+ return ret, GroupSel, VarSel, SelectModel,\
+ feature_labels[0], scaler, encoder, imputer, pca,\
+ StatisticalSel, RFESel, ReliefSel, Sampler
+ else:
+ return ret
+
+ else:
+ X_train = StatisticalSel.transform(X_train)
+ X_test = StatisticalSel.transform(X_test)
+ feature_labels = StatisticalSel.transform(feature_labels)
+
+ if verbose:
+ print("\t New Length: " + str(len(X_train[0])))
+
+ # Delete the statistical test keys
+ del para_estimator['StatisticalTestUse']
+ if 'StatisticalTestMetric' in para_estimator.keys():
+ del para_estimator['StatisticalTestMetric']
+
+ if 'StatisticalTestThreshold' in para_estimator.keys():
+ del para_estimator['StatisticalTestThreshold']
+
+ # Delete the object if we do not need to return it
+ if not return_all:
+ del StatisticalSel
+
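For orientation, StatisticalTestThreshold keeps only the features whose per-feature test statistic clears the threshold. A rough standalone approximation of the idea with scipy (not the WORC implementation, which supports several metrics besides Mann-Whitney U):

    import numpy as np
    from scipy.stats import mannwhitneyu

    def statistical_test_select(X, y, threshold=0.05):
        # Keep features whose Mann-Whitney U p-value is below the threshold.
        keep = [f for f in range(X.shape[1])
                if mannwhitneyu(X[y == 0, f], X[y == 1, f]).pvalue < threshold]
        return X[:, keep], keep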
+ # --------------------------------------------------------------------
+ # Feature selection through recursive feature elimination
+ if 'RFE' in para_estimator.keys():
+ model = para_estimator['RFE_estimator']
+ if para_estimator['RFE'] == 'True':
+ if verbose:
+                print(f"Selecting features using recursive feature elimination with model {model}.")
+
+ if model == 'Lasso':
+ # Use lasso model for feature selection
+ alpha = para_estimator['RFE_lasso_alpha']
+ selectestimator = Lasso(alpha=alpha, random_state=random_seed)
+
+ elif model == 'LR':
+ # Use logistic regression model for feature selection
+ selectestimator = LogisticRegression(random_state=random_seed)
+
+ elif model == 'RF':
+ # Use random forest model for feature selection
+ n_estimators = para_estimator['RFE_n_trees']
+ selectestimator = RandomForestClassifier(n_estimators=n_estimators,
+ random_state=random_seed)
+ else:
+ raise ae.WORCKeyError(f'Model {model} is not known for RFE. Use Lasso, LR, or RF.')
+
+ if len(y_train.shape) >= 2:
+ # Multilabel or regression. Regression: second dimension has length 1
+ if y_train.shape[1] > 1 and model != 'RF':
+ raise ae.WORCValueError(f'Model {model} is not suitable for multiclass classification. Please use RF or do not use RFE.')
+
+ # Prefit model
+ selectestimator.fit(X_train, y_train)
+
+ # Use fit to select optimal features
+ n_features_to_select = para_estimator['RFE_n_features_to_select']
+ step = para_estimator['RFE_step']
+ RFESel = RFE(selectestimator,
+ n_features_to_select=n_features_to_select,
+ step=step)
+ try:
+ RFESel.fit(X_train, y_train)
+ except ValueError:
+ if skip:
+ if verbose:
+ print('[WARNING] Refitting, so we need an estimator, thus skipping this step.')
+ parameters['RFE'] = 'False'
+
+ else:
+ if verbose:
+ print('[WARNING] RFE cannot be fitted with these settings, too few features left, returning NaN as performance.')
+
+ # return NaN as performance
+ para_estimator = delete_nonestimator_parameters(para_estimator)
+ RFESel = None
+
+ # Update the runtime
+ end_time = time.time()
+ runtime = end_time - start_time
+ if return_train_score:
+ ret[3] = runtime
+ else:
+ ret[2] = runtime
+ if return_all:
+ return ret, GroupSel, VarSel, SelectModel,\
+ feature_labels[0], scaler, encoder, imputer, pca,\
+ StatisticalSel, RFESel, ReliefSel, Sampler
+ else:
+ return ret
+ else:
+ if verbose:
+ print("\t Original Length: " + str(len(X_train[0])))
+
+ X_train_temp = RFESel.transform(X_train)
+ if len(X_train_temp[0]) == 0:
+ if verbose:
+ print('[WARNING]: No features are selected! Probably your data is too noisy or the selection too strict.')
+
+ RFESel = None
+ if skip:
+ if verbose:
+ print('[WARNING] Refitting, so we need an estimator, thus skipping this step.')
+ parameters['RFE'] = 'False'
+ else:
+ if verbose:
+ print('[WARNING] Returning NaN as performance.')
+
+ # return NaN as performance
+ para_estimator = delete_nonestimator_parameters(para_estimator)
+
+ # Update the runtime
+ end_time = time.time()
+ runtime = end_time - start_time
+ if return_train_score:
+ ret[3] = runtime
+ else:
+ ret[2] = runtime
+ if return_all:
+ return ret, GroupSel, VarSel, SelectModel,\
+ feature_labels[0], scaler, encoder, imputer, pca,\
+ StatisticalSel, RFESel, ReliefSel, Sampler
+ else:
+ return ret
+
+ else:
+ X_train = RFESel.transform(X_train)
+ X_test = RFESel.transform(X_test)
+ feature_labels = RFESel.transform(feature_labels)
+
+ if verbose:
+ print("\t New Length: " + str(len(X_train[0])))
+
+ del para_estimator['RFE']
+ if 'RFE_lasso_alpha' in para_estimator.keys():
+ del para_estimator['RFE_lasso_alpha']
+ if 'RFE_estimator' in para_estimator.keys():
+ del para_estimator['RFE_estimator']
+ if 'RFE_n_trees' in para_estimator.keys():
+ del para_estimator['RFE_n_trees']
+ if 'RFE_n_features_to_select' in para_estimator.keys():
+ del para_estimator['RFE_n_features_to_select']
+            if 'RFE_step' in para_estimator.keys():
+                del para_estimator['RFE_step']
+
+ # Delete the object if we do not need to return it
+ if not return_all:
+ del RFESel
+
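Stripped of the error handling, the new branch wraps one of three base estimators in scikit-learn's RFE. A minimal standalone reproduction (the alpha, n_features_to_select and step values are illustrative, not WORC defaults):

    import numpy as np
    from sklearn.feature_selection import RFE
    from sklearn.linear_model import Lasso

    rng = np.random.default_rng(42)
    X_train = rng.random((30, 50))
    y_train = rng.integers(0, 2, 30)

    # Drop 5 features per iteration until 10 remain.
    rfe = RFE(Lasso(alpha=0.1), n_features_to_select=10, step=5)
    rfe.fit(X_train, y_train)
    X_selected = rfe.transform(X_train)    # shape: (30, 10)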
# ----------------------------------------------------------------
# PCA dimensionality reduction
    # Principal Component Analysis
@@ -876,7 +1081,7 @@ Source code for WORC.classification.fitandscore
pca.fit(X_train)
except (ValueError, LinAlgError) as e:
if verbose:
- print(f'[WARNING] PCA Error: {e}.')
+ print(f'[WARNING] PCA Error: {e}.')
pca = None
if skip:
@@ -900,7 +1105,7 @@ Source code for WORC.classification.fitandscore
if return_all:
return ret, GroupSel, VarSel, SelectModel,\
feature_labels[0], scaler, encoder, imputer, pca,\
- StatisticalSel, ReliefSel, Sampler
+ StatisticalSel, RFESel, ReliefSel, Sampler
else:
return ret
@@ -918,7 +1123,7 @@ Source code for WORC.classification.fitandscore
pca.fit(X_train)
except (ValueError, LinAlgError) as e:
if verbose:
- print(f'[WARNING]: PCA Error: {e}.')
+ print(f'[WARNING]: PCA Error: {e}.')
pca = None
if skip:
@@ -942,7 +1147,7 @@ Source code for WORC.classification.fitandscore
if return_all:
return ret, GroupSel, VarSel, SelectModel,\
feature_labels[0], scaler, encoder, imputer, pca,\
- StatisticalSel, ReliefSel, Sampler
+ StatisticalSel, RFESel, ReliefSel, Sampler
else:
return ret
else:
@@ -956,7 +1161,7 @@ Source code for WORC.classification.fitandscore
if n_components >= len(X_train[0]):
if verbose:
- print(f"[WARNING] PCA n_components ({n_components})> n_features ({len(X_train[0])}): skipping PCA.")
+ print(f"[WARNING] PCA n_components ({n_components})> n_features ({len(X_train[0])}): skipping PCA.")
else:
pca = PCA(n_components=n_components, random_state=random_seed)
try:
@@ -965,7 +1170,7 @@ Source code for WORC.classification.fitandscore
X_test = pca.transform(X_test)
except (ValueError, LinAlgError) as e:
if verbose:
- print(f'[WARNING] PCA Error: {e}.')
+ print(f'[WARNING] PCA Error: {e}.')
pca = None
if skip:
@@ -989,7 +1194,7 @@ Source code for WORC.classification.fitandscore
if return_all:
return ret, GroupSel, VarSel, SelectModel,\
feature_labels[0], scaler, encoder, imputer, pca,\
- StatisticalSel, ReliefSel, Sampler
+ StatisticalSel, RFESel, ReliefSel, Sampler
else:
return ret
@@ -1005,71 +1210,6 @@ Source code for WORC.classification.fitandscore
if 'PCAType' in para_estimator.keys():
del para_estimator['PCAType']
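The n_components guard in the hunk above can be reproduced in isolation; a hedged sketch with made-up shapes, not WORC's actual wiring:

import numpy as np
from sklearn.decomposition import PCA

X_train = np.random.rand(20, 15)   # fewer features than the requested components
n_components = 25
if n_components >= X_train.shape[1]:
    print(f"[WARNING] PCA n_components ({n_components}) > n_features ({X_train.shape[1]}): skipping PCA.")
else:
    X_train = PCA(n_components=n_components, random_state=42).fit_transform(X_train)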
- # --------------------------------------------------------------------
- # Feature selection based on a statistical test
- if 'StatisticalTestUse' in para_estimator.keys():
- if para_estimator['StatisticalTestUse'] == 'True':
- metric = para_estimator['StatisticalTestMetric']
- threshold = para_estimator['StatisticalTestThreshold']
- if verbose:
- print(f"Selecting features based on statistical test. Method {metric}, threshold {round(threshold, 5)}.")
- print("\t Original Length: " + str(len(X_train[0])))
-
- StatisticalSel = StatisticalTestThreshold(metric=metric,
- threshold=threshold)
-
- StatisticalSel.fit(X_train, y)
- X_train_temp = StatisticalSel.transform(X_train)
- if len(X_train_temp[0]) == 0:
- if verbose:
- print('[WARNING] No features are selected! Probably your statistical test feature selection was too strict.')
-
- StatisticalSel = None
- if skip:
- if verbose:
- print('[WARNING] Refitting, so we need an estimator, thus skipping this step.')
- parameters['StatisticalTestUse'] = 'False'
- else:
- if verbose:
- print('[WARNING] Returning NaN as performance.')
-
- # return NaN as performance
- para_estimator = delete_nonestimator_parameters(para_estimator)
-
- # Update the runtime
- end_time = time.time()
- runtime = end_time - start_time
- if return_train_score:
- ret[3] = runtime
- else:
- ret[2] = runtime
- if return_all:
- return ret, GroupSel, VarSel, SelectModel,\
- feature_labels[0], scaler, encoder, imputer, pca,\
- StatisticalSel, ReliefSel, Sampler
- else:
- return ret
-
- else:
- X_train = StatisticalSel.transform(X_train)
- X_test = StatisticalSel.transform(X_test)
- feature_labels = StatisticalSel.transform(feature_labels)
-
- if verbose:
- print("\t New Length: " + str(len(X_train[0])))
-
- # Delete the statistical test keys
- del para_estimator['StatisticalTestUse']
- if 'StatisticalTestMetric' in para_estimator.keys():
- del para_estimator['StatisticalTestMetric']
-
- if 'StatisticalTestThreshold' in para_estimator.keys():
- del para_estimator['StatisticalTestThreshold']
-
- # Delete the object if we do not need to return it
- if not return_all:
- del StatisticalSel
-
# ------------------------------------------------------------------------
# Use object resampling
if 'Resampling_Use' in para_estimator.keys():
@@ -1120,7 +1260,7 @@ Source code for WORC.classification.fitandscore
if 'ADASYN is not suited for this specific dataset. Use SMOTE instead.' in str(e):
    # Seldom occurs, therefore return a performance dummy
if verbose:
- print(f'[WARNING]: {e}. Returning dummies. Parameters: ')
+ print(f'[WARNING]: {e}. Returning dummies. Parameters: ')
print(parameters)
para_estimator = delete_nonestimator_parameters(para_estimator)
@@ -1133,7 +1273,9 @@ Source code for WORC.classification.fitandscore
ret[2] = runtime
if return_all:
- return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
+ return ret, GroupSel, VarSel, SelectModel,\
+ feature_labels[0], scaler, encoder, imputer,\
+ pca, StatisticalSel, RFESel, ReliefSel, Sampler
else:
return ret
else:
@@ -1143,7 +1285,7 @@ Source code for WORC.classification.fitandscore
neg = int(len(y_train_temp) - pos)
if pos < 10 or neg < 10:
if verbose:
- print(f'[WARNING] Skipping resampling: to few objects returned in one or both classes (pos: {pos}, neg: {neg}).')
+                    print(f'[WARNING] Skipping resampling: too few objects returned in one or both classes (pos: {pos}, neg: {neg}).')
Sampler = None
parameters['Resampling_Use'] = 'False'
@@ -1155,8 +1297,8 @@ Source code for WORC.classification.fitandscore
pos = int(np.sum(y_train))
neg = int(len(y_train) - pos)
if verbose:
- message = f"Resampling from {len_in} ({pos_initial} pos," +\
- f" {neg_initial} neg) to {len(y_train)} ({pos} pos, {neg} neg) patients."
+ message = f"Resampling from {len_in} ({pos_initial} pos," +\
+ f" {neg_initial} neg) to {len(y_train)} ({pos} pos, {neg} neg) patients."
print(message)
# Also reset train and test indices
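The resampling guard above (revert when fewer than 10 objects remain in either class) can be sketched with imbalanced-learn's SMOTE; the sampler choice and the counts are illustrative, since WORC supports several resampling methods.

import numpy as np
from imblearn.over_sampling import SMOTE  # assumes imbalanced-learn is installed

X_train = np.random.rand(40, 5)
y_train = np.array([0] * 28 + [1] * 12)

len_in, pos_initial = len(y_train), int(np.sum(y_train))
neg_initial = len_in - pos_initial
X_res, y_res = SMOTE(random_state=42).fit_resample(X_train, y_train)
pos = int(np.sum(y_res))
neg = int(len(y_res) - pos)
if pos < 10 or neg < 10:
    print(f'[WARNING] Skipping resampling: too few objects returned in one or both classes (pos: {pos}, neg: {neg}).')
else:
    X_train, y_train = X_res, y_res
    print(f"Resampling from {len_in} ({pos_initial} pos, {neg_initial} neg) "
          f"to {len(y_train)} ({pos} pos, {neg} neg) patients.")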
@@ -1212,13 +1354,13 @@ Source code for WORC.classification.fitandscore
estimator = OneVsRestClassifier(estimator)
if verbose:
- print(f"Fitting ML method: {parameters['classifiers']}.")
+ print(f"Fitting ML method: {parameters['classifiers']}.")
    # Recombine feature values and labels for the train and test set
feature_values = np.concatenate((X_train, X_test), axis=0)
y_all = np.concatenate((y_train, y_test), axis=0)
para_estimator = None
-
+
try:
ret = _fit_and_score(estimator, feature_values, y_all,
scorers, new_train,
@@ -1233,7 +1375,7 @@ Source code for WORC.classification.fitandscore
except (ValueError, LinAlgError) as e:
if type(estimator) == LDA:
if verbose:
- print(f'[WARNING]: skipping this setting due to LDA Error: {e}.')
+ print(f'[WARNING]: skipping this setting due to LDA Error: {e}.')
# Update the runtime
end_time = time.time()
@@ -1244,7 +1386,7 @@ Source code for WORC.classification.fitandscore
ret[2] = runtime
if return_all:
- return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
+ return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, RFESel, ReliefSel, Sampler
else:
return ret
else:
@@ -1277,7 +1419,7 @@ Source code for WORC.classification.fitandscore
ret[2] = runtime
if return_all:
- return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, ReliefSel, Sampler
+ return ret, GroupSel, VarSel, SelectModel, feature_labels[0], scaler, encoder, imputer, pca, StatisticalSel, RFESel, ReliefSel, Sampler
else:
return ret
@@ -1305,6 +1447,12 @@ Source code for WORC.classification.fitandscore
'SelectFromModel_lasso_alpha',
'SelectFromModel_estimator',
'SelectFromModel_n_trees',
+ 'RFE',
+ 'RFE_lasso_alpha',
+ 'RFE_estimator',
+ 'RFE_n_trees',
+ 'RFE_n_features_to_select',
+ 'RFE_step',
'Featsel_Variance',
'FeatPreProcess',
'FeatureScaling',
@@ -1338,9 +1486,9 @@ Source code for WORC.classification.fitandscore
if np.isnan(value):
if verbose:
if feature_labels is not None:
- print(f"[WARNING] NaN found, patient {pnum}, label {feature_labels[fnum]}. Replacing with zero.")
+ print(f"[WARNING] NaN found, patient {pnum}, label {feature_labels[fnum]}. Replacing with zero.")
else:
- print(f"[WARNING] NaN found, patient {pnum}, label {fnum}. Replacing with zero.")
+ print(f"[WARNING] NaN found, patient {pnum}, label {fnum}. Replacing with zero.")
# Note: X is a list of lists, hence we cannot index the element directly
image_features_temp[pnum, fnum] = 0
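A minimal sketch of the NaN-to-zero replacement above, assuming features arrive as a list of per-patient lists (hence the conversion to an array before indexing):

import numpy as np

image_features = [[1.0, float('nan')], [0.5, 2.0]]   # list of lists, as noted above
image_features_temp = np.asarray(image_features)
for pnum, fnum in zip(*np.where(np.isnan(image_features_temp))):
    print(f"[WARNING] NaN found, patient {pnum}, label {fnum}. Replacing with zero.")
    image_features_temp[pnum, fnum] = 0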
@@ -180,9 +187,9 @@ Source code for WORC.classification.parameter_optimization
# limitations under the License.
import numpy as np
-from sklearn.utils import check_random_state
-from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit
-from WORC.classification.SearchCV import RandomizedSearchCVfastr, RandomizedSearchCVJoblib, GuidedSearchCVSMAC
+from sklearn.utils import check_random_state
+from sklearn.model_selection import StratifiedShuffleSplit, ShuffleSplit
+from WORC.classification.SearchCV import RandomizedSearchCVfastr, RandomizedSearchCVJoblib, GuidedSearchCVSMAC
[docs]def random_search_parameters(features, labels, N_iter, test_size,
@@ -220,9 +227,8 @@ Source code for WORC.classification.parameter_optimization
random_search: sklearn randomsearch object containing the results.
"""
if random_seed is None:
- #random_seed = np.random.randint(1, 5000)
- # Fix the random seed for testing
- random_seed = 42
+ random_seed = np.random.randint(1, 5000)
+
random_state = check_random_state(random_seed)
regressors = ['SVR', 'RFR', 'SGDR', 'Lasso', 'ElasticNet']
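The seeding change above in isolation: draw a seed only when none is supplied, then derive a RandomState from it. A sketch; the range matches the diff, the rest is illustrative.

import numpy as np
from sklearn.utils import check_random_state

random_seed = None  # i.e. no seed passed in
if random_seed is None:
    random_seed = np.random.randint(1, 5000)
random_state = check_random_state(random_seed)  # RandomState usable by sklearn
print(random_state.randint(10))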
@@ -263,8 +269,8 @@ Source code for WORC.classification.parameter_optimization
random_search.fit(features, labels)
print("Best found parameters:")
for i in random_search.best_params_:
- print(f'{i}: {random_search.best_params_[i]}.')
- print(f"\n Best score using best parameters: {scoring_method} = {random_search.best_score_}")
+ print(f'{i}: {random_search.best_params_[i]}.')
+ print(f"\n Best score using best parameters: {scoring_method} = {random_search.best_score_}")
return random_search
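RandomizedSearchCVfastr and RandomizedSearchCVJoblib are WORC-specific wrappers; a rough plain-sklearn analog of the fit-and-report flow above, with a made-up estimator and grid:

import numpy as np
from scipy.stats import uniform
from sklearn.model_selection import RandomizedSearchCV, StratifiedShuffleSplit
from sklearn.svm import SVC

X = np.random.rand(60, 8)
y = np.tile([0, 1], 30)

cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)
random_search = RandomizedSearchCV(SVC(), {'C': uniform(0.1, 10)}, n_iter=10,
                                   scoring='f1', cv=cv, random_state=42)
random_search.fit(X, y)
print("Best found parameters:")
for i in random_search.best_params_:
    print(f'{i}: {random_search.best_params_[i]}.')
print(f"\n Best score using best parameters: f1 = {random_search.best_score_}")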
@@ -336,7 +342,7 @@ Source code for WORC.classification.parameter_optimization
guided_search.fit(features, labels)
print("Best found parameters:")
for i in guided_search.best_params_:
- print(f'{i}: {guided_search.best_params_[i]}.')
+ print(f'{i}: {guided_search.best_params_[i]}.')
print("\n Best score using best parameters:")
print(guided_search.best_score_)
diff --git a/WORC/doc/_build/html/_modules/WORC/classification/trainclassifier.html b/WORC/doc/_build/html/_modules/WORC/classification/trainclassifier.html
index a6dd9c5f..cec245b0 100644
--- a/WORC/doc/_build/html/_modules/WORC/classification/trainclassifier.html
+++ b/WORC/doc/_build/html/_modules/WORC/classification/trainclassifier.html
@@ -164,7 +171,7 @@
Source code for WORC.classification.trainclassifier
#!/usr/bin/env python
-# Copyright 2016-2022 Biomedical Imaging Group Rotterdam, Departments of
+# Copyright 2016-2023 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -181,12 +188,12 @@ Source code for WORC.classification.trainclassifier
import os
import numpy as np
-from scipy.stats import uniform
-from WORC.classification import crossval as cv
-from WORC.classification import construct_classifier as cc
-from WORC.IOparser.file_io import load_features
+from scipy.stats import uniform
+from WORC.classification import crossval as cv
+from WORC.classification import construct_classifier as cc
+from WORC.IOparser.file_io import load_features
import WORC.IOparser.config_io_classifier as config_io
-from WORC.classification.AdvancedSampler import discrete_uniform, \
+from WORC.classification.AdvancedSampler import discrete_uniform, \
log_uniform, boolean_uniform
import json
@@ -312,6 +319,9 @@ Source code for WORC.classification.trainclassifier
# Add non-classifier parameters
param_grid = add_parameters_to_grid(param_grid, config)
+
+    # Delete hyperoptimization parameters that have already been used
+ del config['HyperOptimization']['fix_random_seed']
# For N_iter, perform k-fold crossvalidation
outputfolder = os.path.dirname(output_hdf)
@@ -409,6 +419,28 @@ Source code for WORC.classification.trainclassifier
discrete_uniform(loc=config['Featsel']['SelectFromModel_n_trees'][0],
scale=config['Featsel']['SelectFromModel_n_trees'][1])
+ param_grid['RFE'] =\
+ boolean_uniform(threshold=config['Featsel']['RFE'])
+
+ param_grid['RFE_lasso_alpha'] =\
+ uniform(loc=config['Featsel']['RFE_lasso_alpha'][0],
+ scale=config['Featsel']['RFE_lasso_alpha'][1])
+
+ param_grid['RFE_estimator'] =\
+ config['Featsel']['RFE_estimator']
+
+ param_grid['RFE_n_trees'] =\
+ discrete_uniform(loc=config['Featsel']['RFE_n_trees'][0],
+ scale=config['Featsel']['RFE_n_trees'][1])
+
+ param_grid['RFE_n_features_to_select'] =\
+ discrete_uniform(loc=config['Featsel']['RFE_n_features_to_select'][0],
+ scale=config['Featsel']['RFE_n_features_to_select'][1])
+
+ param_grid['RFE_step'] =\
+ discrete_uniform(loc=config['Featsel']['RFE_step'][0],
+ scale=config['Featsel']['RFE_step'][1])
+
param_grid['UsePCA'] =\
boolean_uniform(threshold=config['Featsel']['UsePCA'])
param_grid['PCAType'] = config['Featsel']['PCAType']
@@ -442,8 +474,12 @@ Source code for WORC.classification.trainclassifier
scale=config['Featsel']['ReliefNumFeatures'][1])
# Add a random seed, which is required for many methods
- param_grid['random_seed'] =\
- discrete_uniform(loc=0, scale=2**32 - 1)
+ if config['HyperOptimization']['fix_random_seed']:
+ # Fix the random seed
+ param_grid['random_seed'] = [22]
+ else:
+ param_grid['random_seed'] =\
+ discrete_uniform(loc=0, scale=2**32 - 1)
return param_grid
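To see how such a mixed grid behaves when sampled, a sketch using plain scipy distributions and sklearn's ParameterSampler instead of WORC's AdvancedSampler classes; names and ranges are illustrative, and the fixed [22] list mirrors the fix_random_seed branch above.

from scipy.stats import randint, uniform
from sklearn.model_selection import ParameterSampler

param_grid = {
    'RFE_lasso_alpha': uniform(loc=0.1, scale=1.0),   # continuous range
    'RFE_n_features_to_select': randint(10, 51),      # discrete range
    'RFE_estimator': ['Lasso', 'LR', 'RF'],           # categorical choices
    'random_seed': [22],                              # fixed-seed variant
}
for params in ParameterSampler(param_grid, n_iter=3, random_state=42):
    print(params)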
diff --git a/WORC/doc/_build/html/_modules/WORC/detectors/detectors.html b/WORC/doc/_build/html/_modules/WORC/detectors/detectors.html
index 6194d716..9c90810c 100644
--- a/WORC/doc/_build/html/_modules/WORC/detectors/detectors.html
+++ b/WORC/doc/_build/html/_modules/WORC/detectors/detectors.html
@@ -181,9 +188,9 @@ Source code for WORC.detectors.detectors
import csv
import string
-from abc import ABC, abstractmethod
-from pathlib import Path
-from os import environ
+from abc import ABC, abstractmethod
+from pathlib import Path
+from os import environ
import platform
import os
import pkg_resources
@@ -202,7 +209,7 @@ Source code for WORC.detectors.detectors
return result
def _generate_detector_message(self, detected_Value):
- return f"{self.__class__.__name__[0:-8]} detected: {detected_Value}."
+ return f"{self.__class__.__name__[0:-8]} detected: {detected_Value}."
@abstractmethod
def _is_detected(self, *args, **kwargs):
@@ -210,7 +217,7 @@ Source code for WORC.detectors.detectors
[docs]class CsvDetector(AbstractDetector):
-[docs] def __init__(self, csv_file_path):
+
def _is_detected(self, *args, **kwargs):
@@ -164,7 +171,7 @@
Source code for WORC.featureprocessing.StatisticalTestFeatures
#!/usr/bin/env python
-# Copyright 2016-2020 Biomedical Imaging Group Rotterdam, Departments of
+# Copyright 2016-2023 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
@@ -186,11 +193,11 @@ Source code for WORC.featureprocessing.StatisticalTestFeatures
import os
import csv
import numpy as np
-from scipy.stats import ttest_ind, ranksums, mannwhitneyu, chi2_contingency
+from scipy.stats import ttest_ind, ranksums, mannwhitneyu, chi2_contingency
import WORC.IOparser.config_io_classifier as config_io
-from WORC.IOparser.file_io import load_features
-from WORC.detectors.detectors import DebugDetector
-from WORC.plotting.plot_pvalues_features import manhattan_importance
+from WORC.IOparser.file_io import load_features
+from WORC.detectors.detectors import DebugDetector
+from WORC.plotting.plot_pvalues_features import manhattan_importance
[docs]def StatisticalTestFeatures(features, patientinfo, config, output_csv=None,
@@ -325,16 +332,23 @@ Source code for WORC.featureprocessing.StatisticalTestFeatures
    # Optional: perform chi2 test. Only do this when the feature is categorical, which we define as having at most 20 unique values.
unique_values = list(set(fv))
unique_values.sort()
- if len(unique_values) == 1:
+ if len(unique_values) == 0: # All NaN
+ print("[WORC Warning] " + fl + " has no value. Replacing chi2 metric value by NaN.")
+ pvalueschi2.append(np.nan)
+ elif len(unique_values) == 1:
print("[WORC Warning] " + fl + " has only one value. Replacing chi2 metric value by NaN.")
pvalueschi2.append(np.nan)
elif len(unique_values) <= 20:
class1_count = [class1.count(i) for i in unique_values]
class2_count = [class2.count(i) for i in unique_values]
obs = np.array([class1_count, class2_count])
-
- _, p, _, _ = chi2_contingency(obs)
- pvalueschi2.append(p)
+
+ try:
+ _, p, _, _ = chi2_contingency(obs)
+ pvalueschi2.append(p)
+ except ValueError:
+ print("[WORC Warning] " + fl + " has a zero element in table of frequencies. Replacing chi2 metric value by NaN.")
+ pvalueschi2.append(np.nan)
else:
print("[WORC Warning] " + fl + " is no categorical variable. Replacing chi2 metric value by NaN.")
pvalueschi2.append(np.nan)
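The try/except added above matters because scipy's chi2_contingency raises a ValueError when the table of expected frequencies contains a zero element, for instance when a category is absent from both classes; a minimal reproduction:

import numpy as np
from scipy.stats import chi2_contingency

class1_count = [5, 0]   # second category absent in both classes:
class2_count = [3, 0]   # zero column, so expected frequencies contain a zero
obs = np.array([class1_count, class2_count])
try:
    _, p, _, _ = chi2_contingency(obs)
except ValueError:
    p = np.nan  # replace the chi2 p-value by NaN, as in the warning branch above
print(p)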
@@ -421,35 +435,35 @@ Source code for WORC.featureprocessing.StatisticalTestFeatures
}
for o in objects:
- if 'hf_' in o:
+ if 'hf_' in o.lower():
labels.append(0)
- elif 'sf_' in o:
+ elif 'sf_' in o.lower():
labels.append(1)
- elif 'of_' in o:
+ elif 'of_' in o.lower():
labels.append(2)
- elif 'GLCM_' in o or 'GLCMMS_' in o:
+        elif 'glcm_' in o.lower() or 'glcmms_' in o.lower():
labels.append(3)
- elif 'GLRLM_' in o:
+ elif 'glrlm_' in o.lower():
labels.append(4)
- elif 'GLSZM_' in o:
+ elif 'glszm_' in o.lower():
labels.append(5)
- elif 'GLDM_' in o:
+ elif 'gldm_' in o.lower():
labels.append(6)
- elif 'NGTDM_' in o:
+ elif 'ngtdm_' in o.lower():
labels.append(7)
- elif 'Gabor_' in o:
+ elif 'gabor_' in o.lower():
labels.append(8)
- elif 'semf_' in o:
+ elif 'semf_' in o.lower():
labels.append(9)
- elif 'df_' in o:
+ elif 'df_' in o.lower():
labels.append(10)
- elif 'logf_' in o:
+ elif 'logf_' in o.lower():
labels.append(11)
- elif 'vf_' in o:
+ elif 'vf_' in o.lower():
labels.append(12)
- elif 'LBP_' in o:
+ elif 'lbp_' in o.lower():
labels.append(13)
- elif 'phasef_' in o:
+ elif 'phasef_' in o.lower():
labels.append(14)
else:
raise KeyError(o)
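The case-insensitive prefix matching introduced above can also be expressed with a mapping; a sketch mirroring the substring checks and group indices from the diff (the feature_group helper is hypothetical):

prefix_to_label = {
    'hf_': 0, 'sf_': 1, 'of_': 2, 'glcm_': 3, 'glcmms_': 3, 'glrlm_': 4,
    'glszm_': 5, 'gldm_': 6, 'ngtdm_': 7, 'gabor_': 8, 'semf_': 9,
    'df_': 10, 'logf_': 11, 'vf_': 12, 'lbp_': 13, 'phasef_': 14,
}

def feature_group(name):
    lowered = name.lower()
    for prefix, label in prefix_to_label.items():
        if prefix in lowered:
            return label
    raise KeyError(name)

print(feature_group('GLCM_contrast'))  # 3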