Merge pull request #61 from MStarmans91/development
Release version 3.4.4
MStarmans91 committed Jul 1, 2021
2 parents a16f8fa + 80a8582 commit 37197ce
Showing 106 changed files with 930 additions and 586 deletions.
21 changes: 21 additions & 0 deletions CHANGELOG
@@ -6,6 +6,27 @@ All notable changes to this project will be documented in this file.
The format is based on `Keep a Changelog <http://keepachangelog.com/>`_
and this project adheres to `Semantic Versioning <http://semver.org/>`_

3.4.4 - 2021-07-01
------------------

Fixed
~~~~~
- Bug where the most recently added estimators were not valid in SimpleWORC.
- SelectorMixin is now imported directly from sklearn.feature_selection,
  as sklearn.feature_selection.base is deprecated and will be removed
  (see the import sketch below).
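
For illustration, a minimal sketch of the import change; the try/except
fallback is illustrative only and not taken from the WORC sources::

    try:
        # Current import location (scikit-learn >= 0.22)
        from sklearn.feature_selection import SelectorMixin
    except ImportError:
        # Deprecated location, removed in later scikit-learn releases
        from sklearn.feature_selection.base import SelectorMixin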

Changed
~~~~~~~
- Apply variance threshold selection before feature scaling; after
  scaling, the variance is the same for all features, so the threshold
  had no effect (see the sketch below).
- Default use of RELIEF, selection using a model, PCA, and univariate
  testing changed from 0.20 to 0.275.
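
A minimal sketch of why the order matters, using plain scikit-learn
components rather than WORC's own wrappers and made-up toy data::

    import numpy as np
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.preprocessing import StandardScaler

    # Toy data: the first feature is nearly constant and thus uninformative.
    X = np.array([[1.000, 5.0],
                  [1.001, 6.0],
                  [1.000, 7.0]])

    # Old order (scale first): z-scoring gives every non-constant feature unit
    # variance, so a variance threshold applied afterwards has nothing to cut.
    print(StandardScaler().fit_transform(X).var(axis=0))  # ~[1. 1.]

    # New order: threshold the raw variances first, then fit the scaler on
    # the remaining features only.
    X_sel = VarianceThreshold(threshold=0.01).fit_transform(X)  # drops feature 0
    X_scaled = StandardScaler().fit_transform(X_sel)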

Added
~~~~~
- Functionality in the image plotting functions.
- Documentation on how to use your own features.

3.4.3 - 2021-06-02
------------------

2 changes: 1 addition & 1 deletion README.md
@@ -1,4 +1,4 @@
-# WORC v3.4.3
+# WORC v3.4.4
## Workflow for Optimal Radiomics Classification

## Information
2 changes: 1 addition & 1 deletion README.rst
@@ -1,4 +1,4 @@
-WORC v3.4.3
+WORC v3.4.4
===========

Workflow for Optimal Radiomics Classification
11 changes: 6 additions & 5 deletions WORC/WORC.py
@@ -355,16 +355,16 @@ def defaultconfig(self):
config['Featsel'] = dict()
config['Featsel']['Variance'] = '1.0'
config['Featsel']['GroupwiseSearch'] = 'True'
-config['Featsel']['SelectFromModel'] = '0.2'
+config['Featsel']['SelectFromModel'] = '0.275'
config['Featsel']['SelectFromModel_estimator'] = 'Lasso, LR, RF'
config['Featsel']['SelectFromModel_lasso_alpha'] = '0.1, 1.4'
config['Featsel']['SelectFromModel_n_trees'] = '10, 90'
-config['Featsel']['UsePCA'] = '0.2'
+config['Featsel']['UsePCA'] = '0.275'
config['Featsel']['PCAType'] = '95variance, 10, 50, 100'
-config['Featsel']['StatisticalTestUse'] = '0.2'
+config['Featsel']['StatisticalTestUse'] = '0.275'
config['Featsel']['StatisticalTestMetric'] = 'MannWhitneyU'
config['Featsel']['StatisticalTestThreshold'] = '-3, 2.5'
-config['Featsel']['ReliefUse'] = '0.2'
+config['Featsel']['ReliefUse'] = '0.275'
config['Featsel']['ReliefNN'] = '2, 4'
config['Featsel']['ReliefSampleSize'] = '0.75, 0.2'
config['Featsel']['ReliefDistanceP'] = '1, 3'
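
How these fractions are consumed is not visible in this hunk; the sketch below
is an assumed illustration (not WORC code) in which a value such as '0.275' is
the probability that the corresponding feature-selection step is switched on
in a randomly sampled workflow:

    import numpy as np

    # Assumed interpretation, for illustration only.
    rng = np.random.default_rng(42)
    p_use_pca = float('0.275')
    use_pca = rng.random(1000) < p_use_pca   # one boolean per sampled workflow
    print(use_pca.mean())                    # close to 0.275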
@@ -450,10 +450,11 @@ def defaultconfig(self):

# Based on https://towardsdatascience.com/doing-xgboost-hyper-parameter-tuning-the-smart-way-part-1-of-2-f6d255a45dde
# and https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/
+# and https://medium.com/data-design/xgboost-hi-im-gamma-what-can-i-do-for-you-and-the-tuning-of-regularization-a42ea17e6ab6
config['Classification']['XGB_boosting_rounds'] = config['Classification']['RFn_estimators']
config['Classification']['XGB_max_depth'] = '3, 12'
config['Classification']['XGB_learning_rate'] = config['Classification']['AdaBoost_learning_rate']
-config['Classification']['XGB_gamma'] = '0.01, 0.99'
+config['Classification']['XGB_gamma'] = '0.01, 9.99'
config['Classification']['XGB_min_child_weight'] = '1, 6'
config['Classification']['XGB_colsample_bytree'] = '0.3, 0.7'
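
These two-number strings are 'loc, scale' pairs that construct_classifier.py
(further down in this diff) passes to scipy distributions; assuming the usual
parsing into two floats, the widened gamma range now spans roughly 0.01 to 10:

    from scipy.stats import uniform

    # uniform(loc, scale) samples from [loc, loc + scale], so '0.01, 9.99'
    # covers roughly 0.01 to 10.0 instead of the previous 0.01 to 1.0.
    loc, scale = (float(v) for v in '0.01, 9.99'.split(','))
    xgb_gamma = uniform(loc=loc, scale=scale)
    print(xgb_gamma.rvs(size=3, random_state=0))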

6 changes: 3 additions & 3 deletions WORC/classification/SearchCV.py
@@ -592,6 +592,9 @@ def preprocess(self, X, y=None, training=False):
if self.best_groupsel is not None:
X = self.best_groupsel.transform(X)

+if self.best_varsel is not None:
+X = self.best_varsel.transform(X)

if not training and hasattr(self, 'overfit_scaler') and self.overfit_scaler:
# Overfit the feature scaling on the test set
# NOTE: Never use this in an actual model, only to assess how
@@ -608,9 +611,6 @@ def preprocess(self, X, y=None, training=False):
if self.best_scaler is not None:
X = self.best_scaler.transform(X)

-if self.best_varsel is not None:
-X = self.best_varsel.transform(X)

if self.best_reliefsel is not None:
X = self.best_reliefsel.transform(X)

8 changes: 4 additions & 4 deletions WORC/classification/construct_classifier.py
@@ -325,8 +325,8 @@ def create_param_grid(config):
scale=config['AdaBoost_n_estimators'][1])

param_grid['AdaBoost_learning_rate'] =\
-scipy.stats.uniform(loc=config['AdaBoost_learning_rate'][0],
-scale=config['AdaBoost_learning_rate'][1])
+log_uniform(loc=config['AdaBoost_learning_rate'][0],
+scale=config['AdaBoost_learning_rate'][1])

# XGDBoost parameters
param_grid['XGB_boosting_rounds'] =\
@@ -338,8 +338,8 @@ def create_param_grid(config):
scale=config['XGB_max_depth'][1])

param_grid['XGB_learning_rate'] =\
-scipy.stats.uniform(loc=config['XGB_learning_rate'][0],
-scale=config['XGB_learning_rate'][1])
+log_uniform(loc=config['XGB_learning_rate'][0],
+scale=config['XGB_learning_rate'][1])

param_grid['XGB_gamma'] =\
scipy.stats.uniform(loc=config['XGB_gamma'][0],
66 changes: 34 additions & 32 deletions WORC/classification/fitandscore.py
@@ -199,6 +199,7 @@ def fit_and_score(X, y, scoring,
print("\n")
print('#######################################')
print('Starting fit and score of new workflow.')

para_estimator = parameters.copy()
estimator = cc.construct_classifier(para_estimator)

@@ -406,38 +407,6 @@ def fit_and_score(X, y, scoring,
else:
return ret

-# ------------------------------------------------------------------------
-# Feature scaling
-if verbose and para_estimator['FeatureScaling'] != 'None':
-print(f'Fitting scaler and transforming features, method ' +
-f'{para_estimator["FeatureScaling"]}.')
-
-scaling_method = para_estimator['FeatureScaling']
-if scaling_method == 'None':
-scaler = None
-else:
-skip_features = para_estimator['FeatureScaling_skip_features']
-n_skip_feat = len([i for i in feature_labels[0] if any(e in i for e in skip_features)])
-if n_skip_feat == len(X_train[0]):
-# Don't need to scale any features
-if verbose:
-print('[WORC Warning] Skipping scaling, only skip features selected.')
-scaler = None
-else:
-scaler = WORCScaler(method=scaling_method, skip_features=skip_features)
-scaler.fit(X_train, feature_labels[0])
-
-if scaler is not None:
-X_train = scaler.transform(X_train)
-X_test = scaler.transform(X_test)
-
-del para_estimator['FeatureScaling']
-del para_estimator['FeatureScaling_skip_features']
-
-# Delete the object if we do not need to return it
-if not return_all:
-del scaler
-
# --------------------------------------------------------------------
# Feature selection based on variance
if para_estimator['Featsel_Variance'] == 'True':
@@ -474,6 +443,39 @@ def fit_and_score(X, y, scoring,
else:
return ret

+# ------------------------------------------------------------------------
+# Feature scaling
+if verbose and para_estimator['FeatureScaling'] != 'None':
+print(f'Fitting scaler and transforming features, method ' +
+f'{para_estimator["FeatureScaling"]}.')
+
+scaling_method = para_estimator['FeatureScaling']
+if scaling_method == 'None':
+scaler = None
+else:
+skip_features = para_estimator['FeatureScaling_skip_features']
+n_skip_feat = len([i for i in feature_labels[0] if any(e in i for e in skip_features)])
+if n_skip_feat == len(X_train[0]):
+# Don't need to scale any features
+if verbose:
+print('[WORC Warning] Skipping scaling, only skip features selected.')
+scaler = None
+else:
+scaler = WORCScaler(method=scaling_method, skip_features=skip_features)
+scaler.fit(X_train, feature_labels[0])
+
+if scaler is not None:
+X_train = scaler.transform(X_train)
+X_test = scaler.transform(X_test)
+
+del para_estimator['FeatureScaling']
+del para_estimator['FeatureScaling_skip_features']
+
+# Delete the object if we do not need to return it
+if not return_all:
+del scaler
+
+
# --------------------------------------------------------------------
# Relief feature selection, possibly multi classself.
# Needs to be done after scaling!
Binary file modified WORC/doc/_build/doctrees/autogen/WORC.plotting.doctree
Binary file modified WORC/doc/_build/doctrees/environment.pickle
Binary file modified WORC/doc/_build/doctrees/static/changelog.doctree
Binary file modified WORC/doc/_build/doctrees/static/configuration.doctree
Binary file modified WORC/doc/_build/doctrees/static/quick_start.doctree
2 changes: 1 addition & 1 deletion WORC/doc/_build/html/.buildinfo
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
-config: 1a400459aca376ecf872ea4fb53480b0
+config: eb52688555d036dabae0d8acf57cbe18
tags: 645f666f9bcd5a90fca523b33c5a78b7
4 changes: 2 additions & 2 deletions WORC/doc/_build/html/_modules/WORC/IOparser/config_WORC.html
@@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

-<title>WORC.IOparser.config_WORC &mdash; WORC 3.4.3 documentation</title>
+<title>WORC.IOparser.config_WORC &mdash; WORC 3.4.4 documentation</title>



@@ -62,7 +62,7 @@


<div class="version">
-3.4.3
+3.4.4
</div>


@@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

-<title>WORC.IOparser.config_io_classifier &mdash; WORC 3.4.3 documentation</title>
+<title>WORC.IOparser.config_io_classifier &mdash; WORC 3.4.4 documentation</title>



@@ -62,7 +62,7 @@


<div class="version">
-3.4.3
+3.4.4
</div>


@@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

-<title>WORC.IOparser.config_preprocessing &mdash; WORC 3.4.3 documentation</title>
+<title>WORC.IOparser.config_preprocessing &mdash; WORC 3.4.4 documentation</title>



@@ -62,7 +62,7 @@


<div class="version">
-3.4.3
+3.4.4
</div>


@@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

-<title>WORC.IOparser.config_segmentix &mdash; WORC 3.4.3 documentation</title>
+<title>WORC.IOparser.config_segmentix &mdash; WORC 3.4.4 documentation</title>



@@ -62,7 +62,7 @@


<div class="version">
-3.4.3
+3.4.4
</div>


4 changes: 2 additions & 2 deletions WORC/doc/_build/html/_modules/WORC/IOparser/file_io.html
@@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

-<title>WORC.IOparser.file_io &mdash; WORC 3.4.3 documentation</title>
+<title>WORC.IOparser.file_io &mdash; WORC 3.4.4 documentation</title>



@@ -62,7 +62,7 @@


<div class="version">
-3.4.3
+3.4.4
</div>


15 changes: 8 additions & 7 deletions WORC/doc/_build/html/_modules/WORC/WORC.html
@@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

-<title>WORC.WORC &mdash; WORC 3.4.3 documentation</title>
+<title>WORC.WORC &mdash; WORC 3.4.4 documentation</title>



@@ -62,7 +62,7 @@


<div class="version">
-3.4.3
+3.4.4
</div>


@@ -520,16 +520,16 @@ <h1>Source code for WORC.WORC</h1><div class="highlight"><pre>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="nb">dict</span><span class="p">()</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;Variance&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;1.0&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;GroupwiseSearch&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;True&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;SelectFromModel&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.2&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;SelectFromModel&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.275&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;SelectFromModel_estimator&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;Lasso, LR, RF&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;SelectFromModel_lasso_alpha&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.1, 1.4&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;SelectFromModel_n_trees&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;10, 90&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;UsePCA&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.2&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;UsePCA&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.275&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;PCAType&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;95variance, 10, 50, 100&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;StatisticalTestUse&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.2&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;StatisticalTestUse&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.275&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;StatisticalTestMetric&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;MannWhitneyU&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;StatisticalTestThreshold&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;-3, 2.5&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;ReliefUse&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.2&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;ReliefUse&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.275&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;ReliefNN&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;2, 4&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;ReliefSampleSize&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.75, 0.2&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Featsel&#39;</span><span class="p">][</span><span class="s1">&#39;ReliefDistanceP&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;1, 3&#39;</span>
@@ -615,10 +615,11 @@ <h1>Source code for WORC.WORC</h1><div class="highlight"><pre>

<span class="c1"># Based on https://towardsdatascience.com/doing-xgboost-hyper-parameter-tuning-the-smart-way-part-1-of-2-f6d255a45dde</span>
<span class="c1"># and https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/</span>
<span class="c1"># and https://medium.com/data-design/xgboost-hi-im-gamma-what-can-i-do-for-you-and-the-tuning-of-regularization-a42ea17e6ab6</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Classification&#39;</span><span class="p">][</span><span class="s1">&#39;XGB_boosting_rounds&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">config</span><span class="p">[</span><span class="s1">&#39;Classification&#39;</span><span class="p">][</span><span class="s1">&#39;RFn_estimators&#39;</span><span class="p">]</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Classification&#39;</span><span class="p">][</span><span class="s1">&#39;XGB_max_depth&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;3, 12&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Classification&#39;</span><span class="p">][</span><span class="s1">&#39;XGB_learning_rate&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="n">config</span><span class="p">[</span><span class="s1">&#39;Classification&#39;</span><span class="p">][</span><span class="s1">&#39;AdaBoost_learning_rate&#39;</span><span class="p">]</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Classification&#39;</span><span class="p">][</span><span class="s1">&#39;XGB_gamma&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.01, 0.99&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Classification&#39;</span><span class="p">][</span><span class="s1">&#39;XGB_gamma&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.01, 9.99&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Classification&#39;</span><span class="p">][</span><span class="s1">&#39;XGB_min_child_weight&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;1, 6&#39;</span>
<span class="n">config</span><span class="p">[</span><span class="s1">&#39;Classification&#39;</span><span class="p">][</span><span class="s1">&#39;XGB_colsample_bytree&#39;</span><span class="p">]</span> <span class="o">=</span> <span class="s1">&#39;0.3, 0.7&#39;</span>

4 changes: 2 additions & 2 deletions WORC/doc/_build/html/_modules/WORC/addexceptions.html
@@ -8,7 +8,7 @@

<meta name="viewport" content="width=device-width, initial-scale=1.0">

-<title>WORC.addexceptions &mdash; WORC 3.4.3 documentation</title>
+<title>WORC.addexceptions &mdash; WORC 3.4.4 documentation</title>



@@ -62,7 +62,7 @@


<div class="version">
-3.4.3
+3.4.4
</div>

