See ChangeLog

MStarmans91 · Jun 1, 2023 · 75277c7 · 75277c7
1 parent c3dcef2
commit 75277c7
Show file tree

Hide file tree

Showing 7 changed files with 42 additions and 6 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -18,6 +18,7 @@ Fixed
 - SimpleWORC and BasicWORC now detect whether user has provided a separate training
   and test set and thus bootstrapping should be used.
 - Bug in PREDICT was fixed that mixed up the mode in shape feature extraction (2D / 2.5D)
+- Bug in performance calculation of multiclass classification.
 
 Changed
 ~~~~~~~

diff --git a/WORC/classification/SearchCV.py b/WORC/classification/SearchCV.py
@@ -1677,7 +1677,7 @@ def _fit(self, X, y, groups, parameter_iterable):
         estimatordata = f"vfs://tmp/GS/{name}/{fname}"
 
         # Create the fastr network
-        network = fastr.create_network('WORC_CASHOptimization_' + name)
+        network = fastr.create_network('WORC_CASH_' + name)
         estimator_data = network.create_source('HDF5', id='estimator_source', resources=ResourceLimit(memory='4G'))
         traintest_data = network.create_source('HDF5', id='traintest', resources=ResourceLimit(memory='4G'))
         parameter_data = network.create_source('JsonFile', id='parameters', resources=ResourceLimit(memory='4G'))

diff --git a/WORC/doc/static/user_manual.rst b/WORC/doc/static/user_manual.rst
@@ -211,9 +211,25 @@ Training and test sets
 When using a single dataset for both training and evaluation, you should
 only supply "training" datasets. By default, performance on a single
 dataset will be evaluated using cross-validation (default random split, but leave-one-out can also be configured). 
+
 Alternatively, you can supply a separate training and test set, by which you tell 
 ``WORC`` to use this single train-test split. To distinguish between these, for every source, we have a 
-train and test object which you can set:
+train and test object which you can set.
+
+.. note:: When using a separate train and test set, you always need to provide a training and test label file as well.
+        These can refer to the same CSV / Excel file.
+
+When using ``SimpleWORC`` or ``BasicWORC``, you can do
+this through the same function as the training set, but setting  ``is_training=False``, e.g.:
+
+
+.. code-block:: python
+
+    experiment.images_from_this_directory(testimagedatadir,
+                                          image_file_name=image_file_name,
+                                          is_training=False)
+
+When using the ``WORC`` object, or directly setting your sources in ``BasicWORC``, this would look like:
 
 .. code-block:: python
 

diff --git a/WORC/exampledata/create_example_data.py b/WORC/exampledata/create_example_data.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python
 
-# Copyright 2016-2020 Biomedical Imaging Group Rotterdam, Departments of
+# Copyright 2016-2023 Biomedical Imaging Group Rotterdam, Departments of
 # Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -18,6 +18,7 @@
 import pandas as pd
 import numpy as np
 import os
+import SimpleITK as sitk
 
 currentdir = os.path.dirname(os.path.realpath(__file__))
 
@@ -52,7 +53,25 @@ def create_random_features(n_objects=7, n_features=10):
 
         print(f'Saving image features for object {i}.')
         panda_data.to_hdf(output, 'image_features')
+
+
+def create_random_imageandmask(size=512, slices=20):
+    # Create the image
+    image = np.rand.rand((size, size, slices))
+
+    # Create a sphere mask in the center
+    ra = range(-int(slices/2), int(slices/2)+1)
+    x, y, z = np.meshgrid(ra, ra, ra)
+    radius = np.sqrt(x**2 + y**2 + z**2)
+    mask = radius.astype(int) == int(size/4)
 
+    # Convert the images to ITK objects
+    image = sitk.GetImageFromArray(image.astype(np.float32))
+    mask = sitk.GetImageFromArray(mask.astype(int))
+
+    return image, mask
+
 
 if __name__ == "__main__":
     create_random_features()
+    create_random_imageandmask()
diff --git a/WORC/plotting/plot_estimator_performance.py b/WORC/plotting/plot_estimator_performance.py
@@ -722,7 +722,7 @@ def plot_estimator_performance(prediction, label_data, label_type,
                         for name, perf in zip(metric_names_single, performances):
                             for nlabel, label in enumerate(label_type.split(',')):
                                 all_performances[f"{name}_{label}"] = perf[nlabel]
-                                stats[f"{name}_{label} 95%:"] = f"{np.nanmean(perf[nlabel])} {str(compute_confidence(perf, N_1, N_2, alpha))}"
+                                stats[f"{name}_{label} 95%:"] = f"{np.nanmean(perf[nlabel])} {str(compute_confidence(perf[nlabel], N_1, N_2, alpha))}"
                     else:
                         # Singleclass
                         performances = [accuracy, bca, sensitivity, specificity,

diff --git a/WORC/tests/WORCTutorialSimple_unittest_multiclass.py b/WORC/tests/WORCTutorialSimple_unittest_multiclass.py
@@ -188,7 +188,7 @@ def main():
     # Read the overall peformance
     performance_file = os.path.join(experiment_folder, 'performance_all_0.json')
     if not os.path.exists(performance_file):
-        raise ValueError('No performance file found: your network has failed.')
+        print('No performance file found: your network has failed.')
 
     with open(performance_file, 'r') as fp:
         performance = json.load(fp)

diff --git a/WORC/tests/WORCTutorialSimple_unittest_regression.py b/WORC/tests/WORCTutorialSimple_unittest_regression.py
@@ -185,7 +185,7 @@ def main():
     # Read the overall peformance
     performance_file = os.path.join(experiment_folder, 'performance_all_0.json')
     if not os.path.exists(performance_file):
-        raise ValueError('No performance file found: your network has failed.')
+        print('No performance file found: your network has failed.')
 
     with open(performance_file, 'r') as fp:
         performance = json.load(fp)