From 43049c1f49615ce905e948712cf598677ff158e3 Mon Sep 17 00:00:00 2001 From: Jirka Borovec Date: Sun, 13 Jan 2019 09:41:02 +0100 Subject: [PATCH] Cross-Validation (#17) * fix & refactor CrossVal * update CI * update manifest * refactoring --- .shippable.yml | 34 ----- .travis.yml | 11 +- MANIFEST.in | 4 +- README.md | 8 +- circle.yml | 40 +---- .../run_center_clustering.py | 4 +- .../test_ovary_cebters.sh | 8 + .../run_ovary_egg-segmentation.py | 2 +- experiments_ovary_detect/test_ovary_detect.sh | 13 ++ .../run_segm_slic_model_graphcut.py | 2 +- .../test_segmentations.sh | 16 ++ handling_annotations/test_annotations.sh | 23 +++ imsegm/classification.py | 137 +++++++++--------- imsegm/descriptors.py | 124 +++++++--------- imsegm/region_growing.py | 4 +- setup.py | 6 +- 16 files changed, 212 insertions(+), 224 deletions(-) create mode 100644 experiments_ovary_centres/test_ovary_cebters.sh create mode 100644 experiments_ovary_detect/test_ovary_detect.sh create mode 100644 experiments_segmentation/test_segmentations.sh create mode 100644 handling_annotations/test_annotations.sh diff --git a/.shippable.yml b/.shippable.yml index 41c52f47..adbeafe8 100755 --- a/.shippable.yml +++ b/.shippable.yml @@ -52,40 +52,6 @@ script: - python setup.py check -m -s - flake8 . --ignore=E402,E731 --max-line-length=100 - # ANNOTATION section - - python handling_annotations/run_image_color_quantization.py -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" - - python handling_annotations/run_image_convert_label_color.py -imgs "./data_images/drosophila_ovary_slice/segm/*.png" -out ./data_images/drosophila_ovary_slice/segm_rgb - - python handling_annotations/run_overlap_images_segms.py -imgs "./data_images/drosophila_ovary_slice/image/*.jpg" -segs ./data_images/drosophila_ovary_slice/segm -out ./results/overlap_ovary_segment - - python handling_annotations/run_segm_annot_inpaint.py -imgs "./data_images/drosophila_ovary_slice/segm/*.png" --label 0 - - python handling_annotations/run_segm_annot_relabel.py -imgs "./data_images/drosophila_ovary_slice/center_levels/*.png" -out ./results/relabel_center_levels - - # SEGMENTATION section - - rm -r -f results && mkdir results - - python experiments_segmentation/run_compute_stat_annot_segm.py -a "data_images/drosophila_ovary_slice/annot_struct/*.png" -s "data_images/drosophila_ovary_slice/segm/*.png" --visual - - python experiments_segmentation/run_segm_slic_model_graphcut.py -i "data_images/drosophila_disc/image/img_[5,6].jpg" -cfg ./experiments_segmentation/sample_config.json --visual - - python experiments_segmentation/run_segm_slic_classif_graphcut.py -l data_images/drosophila_ovary_slice/list_imgs-annot-struct_short.csv -i "data_images/drosophila_ovary_slice/image/insitu41*.jpg" -cfg ./experiments_segmentation/sample_config.json --visual - - # CENTER DETECT. section - - rm -r -f results && mkdir results - - python experiments_ovary_centres/run_create_annotation.py - - python experiments_ovary_centres/run_center_candidate_training.py - - python experiments_ovary_centres/run_center_prediction.py - - python experiments_ovary_centres/run_center_clustering.py - - python experiments_ovary_centres/run_center_evaluation.py - - # download MorphSnake - - pip install git+https://github.com/Borda/morph-snakes.git - # REGION GROWING section - - rm -r -f results && mkdir results - - python experiments_ovary_detect/run_RG2Sp_estim_shape-models.py - - python experiments_ovary_detect/run_ovary_egg-segmentation.py -m ellipse_moments ellipse_ransac_mmt ellipse_ransac_crit GC_pixels-large GC_pixels-shape GC_slic-small GC_slic-shape rg2sp_greedy-single rg2sp_GC-mixture watershed_morph - - python experiments_ovary_detect/run_ovary_segm_evaluation.py - - python experiments_ovary_detect/run_export_user-annot-segm.py - - python experiments_ovary_detect/run_cut_segmented_objects.py - - python experiments_ovary_detect/run_ellipse_annot_match.py - - python experiments_ovary_detect/run_ellipse_cut_scale.py - - python experiments_ovary_detect/run_egg_swap_orientation.py - after_success: - python setup.py install diff --git a/.travis.yml b/.travis.yml index 43f3a748..f80d2c7c 100755 --- a/.travis.yml +++ b/.travis.yml @@ -48,9 +48,16 @@ script: after_success: - codecov # public repository on Travis CI - # private repository on Travis CI - # - codecov -t 80efed4e-ac2b-4fea-a642-0a8b1c82e1c8 - coverage xml - python-codacy-coverage -r coverage.xml - coverage report + # ANNOTATION section + - bash handling_annotations/test_annotations.sh + # SEGMENTATION section + - bash experiments_segmentation/test_segmentations.sh + # CENTER DETECT. section + - bash experiments_ovary_centres/test_ovary_cebters.sh + # REGION GROWING section + - bash experiments_ovary_detect/test_ovary_detect.sh + # test installed package - cd .. && python -c "import imsegm.descriptors" diff --git a/MANIFEST.in b/MANIFEST.in index 1be92896..793a415e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -21,8 +21,8 @@ include setup.* # Exclude build configs exclude *.yml -# Exclude tests -exclude test*.py +# Exclude experiments +exclude */*.sh # Include the experiments recursive-include experiments_* *.py *.json diff --git a/README.md b/README.md index b524f9be..b46b5447 100755 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ Moreover, we are using python [GraphCut wrapper](https://github.com/Borda/pyGCO) **Compilation** -We have implemented cython version of some functions, especially computing descriptors, which require to compile them before using them +We have implemented `cython` version of some functions, especially computing descriptors, which require to compile them before using them ```bash python setup.py build_ext --inplace ``` @@ -98,7 +98,11 @@ If loading of compiled descriptors in `cython` fails, it is automatically swappe **Installation** -The package can be installed via pip from the folder +The package can be installed via pip +```bash +pip install git+https://github.com/Borda/pyImSegm.git +``` + or using `setuptools` from local folder ```bash python setup.py install ``` diff --git a/circle.yml b/circle.yml index 9c3b9770..dbf0a36e 100755 --- a/circle.yml +++ b/circle.yml @@ -35,48 +35,16 @@ jobs: flake8 . --ignore=E402,E731 --max-line-length=100 # ANNOTATION section - - run: - name: Annotations - command: | - python handling_annotations/run_image_color_quantization.py -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" - python handling_annotations/run_image_color_quantization.py -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" -m position - python handling_annotations/run_image_convert_label_color.py -imgs "./data_images/drosophila_ovary_slice/segm/*.png" -out ./data_images/drosophila_ovary_slice/segm_rgb - python handling_annotations/run_image_convert_label_color.py -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" -out ./data_images/drosophila_ovary_slice/segm - python handling_annotations/run_overlap_images_segms.py -imgs "./data_images/drosophila_ovary_slice/image/*.jpg" -segs ./data_images/drosophila_ovary_slice/segm -out ./results/overlap_ovary_segment - python handling_annotations/run_segm_annot_inpaint.py -imgs "./data_images/drosophila_ovary_slice/segm/*.png" --label 0 - python handling_annotations/run_segm_annot_relabel.py -imgs "./data_images/drosophila_ovary_slice/center_levels/*.png" -out ./results/relabel_center_levels + - run: bash handling_annotations/test_annotations.sh # SEGMENTATION section - - run: - name: Segmentation - command: | - python experiments_segmentation/run_compute_stat_annot_segm.py -a "data_images/drosophila_ovary_slice/annot_struct/*.png" -s "data_images/drosophila_ovary_slice/segm/*.png" --visual - python experiments_segmentation/run_segm_slic_model_graphcut.py -i "data_images/drosophila_disc/image/img_[5,6].jpg" -cfg ./experiments_segmentation/sample_config.json --visual - python experiments_segmentation/run_segm_slic_classif_graphcut.py -l data_images/drosophila_ovary_slice/list_imgs-annot-struct_short.csv -i "data_images/drosophila_ovary_slice/image/insitu41*.jpg" -cfg ./experiments_segmentation/sample_config.json --visual + - run: bash experiments_segmentation/test_segmentations.sh # CENTER DETECT. section - - run: - name: Center detection - command: | - python experiments_ovary_centres/run_create_annotation.py - python experiments_ovary_centres/run_center_candidate_training.py - python experiments_ovary_centres/run_center_prediction.py - python experiments_ovary_centres/run_center_clustering.py - python experiments_ovary_centres/run_center_evaluation.py + - run: bash experiments_ovary_centres/test_ovary_cebters.sh # REGION GROWING section - - run: - name: Region Growing - command: | - pip install --user git+https://github.com/Borda/morph-snakes.git - python experiments_ovary_detect/run_RG2Sp_estim_shape-models.py - python experiments_ovary_detect/run_ovary_egg-segmentation.py -m ellipse_moments ellipse_ransac_mmt ellipse_ransac_crit GC_pixels-large GC_pixels-shape GC_slic-small GC_slic-shape rg2sp_greedy-single rg2sp_GC-mixture watershed_morph - python experiments_ovary_detect/run_ovary_segm_evaluation.py --visual - python experiments_ovary_detect/run_export_user-annot-segm.py - python experiments_ovary_detect/run_cut_segmented_objects.py - python experiments_ovary_detect/run_ellipse_annot_match.py - python experiments_ovary_detect/run_ellipse_cut_scale.py - python experiments_ovary_detect/run_egg_swap_orientation.py + - run: bash experiments_ovary_detect/test_ovary_detect.sh # PASSING - run: diff --git a/experiments_ovary_centres/run_center_clustering.py b/experiments_ovary_centres/run_center_clustering.py index b940af5f..71aeb879 100755 --- a/experiments_ovary_centres/run_center_clustering.py +++ b/experiments_ovary_centres/run_center_clustering.py @@ -66,7 +66,7 @@ def cluster_center_candidates(points, max_dist=100, min_samples=1): :return (ndarray, [int]): """ points = np.array(points) - if len(points) == 0: + if not list(points): return points, [] dbscan = cluster.DBSCAN(eps=max_dist, min_samples=min_samples) dbscan.fit(points) @@ -134,7 +134,7 @@ def cluster_points_draw_export(dict_row, params, path_out=None): 'missing some required fields: %s' % repr(dict_row) name = os.path.splitext(os.path.basename(dict_row['path_points']))[0] points = tl_data.load_landmarks_csv(dict_row['path_points']) - if len(points) == 0: + if not list(points): logging.debug('no points to cluster for "%s"', name) points = tl_data.swap_coord_x_y(points) diff --git a/experiments_ovary_centres/test_ovary_cebters.sh b/experiments_ovary_centres/test_ovary_cebters.sh new file mode 100644 index 00000000..f80b4021 --- /dev/null +++ b/experiments_ovary_centres/test_ovary_cebters.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +rm -r -f results && mkdir results +python experiments_ovary_centres/run_create_annotation.py +python experiments_ovary_centres/run_center_candidate_training.py +python experiments_ovary_centres/run_center_prediction.py +python experiments_ovary_centres/run_center_clustering.py +python experiments_ovary_centres/run_center_evaluation.py \ No newline at end of file diff --git a/experiments_ovary_detect/run_ovary_egg-segmentation.py b/experiments_ovary_detect/run_ovary_egg-segmentation.py index 9adc28ec..1007eab6 100755 --- a/experiments_ovary_detect/run_ovary_egg-segmentation.py +++ b/experiments_ovary_detect/run_ovary_egg-segmentation.py @@ -701,7 +701,7 @@ def image_segmentation(idx_row, params, debug_export=DEBUG_EXPORT): return name centers = tl_data.load_landmarks_csv(row_path['path_centers']) centers = tl_data.swap_coord_x_y(centers) - if len(centers) == 0: + if not list(centers): logging.warning('no center was detected for "%s"', name) return name # img = seg / float(seg.max()) diff --git a/experiments_ovary_detect/test_ovary_detect.sh b/experiments_ovary_detect/test_ovary_detect.sh new file mode 100644 index 00000000..396cb2a6 --- /dev/null +++ b/experiments_ovary_detect/test_ovary_detect.sh @@ -0,0 +1,13 @@ +#!/usr/bin/env bash + +pip install --user git+https://github.com/Borda/morph-snakes.git +rm -r -f results && mkdir results +python experiments_ovary_detect/run_RG2Sp_estim_shape-models.py +python experiments_ovary_detect/run_ovary_egg-segmentation.py \ + -m ellipse_moments ellipse_ransac_mmt ellipse_ransac_crit GC_pixels-large GC_pixels-shape GC_slic-small GC_slic-shape rg2sp_greedy-single rg2sp_GC-mixture watershed_morph +python experiments_ovary_detect/run_ovary_segm_evaluation.py --visual +python experiments_ovary_detect/run_export_user-annot-segm.py +python experiments_ovary_detect/run_cut_segmented_objects.py +python experiments_ovary_detect/run_ellipse_annot_match.py +python experiments_ovary_detect/run_ellipse_cut_scale.py +python experiments_ovary_detect/run_egg_swap_orientation.py \ No newline at end of file diff --git a/experiments_segmentation/run_segm_slic_model_graphcut.py b/experiments_segmentation/run_segm_slic_model_graphcut.py index ef6a4cd3..252bff0a 100644 --- a/experiments_segmentation/run_segm_slic_model_graphcut.py +++ b/experiments_segmentation/run_segm_slic_model_graphcut.py @@ -509,7 +509,7 @@ def main(params): tl_expt.create_subfolders(params['path_exp'], LIST_FOLDERS_DEBUG) paths_img = load_path_images(params) - assert len(paths_img) > 0, 'missing images' + assert paths_img, 'missing images' def _path_expt(n): return os.path.join(params['path_exp'], n) diff --git a/experiments_segmentation/test_segmentations.sh b/experiments_segmentation/test_segmentations.sh new file mode 100644 index 00000000..0ea7d204 --- /dev/null +++ b/experiments_segmentation/test_segmentations.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +rm -r -f results && mkdir results +python experiments_segmentation/run_compute_stat_annot_segm.py \ + -a "data_images/drosophila_ovary_slice/annot_struct/*.png" \ + -s "data_images/drosophila_ovary_slice/segm/*.png" \ + --visual +python experiments_segmentation/run_segm_slic_model_graphcut.py \ + -i "data_images/drosophila_disc/image/img_[5,6].jpg" \ + -cfg ./experiments_segmentation/sample_config.json \ + --visual +python experiments_segmentation/run_segm_slic_classif_graphcut.py \ + -l data_images/drosophila_ovary_slice/list_imgs-annot-struct_short.csv \ + -i "data_images/drosophila_ovary_slice/image/insitu41*.jpg" \ + -cfg ./experiments_segmentation/sample_config.json \ + --visual \ No newline at end of file diff --git a/handling_annotations/test_annotations.sh b/handling_annotations/test_annotations.sh new file mode 100644 index 00000000..d9cc2206 --- /dev/null +++ b/handling_annotations/test_annotations.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +python handling_annotations/run_image_color_quantization.py \ + -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" +python handling_annotations/run_image_color_quantization.py \ + -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" \ + -m position +python handling_annotations/run_image_convert_label_color.py \ + -imgs "./data_images/drosophila_ovary_slice/segm/*.png" \ + -out ./data_images/drosophila_ovary_slice/segm_rgb +python handling_annotations/run_image_convert_label_color.py \ + -imgs "./data_images/drosophila_ovary_slice/segm_rgb/*.png" \ + -out ./data_images/drosophila_ovary_slice/segm +python handling_annotations/run_overlap_images_segms.py \ + -imgs "./data_images/drosophila_ovary_slice/image/*.jpg" \ + -segs ./data_images/drosophila_ovary_slice/segm \ + -out ./results/overlap_ovary_segment +python handling_annotations/run_segm_annot_inpaint.py \ + -imgs "./data_images/drosophila_ovary_slice/segm/*.png" \ + --label 0 +python handling_annotations/run_segm_annot_relabel.py \ + -imgs "./data_images/drosophila_ovary_slice/center_levels/*.png" \ + -out ./results/relabel_center_levels \ No newline at end of file diff --git a/imsegm/classification.py b/imsegm/classification.py index 38a76083..6816cc20 100755 --- a/imsegm/classification.py +++ b/imsegm/classification.py @@ -695,7 +695,7 @@ def create_classif_search_train_export(clf_name, features, labels, cross_val=10, './classif_RandForest_search_params_scores.txt'] >>> for p in files: os.remove(p) """ - assert len(labels) > 0, 'some labels has to be given' + assert list(labels), 'some labels has to be given' features = np.nan_to_num(features) assert len(features) == len(labels), \ 'features (%i) and labels (%i) should have equal length' \ @@ -1376,7 +1376,7 @@ def compute_metric_tpfp_tpfn(annot, segm, label_positive=None): # return stat -class HoldOut: +class HoldOut(object): """ Hold-out cross-validator generator. In the hold-out, the data is split only once into a train set and a test set. @@ -1385,9 +1385,9 @@ class HoldOut: Parameters ---------- - nb : total number of samples - hold_idx : int index where the test starts - random_state : Seed for the random number generator. + nb_samples : int, total number of samples + hold_out : int, number where the test starts + rand_seed : seed for the random number generator Example ------- @@ -1435,15 +1435,16 @@ def __len__(self): return 1 -class CrossValidatePOut: +class CrossValidatePOut(object): """ Hold-out cross-validator generator. In the hold-out, the data is split only once into a train set and a test set. - Unlike in other cross-validation schemes, the hold-out - consists of only one iteration. Parameters ---------- + nb_samples : integer, total number of samples + nb_hold_out : integer, number of samples hold out + rand_seed : seed for the random number generator Example 1 --------- @@ -1455,6 +1456,8 @@ class CrossValidatePOut: >>> list(cv) # doctest: +NORMALIZE_WHITESPACE [([3, 4, 5], [0, 1, 2]), ([0, 1, 2], [3, 4, 5])] + >>> [(len(tr), len(ts)) for tr, ts in CrossValidatePOut(340, 0.41)] + [(201, 139), (201, 139), (201, 139)] Example 2 --------- @@ -1462,8 +1465,8 @@ class CrossValidatePOut: >>> list(cv) # doctest: +NORMALIZE_WHITESPACE [([3, 0, 5, 4], [6, 2, 1]), ([6, 2, 1, 4], [3, 0, 5]), - ([6, 2, 1, 3, 0, 5], [4])] - >>> len(list(cv)) + ([1, 3, 0, 5], [4, 6, 2])] + >>> len(cv) 3 >>> cv.indexes [6, 2, 1, 3, 0, 5, 4] @@ -1475,20 +1478,24 @@ class CrossValidatePOut: [([6, 2], [1, 3, 0, 5, 4]), ([1, 3], [6, 2, 0, 5, 4]), ([0, 5], [6, 2, 1, 3, 4]), - ([4], [6, 2, 1, 3, 0, 5])] + ([4, 6], [2, 1, 3, 0, 5])] + >>> [(len(tr), len(ts)) for tr, ts in CrossValidatePOut(340, 0.55)] + [(153, 187), (153, 187), (153, 187)] """ def __init__(self, nb_samples, nb_hold_out, rand_seed=None): - """ + """ constructor - :param [int] nb_samples: list of sizes - :param int nb_hold_out: how much hold out + :param int nb_samples: list of sizes + :param int|float nb_hold_out: how much hold out :param int|None rand_seed: """ assert nb_samples > nb_hold_out, \ - 'number of holdout has to be smaller then _total size' + 'number of holdout has to be smaller then total size' + assert nb_hold_out > 0, 'number of holdout has to be positive number' self._nb_samples = nb_samples - self._nb_hold_out = nb_hold_out + self._nb_hold_out = int(np.round(nb_samples * nb_hold_out)) \ + if nb_hold_out < 1 else nb_hold_out self._revert = False # sets the sizes if self._nb_hold_out > (self._nb_samples / 2.): @@ -1501,9 +1508,12 @@ def __init__(self, nb_samples, nb_hold_out, rand_seed=None): self.indexes = list(range(self._nb_samples)) if rand_seed is not None and rand_seed is not False: + self._shuffle = True np.random.seed(rand_seed) np.random.shuffle(self.indexes) - logging.debug('sets ordering: %s', repr(self.indexes)) + else: + self._shuffle = False + logging.debug('sets ordering: %s', repr(np.array(self.indexes))) self.iter = 0 @@ -1513,8 +1523,19 @@ def __iter__(self): :return ([int], [int]): """ for i in range(0, self._nb_samples, self._nb_hold_out): - inds_test = self.indexes[i:i + self._nb_hold_out] - inds_train = [i for i in self.indexes if i not in inds_test] + i_end = i + self._nb_hold_out + inds_test = self.indexes[i:i_end] + inds_train = self.indexes[:i] + self.indexes[i_end:] + # over flow the limited set + if i_end > self._nb_samples: + i_begin = i_end - self._nb_samples + inds_test += self.indexes[:i_begin] + inds_train = self.indexes[i_begin:i] + logging.warning('Your demand for last test fold overflow by %i, ' + 'to keep the train-test ration we reuse part ' + 'of the already tested samples from the %s beginning.', + i_begin, 'shuffled' if self._shuffle else '') + # reverting the train -test split if self._revert: inds_train, inds_test = inds_test, inds_train yield inds_train, inds_test @@ -1527,15 +1548,16 @@ def __len__(self): return int(np.ceil(self._nb_samples / float(self._nb_hold_out))) -class CrossValidatePSetsOut: +class CrossValidatePSetsOut(CrossValidatePOut): """ Hold-out cross-validator generator. In the hold-out, the data is split only once into a train set and a test set. - Unlike in other cross-validation schemes, the hold-out - consists of only one iteration. Parameters ---------- + set_sizes : list of integers, number of samples in each set + nb_hold_out : integer, number of sets hold out + rand_seed : seed for the random number generator Example 1 --------- @@ -1547,6 +1569,8 @@ class CrossValidatePSetsOut: >>> list(cv) # doctest: +NORMALIZE_WHITESPACE [([5, 6, 7, 8, 9], [0, 1, 2, 3, 4]), ([0, 1, 2, 3, 4], [5, 6, 7, 8, 9])] + >>> [(len(tr), len(ts)) for tr, ts in CrossValidatePSetsOut([7] * 340, 0.41)] + [(1407, 973), (1407, 973), (1407, 973)] Example 2 --------- @@ -1556,10 +1580,10 @@ class CrossValidatePSetsOut: >>> list(cv) # doctest: +NORMALIZE_WHITESPACE [([2, 3, 5, 6, 7], [4, 0, 1]), ([4, 0, 1, 7], [2, 3, 5, 6]), - ([4, 0, 1, 2, 3, 5, 6], [7])] + ([0, 1, 2, 3, 5, 6], [7, 4])] >>> len(cv) 3 - >>> cv.sets_order + >>> cv.indexes [2, 0, 1, 3, 4] Example 3 @@ -1569,71 +1593,52 @@ class CrossValidatePSetsOut: [([8, 4], [2, 3, 5, 6, 0, 1, 7]), ([2, 3, 5, 6], [8, 4, 0, 1, 7]), ([0, 1, 7], [8, 4, 2, 3, 5, 6])] + >>> [(len(tr), len(ts)) for tr, ts in CrossValidatePSetsOut([7] * 340, 0.55)] + [(1071, 1309), (1071, 1309), (1071, 1309)] """ def __init__(self, set_sizes, nb_hold_out, rand_seed=None): - """ + """ construct :param [int] set_sizes: list of sizes - :param int nb_hold_out: how much hold out + :param int|float nb_hold_out: how much hold out :param int|None rand_seed: """ - assert len(set_sizes) > nb_hold_out, \ - 'nb of hold out (%i) has to be smaller then _total size %i' \ - % (nb_hold_out, len(set_sizes)) - self._set_sizes = list(set_sizes) - self._total = np.sum(self._set_sizes) - self._nb_hold_out = nb_hold_out - - self._revert = False # sets the sizes - if self._nb_hold_out > (len(self._set_sizes) / 2.): - logging.debug('WARNING: you are running in reverse mode, ' - 'while using all training examples ' - 'there are much more yield test cases.') - self._nb_hold_out = len(self._set_sizes) - self._nb_hold_out - self._revert = True + super(CrossValidatePSetsOut, self).__init__( + len(set_sizes), nb_hold_out, rand_seed) + self._set_sizes = list(set_sizes) self.set_indexes = [] + + start = 0 for i, size in enumerate(self._set_sizes): - start = int(np.sum(self._set_sizes[:i])) inds = range(start, start + size) self.set_indexes.append(list(inds)) + start += size - assert np.sum(len(i) for i in self.set_indexes) == self._total, \ - 'all indexes should sum to _total count %i' % self._total + total = np.sum(self._set_sizes) + assert np.sum(len(i) for i in self.set_indexes) == total, \ + 'all indexes should sum to total count %i' % total - self.sets_order = list(range(len(self._set_sizes))) - - if rand_seed is not None and rand_seed is not False: - np.random.seed(rand_seed) - np.random.shuffle(self.sets_order) - logging.debug('sets ordering: %s', repr(self.sets_order)) + def __iter_indexes(self, sets): + """ return enrol indexes from sets - self.iter = 0 + :param [int] sets: selection of indexes + :return [int]: + """ + inds = list(itertools.chain(*[self.set_indexes[i] for i in sets])) + return inds def __iter__(self): """ iterate the folds :return ([int], [int]): """ - for i in range(0, len(self._set_sizes), self._nb_hold_out): - test = self.sets_order[i:i + self._nb_hold_out] - inds_train = list(itertools.chain.from_iterable( - self.set_indexes[i] for i in self.sets_order if i not in test)) - inds_test = list(itertools.chain.from_iterable( - self.set_indexes[i] for i in self.sets_order if i in test)) - if self._revert: - inds_train, inds_test = inds_test, inds_train + for train, test in super(CrossValidatePSetsOut, self).__iter__(): + inds_train = self.__iter_indexes(train) + inds_test = self.__iter_indexes(test) yield inds_train, inds_test - def __len__(self): - """ number of folds - - :return int: - """ - nb = len(self._set_sizes) / float(self._nb_hold_out) - return int(np.ceil(nb)) - # DEPRECATED # ========== diff --git a/imsegm/descriptors.py b/imsegm/descriptors.py index 8a248171..aa78605e 100755 --- a/imsegm/descriptors.py +++ b/imsegm/descriptors.py @@ -8,6 +8,7 @@ Copyright (C) 2014-2018 Jiri Borovec """ +import itertools import logging import numpy as np @@ -630,15 +631,15 @@ def numpy_img3d_gray_median(img, seg): def compute_image3d_gray_statistic(image, segm, - list_feature_flags=NAMES_FEATURE_FLAGS, + feature_flags=NAMES_FEATURE_FLAGS, ch_name='gray'): """ compute complete descriptors / statistic on gray (3D) images :param ndarray image: :param ndarray segm: segmentation - :param list_feature_flags: + :param [str] feature_flags: :param str ch_name: channel name - :return np.ndarray, [str]: + :return (ndarray, [str]): np.ndarray >>> image = np.zeros((2, 3, 8)) >>> image[0, :, 2:6] = 1 @@ -666,54 +667,42 @@ def compute_image3d_gray_statistic(image, segm, """ _check_gray_image_segm(image, segm) - assert len(list_feature_flags) > 0, 'some features has to be selected' + assert list(feature_flags), 'some features has to be selected' image = np.nan_to_num(image) - features, names = [], [] + features = [] # nb_fts = image.shape[0] # ch_names = ['%s-ch%i' % (ch_name, i + 1) for i in range(nb_fts)] + _fn_mean = cython_img3d_gray_mean if USE_CYTHON else numpy_img3d_gray_mean + _fn_std = cython_img3d_gray_std if USE_CYTHON else numpy_img3d_gray_std + _fn_energy = cython_img3d_gray_energy if USE_CYTHON else numpy_img3d_gray_energy + # MEAN mean = None - if 'mean' in list_feature_flags: - if USE_CYTHON: - mean = cython_img3d_gray_mean(image, segm) - else: - mean = numpy_img3d_gray_mean(image, segm) + if 'mean' in feature_flags: + mean = _fn_mean(image, segm) features.append(mean) - names += ['%s_mean' % ch_name] # Standard Deviation - if 'std' in list_feature_flags: - if USE_CYTHON: - std = cython_img3d_gray_std(image, segm, mean) - else: - std = numpy_img3d_gray_std(image, segm, mean) - features.append(std) - names += ['%s_std' % ch_name] + if 'std' in feature_flags: + features.append(_fn_std(image, segm, mean)) # ENERGY - if 'energy' in list_feature_flags: - if USE_CYTHON: - energy = cython_img3d_gray_energy(image, segm) - else: - energy = numpy_img3d_gray_energy(image, segm) - features.append(energy) - names += ['%s_energy' % ch_name] + if 'energy' in feature_flags: + features.append(_fn_energy(image, segm)) # MEDIAN - if 'median' in list_feature_flags: - median = numpy_img3d_gray_median(image, segm) - features.append(median) - names += ['%s_median' % ch_name] + if 'median' in feature_flags: + features.append(numpy_img3d_gray_median(image, segm)) # mean Gradient - if 'meanGrad' in list_feature_flags: + if 'meanGrad' in feature_flags: grad_matrix = np.zeros_like(image) for i in range(image.shape[0]): grad_matrix[i, :, :] = np.sum(np.gradient(image[i]), axis=0) - if USE_CYTHON: - grad = cython_img3d_gray_mean(grad_matrix, segm) - else: - grad = numpy_img3d_gray_mean(grad_matrix, segm) - features.append(grad) - names += ['%s_meanGrad' % ch_name] - _check_unrecognised_feature_names(list_feature_flags) + features.append(_fn_mean(grad_matrix, segm)) + + names = ['%s_%s' % (ch_name, fts_name) + for fts_name in ('mean', 'std', 'energy', 'median', 'meanGrad') + if fts_name in feature_flags] + _check_unrecognised_feature_names(feature_flags) + features = np.concatenate(tuple([fts] for fts in features), axis=0) features = np.nan_to_num(features).T # normalise +/- zeros as set all as positive @@ -724,15 +713,15 @@ def compute_image3d_gray_statistic(image, segm, def compute_image2d_color_statistic(image, segm, - list_feature_flags=NAMES_FEATURE_FLAGS, + feature_flags=NAMES_FEATURE_FLAGS, color_name='color'): """ compute complete descriptors / statistic on color (2D) images :param ndarray image: :param ndarray segm: segmentation - :param list_feature_flags: + :param [str] feature_flags: :param str color_name: channel name - :return np.ndarray, [str]: + :return (ndarray, [str]): np.ndarray >>> image = np.zeros((2, 10, 3)) >>> image[:, 2:6, 0] = 1 @@ -758,51 +747,38 @@ def compute_image2d_color_statistic(image, segm, image = np.nan_to_num(image) features = np.empty((np.max(segm) + 1, 0)) - names = [] ch_names = ['%s-ch%i' % (color_name, i + 1) for i in range(3)] + _fn_mean = cython_img2d_color_mean if USE_CYTHON else numpy_img2d_color_mean + _fn_std = cython_img2d_color_std if USE_CYTHON else numpy_img2d_color_std + _fn_energy = cython_img2d_color_energy if USE_CYTHON else numpy_img2d_color_energy + # MEAN mean = None - if 'mean' in list_feature_flags: - if USE_CYTHON: - mean = cython_img2d_color_mean(image, segm) - else: - mean = numpy_img2d_color_mean(image, segm) + if 'mean' in feature_flags: + mean = _fn_mean(image, segm) features = np.hstack((features, mean)) - names += ['%s_mean' % n for n in ch_names] # Standard Deviation - if 'std' in list_feature_flags: - if USE_CYTHON: - std = cython_img2d_color_std(image, segm, mean) - else: - std = numpy_img2d_color_std(image, segm, mean) - features = np.hstack((features, std)) - names += ['%s_std' % n for n in ch_names] + if 'std' in feature_flags: + features = np.hstack((features, _fn_std(image, segm, mean))) # ENERGY - if 'energy' in list_feature_flags: - if USE_CYTHON: - energy = cython_img2d_color_energy(image, segm) - else: - energy = numpy_img2d_color_energy(image, segm) - features = np.hstack((features, energy)) - names += ['%s_energy' % n for n in ch_names] - # Median - if 'median' in list_feature_flags: - median = numpy_img2d_color_median(image, segm) - features = np.hstack((features, median)) - names += ['%s_median' % n for n in ch_names] + if 'energy' in feature_flags: + features = np.hstack((features, _fn_energy(image, segm))) + # MEDIAN + if 'median' in feature_flags: + features = np.hstack((features, numpy_img2d_color_median(image, segm))) # mean Gradient - if 'meanGrad' in list_feature_flags: + if 'meanGrad' in feature_flags: grad_matrix = np.zeros_like(image) for i in range(image.shape[-1]): grad_matrix[:, :, i] = np.sum(np.gradient(image[:, :, i]), axis=0) - if USE_CYTHON: - grad = cython_img2d_color_mean(grad_matrix, segm) - else: - grad = numpy_img2d_color_mean(grad_matrix, segm) - features = np.hstack((features, grad)) - names += ['%s_meanGrad' % n for n in ch_names] - _check_unrecognised_feature_names(list_feature_flags) + features = np.hstack((features, _fn_mean(grad_matrix, segm))) + + feature_names = ('mean', 'std', 'energy', 'median', 'meanGrad') + names = list(itertools.chain.from_iterable(['%s_%s' % (n, fts_name) for n in ch_names] + for fts_name in feature_names + if fts_name in feature_flags)) + _check_unrecognised_feature_names(feature_flags) # mean Gradient # G = np.zeros_like(image) # for i in range(image.shape[0]): diff --git a/imsegm/region_growing.py b/imsegm/region_growing.py index 8d2c0f24..caff7b2f 100755 --- a/imsegm/region_growing.py +++ b/imsegm/region_growing.py @@ -71,7 +71,7 @@ def object_segmentation_graphcut_slic(slic, segm, centres, labels_fg_prob = np.array(labels_fg_prob) labels_bg_prob = 1. - labels_fg_prob - assert len(centres) > 0, 'at least one center has to be given' + assert list(centres), 'at least one center has to be given' centres = [np.round(c).astype(int) for c in centres] slic_points = seg_spx.superpixel_centers(slic) @@ -191,7 +191,7 @@ def object_segmentation_graphcut_pixels(segm, centres, labels_fg_prob = np.array(labels_fg_prob) labels_bg_prob = 1. - labels_fg_prob - assert len(centres) > 0, 'at least one center has to be given' + assert list(centres), 'at least one center has to be given' centres = [np.round(c).astype(int) for c in centres] proba = np.ones((height, width, len(centres) + 1)) diff --git a/setup.py b/setup.py index cbe3d5a4..56da25d2 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,7 @@ def _parse_requirements(file_path): setup( name='ImSegm', - version='0.1.3', + version='0.1.4', url='https://borda.github.io/pyImSegm', author='Jiri Borovec', @@ -67,7 +67,9 @@ def _parse_requirements(file_path): description='superpixel image segmentation: ' '(un)supervised, center detection, region growing', - packages=find_packages(), + packages=find_packages(exclude=['docs', 'notebooks', + 'handling_annotations', + 'experiments_*']), cmdclass={'build_ext': BuildExt}, ext_modules=[Extension('imsegm.features_cython', language='c++',