Commit
Add threshold param for metrics (#68) (#106)
* Add `threshold` param for metrics (#68)
* Add test for `threshold`
* Smooth set to 1. (metrics and losses)
qubvel authored May 23, 2019
1 parent ce52a1a commit a08ba93
Showing 3 changed files with 39 additions and 18 deletions.
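Before the diff, a minimal usage sketch of the new parameter (the model and compile settings are hypothetical, not part of this commit; only `get_iou_score` and its `threshold` argument come from the change below):

    from segmentation_models.metrics import get_iou_score

    # Build a metric that binarizes predictions with a `>` comparison before
    # scoring; with threshold=None (the default) soft probabilities are scored.
    iou_at_05 = get_iou_score(threshold=0.5)

    # `model` stands in for any Keras segmentation model (hypothetical).
    model.compile('adam', loss='binary_crossentropy', metrics=[iou_at_05])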
2 changes: 1 addition & 1 deletion segmentation_models/losses.py
@@ -5,7 +5,7 @@

from .metrics import jaccard_score, f_score

-SMOOTH = 1e-12
+SMOOTH = 1.

__all__ = [
'jaccard_loss', 'bce_jaccard_loss', 'cce_jaccard_loss',
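For intuition on the `SMOOTH` change, a small NumPy sketch of our own (not part of the commit): the larger constant mainly matters for (near-)empty masks, where it keeps the score near 1 instead of letting it collapse towards 0.

    import numpy as np

    gt = np.zeros((4, 4))                     # empty ground truth
    pr = np.zeros((4, 4)); pr[0, 0] = 0.01    # almost-empty prediction
    inter = (gt * pr).sum()                   # 0.0
    union = (gt + pr).sum() - inter           # 0.01
    print((inter + 1e-12) / (union + 1e-12))  # ~1e-10: score collapses to 0
    print((inter + 1.0) / (union + 1.0))      # ~0.990: near-perfect score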
26 changes: 19 additions & 7 deletions segmentation_models/metrics.py
@@ -6,13 +6,13 @@
'get_f_score', 'get_iou_score', 'get_jaccard_score',
]

-SMOOTH = 1e-12
+SMOOTH = 1.


# ============================ Jaccard/IoU score ============================


-def iou_score(gt, pr, class_weights=1., smooth=SMOOTH, per_image=True):
+def iou_score(gt, pr, class_weights=1., smooth=SMOOTH, per_image=True, threshold=None):
r""" The `Jaccard index`_, also known as Intersection over Union and the Jaccard similarity coefficient
(originally coined coefficient de communauté by Paul Jaccard), is a statistic used for comparing the
similarity and diversity of sample sets. The Jaccard coefficient measures similarity between finite sample sets,
@@ -27,6 +27,7 @@ def iou_score(gt, pr, class_weights=1., smooth=SMOOTH, per_image=True):
smooth: value to avoid division by zero
per_image: if ``True``, metric is calculated as mean over images in batch (B),
else over whole batch
+threshold: value to round predictions (use ``>`` comparison); if ``None``, predictions will not be rounded
Returns:
IoU/Jaccard score in range [0, 1]
@@ -38,6 +39,10 @@
    axes = [1, 2]
else:
    axes = [0, 1, 2]

+if threshold is not None:
+    pr = K.greater(pr, threshold)
+    pr = K.cast(pr, K.floatx())

intersection = K.sum(gt * pr, axis=axes)
union = K.sum(gt + pr, axis=axes) - intersection
@@ -53,20 +58,21 @@ def iou_score(gt, pr, class_weights=1., smooth=SMOOTH, per_image=True):
return iou


-def get_iou_score(class_weights=1., smooth=SMOOTH, per_image=True):
+def get_iou_score(class_weights=1., smooth=SMOOTH, per_image=True, threshold=None):
"""Change default parameters of IoU/Jaccard score
Args:
class_weights: 1. or list of class weights, len(weights) = C
smooth: value to avoid division by zero
per_image: if ``True``, metric is calculated as mean over images in batch (B),
else over whole batch
+threshold: value to round predictions (use ``>`` comparison); if ``None``, predictions will not be rounded
Returns:
``callable``: IoU/Jaccard score
"""
def score(gt, pr):
-    return iou_score(gt, pr, class_weights=class_weights, smooth=smooth, per_image=per_image)
+    return iou_score(gt, pr, class_weights=class_weights, smooth=smooth, per_image=per_image, threshold=threshold)

return score

@@ -83,7 +89,7 @@ def score(gt, pr):

# ============================== F/Dice - score ==============================

-def f_score(gt, pr, class_weights=1, beta=1, smooth=SMOOTH, per_image=True):
+def f_score(gt, pr, class_weights=1, beta=1, smooth=SMOOTH, per_image=True, threshold=None):
r"""The F-score (Dice coefficient) can be interpreted as a weighted average of the precision and recall,
where an F-score reaches its best value at 1 and worst score at 0.
The relative contributions of ``precision`` and ``recall`` to the F1-score are equal.
@@ -110,6 +116,7 @@ def f_score(gt, pr, class_weights=1, beta=1, smooth=SMOOTH, per_image=True):
smooth: value to avoid division by zero
per_image: if ``True``, metric is calculated as mean over images in batch (B),
else over whole batch
+threshold: value to round predictions (use ``>`` comparison); if ``None``, predictions will not be rounded
Returns:
F-score in range [0, 1]
@@ -119,6 +126,10 @@ def f_score(gt, pr, class_weights=1, beta=1, smooth=SMOOTH, per_image=True):
    axes = [1, 2]
else:
    axes = [0, 1, 2]

+if threshold is not None:
+    pr = K.greater(pr, threshold)
+    pr = K.cast(pr, K.floatx())

tp = K.sum(gt * pr, axis=axes)
fp = K.sum(pr, axis=axes) - tp
@@ -137,7 +148,7 @@ def f_score(gt, pr, class_weights=1, beta=1, smooth=SMOOTH, per_image=True):
return score


-def get_f_score(class_weights=1, beta=1, smooth=SMOOTH, per_image=True):
+def get_f_score(class_weights=1, beta=1, smooth=SMOOTH, per_image=True, threshold=None):
"""Change default parameters of F-score score
Args:
@@ -146,12 +157,13 @@ def get_f_score(class_weights=1, beta=1, smooth=SMOOTH, per_image=True):
beta: f-score coefficient
per_image: if ``True``, metric is calculated as mean over images in batch (B),
else over whole batch
+threshold: value to round predictions (use ``>`` comparison); if ``None``, predictions will not be rounded
Returns:
``callable``: F-score
"""
def score(gt, pr):
-    return f_score(gt, pr, class_weights=class_weights, beta=beta, smooth=smooth, per_image=per_image)
+    return f_score(gt, pr, class_weights=class_weights, beta=beta, smooth=smooth, per_image=per_image, threshold=threshold)

return score

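To see what the new branch does end to end, an equivalence sketch in NumPy (our own illustration of the `K.greater`/`K.cast` logic above; single image, single channel, no class weights assumed):

    import numpy as np

    def iou_np(gt, pr, threshold=None, smooth=1.0):
        # Mirrors iou_score above: optional hard thresholding, then IoU.
        if threshold is not None:
            pr = (pr > threshold).astype(np.float64)  # K.greater + K.cast
        inter = (gt * pr).sum()
        union = (gt + pr).sum() - inter
        return (inter + smooth) / (union + smooth)

    gt = np.array([[0., 1.], [1., 1.]])
    pr = np.array([[0.2, 0.7], [0.6, 0.9]])   # soft probabilities
    print(iou_np(gt, pr))                 # ~0.762, soft IoU on raw scores
    print(iou_np(gt, pr, threshold=0.5))  # 1.0, hard IoU after binarizing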
29 changes: 19 additions & 10 deletions tests/test_metrics.py
@@ -120,7 +120,7 @@ def test_iou_metric(case):
gt, pr, res = case
gt = _to_4d(gt)
pr = _to_4d(pr)
-score = K.eval(iou_score(gt, pr))
+score = K.eval(iou_score(gt, pr, smooth=10e-12))
assert np.allclose(score, res)


@@ -129,15 +129,15 @@ def test_jaccrad_loss(case):
gt, pr, res = case
gt = _to_4d(gt)
pr = _to_4d(pr)
-score = K.eval(jaccard_loss(gt, pr))
+score = K.eval(jaccard_loss(gt, pr, smooth=10e-12))
assert np.allclose(score, 1 - res)


def _test_f_metric(case, beta=1):
gt, pr, res = case
gt = _to_4d(gt)
pr = _to_4d(pr)
-score = K.eval(f_score(gt, pr, beta=beta))
+score = K.eval(f_score(gt, pr, beta=beta, smooth=10e-12))
assert np.allclose(score, res)


@@ -156,7 +156,7 @@ def test_dice_loss(case):
gt, pr, res = case
gt = _to_4d(gt)
pr = _to_4d(pr)
-score = K.eval(dice_loss(gt, pr))
+score = K.eval(dice_loss(gt, pr, smooth=10e-12))
assert np.allclose(score, 1 - res)


@@ -169,10 +169,10 @@ def test_per_image(func):
pr = _add_4d(pr)

# calculate score per image
-score_1 = K.eval(func(gt, pr, per_image=True))
+score_1 = K.eval(func(gt, pr, per_image=True, smooth=10e-12))
score_2 = np.mean([
-    K.eval(func(_to_4d(GT0), _to_4d(PR1))),
-    K.eval(func(_to_4d(GT1), _to_4d(PR2))),
+    K.eval(func(_to_4d(GT0), _to_4d(PR1), smooth=10e-12)),
+    K.eval(func(_to_4d(GT1), _to_4d(PR2), smooth=10e-12)),
])
assert np.allclose(score_1, score_2)

@@ -186,14 +186,23 @@ def test_per_batch(func):
pr = _add_4d(pr)

# calculate score per batch
-score_1 = K.eval(func(gt, pr, per_image=False))
+score_1 = K.eval(func(gt, pr, per_image=False, smooth=10e-12))

gt1 = np.concatenate([GT0, GT1], axis=0)
pr1 = np.concatenate([PR1, PR2], axis=0)
-score_2 = K.eval(func(_to_4d(gt1), _to_4d(pr1), per_image=True))
+score_2 = K.eval(func(_to_4d(gt1), _to_4d(pr1), per_image=True, smooth=10e-12))

assert np.allclose(score_1, score_2)



+@pytest.mark.parametrize('case', IOU_CASES)
+def test_threshold_iou(case):
+    gt, pr, res = case
+    gt = _to_4d(gt)
+    pr = _to_4d(pr) * 0.51
+    score = K.eval(iou_score(gt, pr, smooth=10e-12, threshold=0.5))
+    assert np.allclose(score, res)


if __name__ == '__main__':
pytest.main([__file__])

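Why scaling by 0.51 exercises the new parameter (our reading of the test above, sketched in NumPy): multiplying a binary mask by 0.51 puts every positive pixel just above the 0.5 cutoff, so `threshold=0.5` must reconstruct the original mask and reproduce the unthresholded expected scores.

    import numpy as np

    pr = np.array([0., 1., 1., 0.]) * 0.51         # mask scaled to 0.51
    hard = (pr > 0.5).astype(np.float64)           # the `>` comparison
    assert np.array_equal(hard, [0., 1., 1., 0.])  # original mask recovered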