calc_SNR: tests, pd_mode

ozolotareva · Sep 22, 2024 · ebe6e26 · ebe6e26
1 parent 418008a
commit ebe6e26
Show file tree

Hide file tree

Showing 2 changed files with 58 additions and 12 deletions.
diff --git a/test/utils/test_method.py b/test/utils/test_method.py
@@ -1,6 +1,7 @@
 import pandas as pd
 import numpy as np
-from utils.method import zscore, prepare_input_matrix, get_trend
+import warnings
+from utils.method import zscore, prepare_input_matrix, get_trend, calc_SNR
 
 
 def test_get_trend_single_point():
@@ -25,6 +26,34 @@ def test_get_trend_noisy():
     assert np.allclose(min_snr([1, 1.5, 2]), [0.25, 0.5, 0.75], atol=0.1)
 
 
+def test_calc_SNR():
+    val1 = calc_SNR([0, 1, 2], [1, 2, 3])
+    val2 = calc_SNR([0, 1, 2], [1, 2, 3], pd_mode=True)
+    np.testing.assert_almost_equal(val1, -0.6123724356957945)
+    np.testing.assert_almost_equal(val2, -0.5)
+
+    # assert not failing
+    assert calc_SNR([0, 0, 0], [1, 1, 1]) == float("-inf")
+    assert calc_SNR([0, 0, 0], [1, 1, 1], True) == float("-inf")
+    assert calc_SNR([1, 1, 1], [0, 0, 0]) == float("+inf")
+
+    # sends warning "overflow encountered" 
+    # big numbers
+    big_nums = [1e307, 1e307, 1e307]
+    small_nums = [1e-150, 0, 0]
+    assert np.std(big_nums) < float("+inf")
+    assert np.std(small_nums) > 0
+    assert calc_SNR(big_nums, small_nums) in [float("inf"), float("+inf")]
+
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+
+        # zero std
+        assert calc_SNR([1, 1, 1], [0, 0, 0]) in [float("inf"), float("+inf")]
+        assert calc_SNR([0, 0, 0], [1, 1, 1]) in [float("inf"), float("-inf")]
+
+
+
 # def test_zscore():
 #     # Test case 1: Basic functionality
 #     df = pd.DataFrame({

diff --git a/utils/method.py b/utils/method.py
@@ -166,11 +166,32 @@ def calc_mean_std_by_powers(powers):
     return mean, std
 
 
-def calc_SNR(ar1, ar2):
-    std_sum = np.std(ar1) + np.std(ar2)
-    mean_diff = np.mean(ar1) - np.mean(ar2)
+def calc_SNR(ar1, ar2, pd_mode=False):
+    """Calculate Signal-to-Noise Ratio (SNR) for two arrays.
+
+    Args:
+        ar1 (array): first array
+        ar2 (array): second array
+        pd_mode (bool): if True, use pandas-like mean/std methods
+            i.e. n-1 for std, ignore nans
+
+    Returns:
+        float: SNR value
+    """
+
+    if pd_mode:
+        std = lambda x: np.nanstd(x, ddof=1.0)
+        mean = np.nanmean
+    else:
+        std = np.nanstd
+        mean = np.mean
+
+    mean_diff = mean(ar1) - mean(ar2)
+    std_sum = std(ar1) + std(ar2)
+
     if std_sum == 0:
-        return np.inf*mean_diff
+        return np.inf * mean_diff
+
     return mean_diff / std_sum
 
 
@@ -1350,13 +1371,9 @@ def update_bicluster_data(bicluster, data):
         avg_zscore = data.loc[list(bicluster["genes"]), :].mean()
 
     # compute SNR for average z-score for this bicluster
-    m = avg_zscore[bic_samples].mean() - avg_zscore[bg_samples].mean()
-    s = avg_zscore[bic_samples].std() + avg_zscore[bg_samples].std()
-    if s>0:
-        snr = np.abs(m) / s
-    else:
-        snr = np.abs(m) *np.inf
-    bicluster["SNR"] = snr
+    bicluster["SNR"] = calc_SNR(
+        avg_zscore[bic_samples], avg_zscore[bg_samples], pd_mode=True
+    )
     return bicluster