From ebe6e26e63886c7062a99e561995e0c73e5fcddd Mon Sep 17 00:00:00 2001 From: Alex Savchik Date: Sun, 22 Sep 2024 23:33:26 +0200 Subject: [PATCH] calc_SNR: tests, pd_mode --- test/utils/test_method.py | 31 ++++++++++++++++++++++++++++++- utils/method.py | 39 ++++++++++++++++++++++++++++----------- 2 files changed, 58 insertions(+), 12 deletions(-) diff --git a/test/utils/test_method.py b/test/utils/test_method.py index 105d36f..7b39528 100644 --- a/test/utils/test_method.py +++ b/test/utils/test_method.py @@ -1,6 +1,7 @@ import pandas as pd import numpy as np -from utils.method import zscore, prepare_input_matrix, get_trend +import warnings +from utils.method import zscore, prepare_input_matrix, get_trend, calc_SNR def test_get_trend_single_point(): @@ -25,6 +26,34 @@ def test_get_trend_noisy(): assert np.allclose(min_snr([1, 1.5, 2]), [0.25, 0.5, 0.75], atol=0.1) +def test_calc_SNR(): + val1 = calc_SNR([0, 1, 2], [1, 2, 3]) + val2 = calc_SNR([0, 1, 2], [1, 2, 3], pd_mode=True) + np.testing.assert_almost_equal(val1, -0.6123724356957945) + np.testing.assert_almost_equal(val2, -0.5) + + # assert not failing + assert calc_SNR([0, 0, 0], [1, 1, 1]) == float("-inf") + assert calc_SNR([0, 0, 0], [1, 1, 1], True) == float("-inf") + assert calc_SNR([1, 1, 1], [0, 0, 0]) == float("+inf") + + # sends warning "overflow encountered" + # big numbers + big_nums = [1e307, 1e307, 1e307] + small_nums = [1e-150, 0, 0] + assert np.std(big_nums) < float("+inf") + assert np.std(small_nums) > 0 + assert calc_SNR(big_nums, small_nums) in [float("inf"), float("+inf")] + + with warnings.catch_warnings(): + warnings.simplefilter("error") + + # zero std + assert calc_SNR([1, 1, 1], [0, 0, 0]) in [float("inf"), float("+inf")] + assert calc_SNR([0, 0, 0], [1, 1, 1]) in [float("inf"), float("-inf")] + + + # def test_zscore(): # # Test case 1: Basic functionality # df = pd.DataFrame({ diff --git a/utils/method.py b/utils/method.py index b776dc5..db6c65c 100644 --- a/utils/method.py +++ b/utils/method.py @@ -166,11 +166,32 @@ def calc_mean_std_by_powers(powers): return mean, std -def calc_SNR(ar1, ar2): - std_sum = np.std(ar1) + np.std(ar2) - mean_diff = np.mean(ar1) - np.mean(ar2) +def calc_SNR(ar1, ar2, pd_mode=False): + """Calculate Signal-to-Noise Ratio (SNR) for two arrays. + + Args: + ar1 (array): first array + ar2 (array): second array + pd_mode (bool): if True, use pandas-like mean/std methods + i.e. n-1 for std, ignore nans + + Returns: + float: SNR value + """ + + if pd_mode: + std = lambda x: np.nanstd(x, ddof=1.0) + mean = np.nanmean + else: + std = np.nanstd + mean = np.mean + + mean_diff = mean(ar1) - mean(ar2) + std_sum = std(ar1) + std(ar2) + if std_sum == 0: - return np.inf*mean_diff + return np.inf * mean_diff + return mean_diff / std_sum @@ -1350,13 +1371,9 @@ def update_bicluster_data(bicluster, data): avg_zscore = data.loc[list(bicluster["genes"]), :].mean() # compute SNR for average z-score for this bicluster - m = avg_zscore[bic_samples].mean() - avg_zscore[bg_samples].mean() - s = avg_zscore[bic_samples].std() + avg_zscore[bg_samples].std() - if s>0: - snr = np.abs(m) / s - else: - snr = np.abs(m) *np.inf - bicluster["SNR"] = snr + bicluster["SNR"] = calc_SNR( + avg_zscore[bic_samples], avg_zscore[bg_samples], pd_mode=True + ) return bicluster