Skip to content

Commit

Permalink
calc_SNR: tests, pd_mode
Browse files Browse the repository at this point in the history
  • Loading branch information
savfod committed Sep 22, 2024
1 parent 418008a commit ebe6e26
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 12 deletions.
31 changes: 30 additions & 1 deletion test/utils/test_method.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pandas as pd
import numpy as np
from utils.method import zscore, prepare_input_matrix, get_trend
import warnings
from utils.method import zscore, prepare_input_matrix, get_trend, calc_SNR


def test_get_trend_single_point():
Expand All @@ -25,6 +26,34 @@ def test_get_trend_noisy():
assert np.allclose(min_snr([1, 1.5, 2]), [0.25, 0.5, 0.75], atol=0.1)


def test_calc_SNR():
    """Check calc_SNR on ordinary input and on degenerate (zero-std) input."""
    # Regular case: population std (default) vs. sample std (pd_mode=True).
    val1 = calc_SNR([0, 1, 2], [1, 2, 3])
    val2 = calc_SNR([0, 1, 2], [1, 2, 3], pd_mode=True)
    np.testing.assert_almost_equal(val1, -0.6123724356957945)
    np.testing.assert_almost_equal(val2, -0.5)

    # Zero total std must not raise; the result is an infinity that carries
    # the sign of the mean difference.
    assert calc_SNR([0, 0, 0], [1, 1, 1]) == float("-inf")
    assert calc_SNR([0, 0, 0], [1, 1, 1], True) == float("-inf")
    assert calc_SNR([1, 1, 1], [0, 0, 0]) == float("inf")

    # Huge mean difference over a tiny but non-zero std: the division
    # overflows to +inf. NumPy may emit an "overflow encountered" warning
    # here, so this check deliberately stays outside the strict warning
    # filter below.
    big_nums = [1e307, 1e307, 1e307]
    small_nums = [1e-150, 0, 0]
    assert np.std(big_nums) < float("inf")
    assert np.std(small_nums) > 0
    assert calc_SNR(big_nums, small_nums) == float("inf")

    # The zero-std branch must be warning-free: promote warnings to errors.
    with warnings.catch_warnings():
        warnings.simplefilter("error")

        assert calc_SNR([1, 1, 1], [0, 0, 0]) == float("inf")
        assert calc_SNR([0, 0, 0], [1, 1, 1]) == float("-inf")



# def test_zscore():
# # Test case 1: Basic functionality
# df = pd.DataFrame({
Expand Down
39 changes: 28 additions & 11 deletions utils/method.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,11 +166,32 @@ def calc_mean_std_by_powers(powers):
return mean, std


def calc_SNR(ar1, ar2):
std_sum = np.std(ar1) + np.std(ar2)
mean_diff = np.mean(ar1) - np.mean(ar2)
def calc_SNR(ar1, ar2, pd_mode=False):
    """Calculate the Signal-to-Noise Ratio (SNR) between two arrays.

    The SNR is ``(mean(ar1) - mean(ar2)) / (std(ar1) + std(ar2))``.

    Args:
        ar1 (array): first array
        ar2 (array): second array
        pd_mode (bool): if True, use pandas-like mean/std methods,
            i.e. n-1 for std, ignore nans

    Returns:
        float: SNR value. When the summed std is zero the result is
            +inf or -inf, following the sign of the mean difference;
            if the mean difference is also zero (or NaN) the result
            is NaN.
    """
    if pd_mode:
        mean_diff = np.nanmean(ar1) - np.nanmean(ar2)
        std_sum = np.nanstd(ar1, ddof=1) + np.nanstd(ar2, ddof=1)
    else:
        mean_diff = np.mean(ar1) - np.mean(ar2)
        std_sum = np.nanstd(ar1) + np.nanstd(ar2)

    if std_sum == 0:
        # 0/0 is undefined. Return NaN explicitly rather than computing
        # inf * 0, which yields the same NaN but also emits a
        # RuntimeWarning (an exception under a warnings-as-errors filter).
        if mean_diff == 0 or np.isnan(mean_diff):
            return np.float64(np.nan)
        # Signed infinity: same value as inf * mean_diff for non-zero input.
        return np.copysign(np.inf, mean_diff)

    return mean_diff / std_sum


Expand Down Expand Up @@ -1350,13 +1371,9 @@ def update_bicluster_data(bicluster, data):
avg_zscore = data.loc[list(bicluster["genes"]), :].mean()

# compute SNR for average z-score for this bicluster
m = avg_zscore[bic_samples].mean() - avg_zscore[bg_samples].mean()
s = avg_zscore[bic_samples].std() + avg_zscore[bg_samples].std()
if s>0:
snr = np.abs(m) / s
else:
snr = np.abs(m) *np.inf
bicluster["SNR"] = snr
bicluster["SNR"] = calc_SNR(
avg_zscore[bic_samples], avg_zscore[bg_samples], pd_mode=True
)
return bicluster


Expand Down

0 comments on commit ebe6e26

Please sign in to comment.