
Commit

Merge pull request #4 from ozolotareva/small_example
Small example
ozolotareva authored Sep 9, 2024
2 parents 9701f5d + b8a8530 commit b2ae0c3
Showing 7 changed files with 212 additions and 83 deletions.
8 changes: 5 additions & 3 deletions .github/workflows/run_tests.yml
@@ -1,5 +1,5 @@
# action.yml
name: 'Run tests'
name: 'Tests'

on:
# Triggers the workflow on push or pull request events but only for the "main" branch
@@ -25,6 +25,8 @@ jobs:
entrypoint: bash
args: >
-c "
cd /github/workspace &&
python test/test_run_unpast.py
cd /github/workspace
&& pip install pytest --target /tmp
&& PYTHONPATH=/tmp python -m pytest test -m 'not slow'
&& PYTHONPATH=/tmp python -m pytest test --durations=0
"
2 changes: 1 addition & 1 deletion README.md
@@ -19,7 +19,7 @@ mkdir -p results

# running UnPaSt with default parameters and example data
command="python run_unpast.py --exprs test/scenario_B500.exprs.tsv.gz --basename results/scenario_B500"
docker run -u $(id -u):$(id -g) -v $(pwd):/data --entrypoint bash freddsle/unpast -c "cd /data && $command"
docker run -u $(id -u):$(id -g) -v "$(pwd)":/data --entrypoint bash freddsle/unpast -c "cd /data && $command"
```
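
The README change only adds quotes around `$(pwd)`, but that is what keeps the `-v` mount argument intact when the working directory contains spaces. A small illustration of the difference, using a hypothetical path (Python's `shlex` mimics the shell's word splitting):

```python
import shlex

cwd = "/home/user/my data"  # hypothetical working directory containing a space
unquoted = f"docker run -v {cwd}:/data freddsle/unpast"
quoted = f"docker run -v {shlex.quote(cwd)}:/data freddsle/unpast"

print(shlex.split(unquoted))  # the space splits the mount spec into two tokens
print(shlex.split(quoted))    # the mount spec survives as a single token
```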


75 changes: 24 additions & 51 deletions test/test_input/synthetic_clear_biclusters.tsv
@@ -1,51 +1,24 @@
sample_0 sample_1 sample_2 sample_3 sample_4 sample_5 sample_6 sample_7 sample_8 sample_9 sample_10 sample_11 sample_12 sample_13 sample_14 sample_15 sample_16 sample_17 sample_18 sample_19
feature_0 1.042 0.072 1.0 0.03 1.015 0.009 1.019 0.035 1.04 0.054 1.042 0.069 1.02 0.088 1.003 0.067 1.042 0.056 1.014 0.02
feature_1 0.08 0.097 0.031 0.069 0.088 0.089 0.009 0.004 0.017 0.088 0.01 0.042 0.096 0.053 0.069 0.032 0.069 0.083 0.002 0.075
feature_2 1.099 0.075 1.028 0.079 1.01 0.045 1.091 0.029 1.029 0.013 1.002 0.068 1.021 0.027 1.049 0.005 1.057 0.015 1.059 0.07
feature_3 0.01 0.041 0.069 0.041 0.005 0.054 0.066 0.051 0.094 0.059 0.09 0.014 0.014 0.081 0.04 0.017 0.093 0.035 0.075 0.073
feature_4 1.088 0.062 1.075 0.035 1.027 0.09 1.043 0.096 1.066 0.062 1.011 0.095 1.045 0.058 1.041 0.024 1.09 0.057 1.0 0.062
feature_5 0.033 0.053 0.089 0.036 0.091 0.062 0.002 0.093 0.069 0.1 0.017 0.014 0.093 0.07 0.007 0.076 0.075 0.092 0.071 0.012
feature_6 1.002 0.003 1.003 0.025 1.086 0.054 1.055 0.084 1.012 0.028 1.059 0.097 1.056 0.002 1.08 0.023 1.081 0.039 1.086 0.075
feature_7 0.056 0.014 0.006 0.012 0.004 0.011 0.023 0.071 0.056 0.001 0.007 0.097 0.057 0.02 0.025 0.074 0.02 0.058 0.097 0.085
feature_8 1.024 0.049 1.062 0.083 1.016 0.002 1.007 0.049 1.061 0.057 1.032 0.099 1.058 0.038 1.055 0.075 1.067 0.026 1.007 0.037
feature_9 0.063 0.021 0.075 0.007 0.026 0.08 0.019 0.064 0.052 0.092 0.026 0.007 0.074 0.077 0.091 0.093 0.001 0.023 0.062 0.095
feature_10 1.095 0.056 1.092 0.064 1.039 0.049 1.06 0.055 1.093 0.092 1.039 0.096 1.017 0.013 1.014 0.051 1.002 0.095 1.083 0.002
feature_11 0.018 0.033 0.013 0.081 0.034 0.094 0.058 0.088 0.084 0.091 0.046 0.055 0.08 0.029 0.049 0.06 0.002 0.059 0.043 0.081
feature_12 1.032 0.089 1.058 0.018 1.079 0.061 1.005 0.042 1.068 0.092 1.0 0.098 1.038 0.097 1.06 0.083 1.057 0.063 1.029 0.059
feature_13 0.075 0.086 0.076 0.07 0.086 0.032 0.067 0.045 0.038 0.041 0.04 0.032 0.062 0.043 0.097 0.068 0.02 0.043 0.034 0.08
feature_14 1.088 0.09 1.066 0.027 1.025 0.085 1.053 0.08 1.057 0.073 1.052 0.077 1.057 0.047 1.034 0.007 1.038 0.008 1.098 0.018
feature_15 0.081 0.087 0.069 0.057 0.016 0.047 0.035 0.023 0.059 0.031 0.092 0.091 0.026 0.011 0.019 0.05 0.073 0.021 0.025 0.085
feature_16 1.042 0.062 1.023 0.01 1.052 0.048 1.015 0.062 1.054 0.065 1.014 0.075 1.022 0.052 1.079 0.002 1.032 0.087 1.084 0.054
feature_17 0.087 0.095 0.083 0.085 0.01 0.065 0.07 0.061 0.08 0.003 0.077 0.073 0.026 0.026 0.063 0.035 0.08 0.045 0.078 0.099
feature_18 1.03 0.014 1.09 0.054 1.097 0.064 1.099 0.055 1.053 0.014 1.036 0.003 1.016 0.075 1.003 0.037 1.086 0.069 1.069 0.019
feature_19 0.044 0.058 0.099 0.02 0.025 0.026 0.075 0.046 0.006 0.051 0.021 0.08 0.03 0.003 0.059 0.084 0.038 0.075 0.051 0.054
feature_20 1.096 0.08 1.003 0.071 1.047 0.095 1.022 0.027 1.008 0.043 1.011 0.063 1.08 0.07 1.077 0.034 1.085 0.043 1.082 0.063
feature_21 0.014 0.008 0.002 0.007 0.046 0.011 0.003 0.075 0.039 0.075 0.045 0.045 0.048 0.047 0.08 0.04 0.09 0.004 0.077 0.013
feature_22 1.062 0.001 1.054 0.0 1.095 0.091 1.08 0.092 1.015 0.016 1.019 0.062 1.091 0.099 1.071 0.073 1.091 0.04 1.025 0.017
feature_23 0.012 0.081 0.015 0.026 0.082 0.031 0.098 0.027 0.053 0.031 0.091 0.037 0.043 0.051 0.094 0.003 0.072 0.089 0.003 0.052
feature_24 1.033 0.086 1.056 0.069 1.045 0.063 1.029 0.001 1.058 0.031 1.052 0.092 1.043 0.025 1.037 0.093 1.094 0.084 1.092 0.023
feature_25 0.009 0.023 0.031 0.017 0.061 0.041 0.082 0.019 0.07 0.024 0.057 0.035 0.006 0.023 0.066 0.05 0.052 0.017 0.057 0.1
feature_26 1.082 0.059 1.098 0.09 1.06 0.003 1.009 0.007 1.045 0.038 1.098 0.017 1.097 0.077 1.082 0.063 1.067 0.048 1.001 0.035
feature_27 0.049 0.073 0.047 0.046 0.014 0.001 0.076 0.032 0.098 0.022 0.034 0.052 0.075 0.046 0.012 0.031 0.05 0.067 0.077 0.013
feature_28 1.002 0.052 1.081 0.001 1.067 0.069 1.045 0.091 1.064 0.001 1.048 0.086 1.083 0.065 1.067 0.058 1.027 0.056 1.067 0.035
feature_29 0.086 0.02 0.075 0.029 0.077 0.043 0.081 0.035 0.021 0.077 0.031 0.073 0.074 0.022 0.021 0.02 0.014 0.038 0.003 0.011
feature_30 1.067 0.08 1.008 0.023 1.021 0.092 1.071 0.055 1.03 0.083 1.044 0.092 1.071 0.048 1.013 0.098 1.016 0.02 1.043 0.04
feature_31 0.015 0.073 0.019 0.064 0.075 0.021 0.06 0.075 0.064 0.06 0.03 0.073 0.095 0.043 0.078 0.006 0.084 0.019 0.04 0.03
feature_32 1.008 0.09 1.037 0.053 1.049 0.013 1.021 0.008 1.051 0.026 1.036 0.011 1.079 0.011 1.099 0.018 1.057 0.004 1.079 0.019
feature_33 0.053 0.074 0.015 0.055 0.022 0.076 0.072 0.018 0.086 0.002 0.086 0.056 0.04 0.076 0.072 0.099 0.028 0.0 0.093 0.086
feature_34 1.073 0.052 1.071 0.078 1.037 0.077 1.075 0.061 1.04 0.07 1.0 0.077 1.09 0.024 1.012 0.022 1.03 0.088 1.054 0.029
feature_35 0.014 0.029 0.061 0.032 0.046 0.044 0.083 0.043 0.035 0.067 0.022 0.047 0.031 0.063 0.088 0.045 0.078 0.046 0.066 0.013
feature_36 1.043 0.091 1.061 0.077 1.05 0.05 1.084 0.007 1.057 0.094 1.052 0.019 1.085 0.025 1.07 0.054 1.095 0.062 1.084 0.001
feature_37 0.099 0.008 0.032 0.095 0.001 0.082 0.086 0.044 0.026 0.08 0.048 0.013 0.093 0.09 0.049 0.086 0.042 0.068 0.04 0.051
feature_38 1.019 0.096 1.029 0.01 1.014 0.001 1.072 0.056 1.079 0.051 1.079 0.07 1.078 0.041 1.065 0.018 1.032 0.017 1.041 0.024
feature_39 0.041 0.098 0.032 0.098 0.064 0.038 0.086 0.062 0.025 0.079 0.043 0.036 0.033 0.07 0.027 0.081 0.03 0.054 0.049 0.086
feature_40 1.089 0.018 1.059 0.09 1.045 0.092 1.028 0.061 1.068 0.023 1.001 0.042 1.094 0.034 1.078 0.017 1.034 0.014 1.072 0.07
feature_41 0.069 0.025 0.069 0.023 0.042 0.037 0.036 0.006 0.063 0.071 0.061 0.065 0.017 0.015 0.051 0.088 0.018 0.046 0.043 0.05
feature_42 1.016 0.034 1.026 0.084 1.08 0.043 1.061 0.015 1.051 0.03 1.086 0.067 1.063 0.012 1.047 0.099 1.095 0.065 1.015 0.064
feature_43 0.057 0.047 0.043 0.06 0.085 0.075 0.058 0.092 0.006 0.099 0.005 0.02 0.042 0.011 0.062 0.005 0.028 0.006 0.07 0.067
feature_44 1.038 0.019 1.075 0.034 1.08 0.049 1.053 0.003 1.064 0.035 1.023 0.043 1.038 0.047 1.098 0.036 1.077 0.055 1.089 0.035
feature_45 0.025 0.091 0.004 0.095 0.056 0.038 0.1 0.006 0.052 0.003 0.057 0.018 0.063 0.098 0.087 0.045 0.071 0.078 0.049 0.053
feature_46 1.015 0.037 1.014 0.073 1.048 0.045 1.089 0.053 1.041 0.027 1.007 0.042 1.003 0.029 1.05 0.097 1.011 0.067 1.05 0.078
feature_47 0.014 0.008 0.04 0.08 0.019 0.077 0.029 0.022 0.002 0.04 0.038 0.066 0.007 0.015 0.002 0.011 0.065 0.04 0.032 0.056
feature_48 1.099 0.083 1.07 0.092 1.004 0.007 1.047 0.035 1.094 0.049 1.054 0.09 1.045 0.088 1.025 0.027 1.033 0.055 1.022 0.067
feature_49 0.014 0.009 0.087 0.024 0.039 0.057 0.053 0.008 0.087 0.095 0.081 0.028 0.053 0.034 0.055 0.097 0.031 0.067 0.033 0.077
sample_0 sample_1 sample_2 sample_3 sample_4 sample_5 sample_6 sample_7 sample_8 sample_9 sample_10 sample_11 sample_12 sample_13 sample_14 sample_15 sample_16 sample_17 sample_18 sample_19 sample_20 sample_21 sample_22
feature_0 0.042 0.072 0.0 0.03 0.015 0.009 0.019 0.035 0.04 0.054 0.042 0.069 0.02 0.088 0.003 0.067 0.042 0.056 0.014 0.02 0.08 0.097 0.031
feature_1 0.069 1.088 0.089 1.009 0.004 1.017 0.088 1.01 0.042 1.096 0.053 1.069 0.032 1.069 0.083 1.002 0.075 1.099 0.075 1.028 0.079 1.01 0.045
feature_2 0.091 0.029 0.029 0.013 0.002 0.068 0.021 0.027 0.049 0.005 0.057 0.015 0.059 0.07 0.01 0.041 0.069 0.041 0.005 0.054 0.066 0.051 0.094
feature_3 0.059 1.09 0.014 1.014 0.081 1.04 0.017 1.093 0.035 1.075 0.073 1.088 0.062 1.075 0.035 1.027 0.09 1.043 0.096 1.066 0.062 1.011 0.095
feature_4 0.045 0.058 0.041 0.024 0.09 0.057 0.0 0.062 0.033 0.053 0.089 0.036 0.091 0.062 0.002 0.093 0.069 0.1 0.017 0.014 0.093 0.07 0.007
feature_5 0.076 1.075 0.092 1.071 0.012 1.002 0.003 1.003 0.025 1.086 0.054 1.055 0.084 1.012 0.028 1.059 0.097 1.056 0.002 1.08 0.023 1.081 0.039
feature_6 0.086 0.075 0.056 0.014 0.006 0.012 0.004 0.011 0.023 0.071 0.056 0.001 0.007 0.097 0.057 0.02 0.025 0.074 0.02 0.058 0.097 0.085 0.024
feature_7 0.049 1.062 0.083 1.016 0.002 1.007 0.049 1.061 0.057 1.032 0.099 1.058 0.038 1.055 0.075 1.067 0.026 1.007 0.037 1.063 0.021 1.075 0.007
feature_8 0.026 0.08 0.019 0.064 0.052 0.092 0.026 0.007 0.074 0.077 0.091 0.093 0.001 0.023 0.062 0.095 0.095 0.056 0.092 0.064 0.039 0.049 0.06
feature_9 0.055 1.093 0.092 1.039 0.096 1.017 0.013 1.014 0.051 1.002 0.095 1.083 0.002 1.018 0.033 1.013 0.081 1.034 0.094 1.058 0.088 1.084 0.091
feature_10 0.046 0.055 0.08 0.029 0.049 0.06 0.002 0.059 0.043 0.081 0.032 0.089 0.058 0.018 0.079 0.061 0.005 0.042 0.068 0.092 0.0 0.098 0.038
feature_11 0.097 1.06 0.083 1.057 0.063 1.029 0.059 1.075 0.086 1.076 0.07 1.086 0.032 1.067 0.045 1.038 0.041 1.04 0.032 1.062 0.043 1.097 0.068
feature_12 0.02 0.043 0.034 0.08 0.088 0.09 0.066 0.027 0.025 0.085 0.053 0.08 0.057 0.073 0.052 0.077 0.057 0.047 0.034 0.007 0.038 0.008 0.098
feature_13 0.018 1.081 0.087 1.069 0.057 1.016 0.047 1.035 0.023 1.059 0.031 1.092 0.091 1.026 0.011 1.019 0.05 1.073 0.021 1.025 0.085 1.042 0.062
feature_14 0.023 0.01 0.052 0.048 0.015 0.062 0.054 0.065 0.014 0.075 0.022 0.052 0.079 0.002 0.032 0.087 0.084 0.054 0.087 0.095 0.083 0.085 0.01
feature_15 0.065 1.07 0.061 1.08 0.003 1.077 0.073 1.026 0.026 1.063 0.035 1.08 0.045 1.078 0.099 1.03 0.014 1.09 0.054 1.097 0.064 1.099 0.055
feature_16 0.053 0.014 0.036 0.003 0.016 0.075 0.003 0.037 0.086 0.069 0.069 0.019 0.044 0.058 0.099 0.02 0.025 0.026 0.075 0.046 0.006 0.051 0.021
feature_17 0.08 1.03 0.003 1.059 0.084 1.038 0.075 1.051 0.054 1.096 0.08 1.003 0.071 1.047 0.095 1.022 0.027 1.008 0.043 1.011 0.063 1.08 0.07
feature_18 0.077 0.034 0.085 0.043 0.082 0.063 0.014 0.008 0.002 0.007 0.046 0.011 0.003 0.075 0.039 0.075 0.045 0.045 0.048 0.047 0.08 0.04 0.09
feature_19 0.004 1.077 0.013 1.062 0.001 1.054 0.0 1.095 0.091 1.08 0.092 1.015 0.016 1.019 0.062 1.091 0.099 1.071 0.073 1.091 0.04 1.025 0.017
feature_20 0.012 0.081 0.015 0.026 0.082 0.031 0.098 0.027 0.053 0.031 0.091 0.037 0.043 0.051 0.094 0.003 0.072 0.089 0.003 0.052 0.033 0.086 0.056
feature_21 0.069 1.045 0.063 1.029 0.001 1.058 0.031 1.052 0.092 1.043 0.025 1.037 0.093 1.094 0.084 1.092 0.023 1.009 0.023 1.031 0.017 1.061 0.041
feature_22 0.082 0.019 0.07 0.024 0.057 0.035 0.006 0.023 0.066 0.05 0.052 0.017 0.057 0.1 0.082 0.059 0.098 0.09 0.06 0.003 0.009 0.007 0.045
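
The fixture shrinks from 50x20 to 23x23, and the planted bicluster now sits on the odd-indexed features and odd-indexed samples, which is exactly what the rewritten `test_clear_biclusters` checks for. A quick sanity-check sketch of the pattern (assumes the file is read from the repository root; pandas treats the unlabeled first column as the row index):

```python
import pandas as pd

df = pd.read_csv("test/test_input/synthetic_clear_biclusters.tsv", sep="\t")
odd = list(range(1, 22, 2))                    # the indices test_clear_biclusters expects
assert (df.iloc[odd, odd] > 0.5).all().all()   # elevated block (values around 1.0)
assert (df.iloc[odd, ::2] < 0.5).all().all()   # odd features, even samples: background
```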
6 changes: 6 additions & 0 deletions test/test_input/synthetic_small_example.tsv
@@ -0,0 +1,6 @@
sample_0 sample_1 sample_2 sample_3 sample_4
feature_0 1.055 1.071 0.029 0.051 0.089
feature_1 1.09 1.013 0.021 0.005 0.044
feature_2 0.003 0.046 0.065 0.028 0.068
feature_3 0.059 0.002 0.056 0.026 0.042
feature_4 0.028 0.069 0.044 0.016 0.054
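
This 5x5 fixture backs the new `test_simple`: features 0-1 are elevated (around 1.0) in samples 0-1 and near zero elsewhere. A sketch of the check, assuming it is run from the repository root:

```python
import pandas as pd

df = pd.read_csv("test/test_input/synthetic_small_example.tsv", sep="\t")
assert (df.iloc[:2, :2] > 0.5).all().all()   # the planted bicluster
assert (df.iloc[:2, 2:] < 0.5).all().all()   # same features, remaining samples
assert (df.iloc[2:, :] < 0.5).all().all()    # background features
```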
87 changes: 60 additions & 27 deletions test/test_run_unpast.py
@@ -1,73 +1,106 @@
"""Tests for run_unpast, and hence all the core code. Usage: python -m pytest test/test_run_unpast.py"""
import os
import sys
import pandas as pd
import pytest

TEST_DIR = os.path.dirname(__file__)
RESULTS_DIR = os.path.join(TEST_DIR, "results")
if not os.access(RESULTS_DIR, os.W_OK):
# repo dir is currently read-only during github-actions testing
RESULTS_DIR = '/tmp/unpast/results'
# repo dir is currently read-only during the testing stage in github-actions
RESULTS_DIR = "/tmp/unpast/results"
REFERENCE_OUTPUT_DIR = os.path.join(TEST_DIR, "test_reference_output")

sys.path.append(os.path.join(TEST_DIR, ".."))
# sys.path.append(os.path.join(TEST_DIR, ".."))
from run_unpast import run


### Helper functions ###

def run_unpast_on_file(filename, basename):

def run_unpast_on_file(filename, basename, *args, **kwargs):
run(
os.path.join(TEST_DIR, filename),
out_dir=RESULTS_DIR,
basename=basename,
*args,
**kwargs,
)
return parse_answer(RESULTS_DIR, basename)


def parse_answer(answer_dir, basename):
files = os.listdir(answer_dir)
answer_files = [f for f in files if f.startswith(basename) and f.endswith("biclusters.tsv")]
answer_files = [
f for f in files if f.startswith(basename) and f.endswith("biclusters.tsv")
]
assert len(answer_files) == 1, f"There are {len(answer_files)} files instead of 1"
return pd.read_csv(os.path.join(answer_dir, answer_files[0]), sep="\t", comment='#')
return pd.read_csv(os.path.join(answer_dir, answer_files[0]), sep="\t", comment="#")


def parse_to_features_samples_ids(answer):
def to_set_of_nums(s):
return set(map(int, s.strip().split()))

return (
to_set_of_nums(answer["gene_indexes"]),
to_set_of_nums(answer["sample_indexes"]),
)


### Tests ###


@pytest.mark.slow
def test_smoke():
"""Smoke test - check that the program runs on some input without failure."""
run_unpast_on_file(
filename = "test_input/synthetic_clear_biclusters.tsv",
basename = "test_smoke",
filename="test_input/synthetic_clear_biclusters.tsv",
basename="test_smoke",
)


@pytest.mark.slow
def test_simple():
"""Check that clear biclusters are found."""
res = run_unpast_on_file(
filename="test_input/synthetic_small_example.tsv",
basename="test_simple",
min_n_samples=2,
clust_method="Louvain",
)
assert len(res) == 1, "Too many clusters found"
features, samples = parse_to_features_samples_ids(res.iloc[0])
assert features == {0, 1}
assert samples == {0, 1}


@pytest.mark.slow
def test_clear_biclusters():
"""Check that clear biclusters are found."""
res = run_unpast_on_file(
filename = "test_input/synthetic_clear_biclusters.tsv",
basename = "test_clear_biclusters",
filename="test_input/synthetic_clear_biclusters.tsv",
basename="test_clear_biclusters",
)
raise NotImplementedError("TODO: Implement this test")

found_correct_bicluster = False
for _, row in res.iterrows():
features, samples = parse_to_features_samples_ids(row)
if features == set(range(1, 22, 2)) and samples == set(range(1, 22, 2)):
found_correct_bicluster = True

assert found_correct_bicluster


@pytest.mark.slow
def test_reproducible():
"""Check that the same data is found on a complicated input with no clear answer."""
res = run_unpast_on_file(
filename = "test_input/synthetic_noise.tsv",
basename = "test_reproducible",
filename="test_input/synthetic_noise.tsv",
basename="test_reproducible",
)
reference = parse_answer(
answer_dir = REFERENCE_OUTPUT_DIR,
basename = "test_reproducible",
answer_dir=REFERENCE_OUTPUT_DIR,
basename="test_reproducible",
)
assert res.equals(reference), "The results are not reproducible"


if __name__ == "__main__":
# run all the tests in this file
test_smoke()
test_reproducible()

# deselected test
# test_clear_biclusters()

# TODO: use pytest instead
# pytest.main([f"{os.path.abspath(__file__)}"])
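
With the `slow` marker in place, the two-stage selection used by the workflow can be reproduced locally; a sketch using `pytest.main` from the repository root (the command-line equivalents are the ones in the updated `run_tests.yml`):

```python
import pytest

pytest.main(["test", "-m", "not slow"])   # quick subset, as in the workflow
pytest.main(["test", "--durations=0"])    # full run with per-test timings
```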
98 changes: 98 additions & 0 deletions test/utils/test_method.py
@@ -0,0 +1,98 @@
import pandas as pd
import numpy as np
from utils.method import zscore, prepare_input_matrix, get_trend


def test_get_trend_single_point():
sizes = [10]
thresholds = [2.5]
min_snr = get_trend(sizes, thresholds, plot=False, verbose=False)
assert min_snr(10) == 2.5


def test_get_trend_multiple_points():
sizes = [10, 20, 30, 40, 50]
thresholds = [2.5, 3.0, 3.5, 4.0, 4.5]
min_snr = get_trend(sizes, thresholds, plot=False, verbose=False)
assert np.allclose(min_snr(sizes), thresholds)


def test_get_trend_noisy():
sizes = [1] * 100 + [2] * 100
thresholds = np.linspace(0, 1, 200)
min_snr = get_trend(sizes, thresholds, plot=False, verbose=False)
# 0.1 tolerance, as the set of points actually used may not be very large
assert np.allclose(min_snr([1, 1.5, 2]), [0.25, 0.5, 0.75], atol=0.1)


# def test_zscore():
# # Test case 1: Basic functionality
# df = pd.DataFrame({
# 'A': [1, 2, 3],
# 'B': [4, 5, 6],
# 'C': [7, 8, 9]
# })
# result = zscore(df)
# assert result.shape == df.shape
# assert np.allclose(result.mean(), 0, atol=1e-7)
# assert np.allclose(result.std(), 1, atol=1e-7)

# # Test case 2: Handling zero variance
# df_zero_var = pd.DataFrame({
# 'A': [1, 1, 1],
# 'B': [2, 3, 4],
# 'C': [5, 6, 7]
# })
# result_zero_var = zscore(df_zero_var)
# assert result_zero_var.shape == df_zero_var.shape
# assert np.allclose(result_zero_var.loc['A'], 0)

# def test_prepare_input_matrix():
# # Test case 1: Basic functionality
# df = pd.DataFrame({
# 'A': [1, 2, 3, 4, 5],
# 'B': [2, 3, 4, 5, 6],
# 'C': [3, 4, 5, 6, 7]
# })
# result = prepare_input_matrix(df)
# assert result.shape == df.shape
# assert np.allclose(result.mean(), 0, atol=1e-7)
# assert np.allclose(result.std(), 1, atol=1e-7)

# # Test case 2: Handling zero variance
# df_zero_var = pd.DataFrame({
# 'A': [1, 1, 1, 1, 1],
# 'B': [2, 3, 4, 5, 6],
# 'C': [3, 4, 5, 6, 7]
# })
# result_zero_var = prepare_input_matrix(df_zero_var)
# assert result_zero_var.shape == (2, 5) # One row should be dropped

# # Test case 3: Handling missing values
# df_missing = pd.DataFrame({
# 'A': [1, 2, np.nan, 4, 5],
# 'B': [2, 3, 4, np.nan, 6],
# 'C': [3, 4, 5, 6, 7]
# })
# result_missing = prepare_input_matrix(df_missing, min_n_samples=4)
# assert result_missing.shape == (2, 5) # One row should be dropped

# # Test case 4: Ceiling
# df_ceiling = pd.DataFrame({
# 'A': [1, 2, 3, 4, 5],
# 'B': [2, 3, 4, 5, 6],
# 'C': [3, 4, 5, 6, 7]
# })
# result_ceiling = prepare_input_matrix(df_ceiling, ceiling=2)
# assert result_ceiling.max().max() <= 2
# assert result_ceiling.min().min() >= -2

# # Test case 5: Non-standardized input
# df_non_std = pd.DataFrame({
# 'A': [10, 20, 30, 40, 50],
# 'B': [20, 30, 40, 50, 60],
# 'C': [30, 40, 50, 60, 70]
# })
# result_non_std = prepare_input_matrix(df_non_std)
# assert np.allclose(result_non_std.mean(), 0, atol=1e-7)
# assert np.allclose(result_non_std.std(), 1, atol=1e-7)
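
The commented-out tests sketch the expected behaviour of `zscore` and `prepare_input_matrix`: roughly mean 0 and standard deviation 1 per feature, with zero-variance rows neutralised or dropped. A minimal row-wise reference under that interpretation; the real `utils.method.zscore` may differ in details such as the std estimator or zero-variance handling:

```python
import pandas as pd

def rowwise_zscore(df: pd.DataFrame) -> pd.DataFrame:
    # Centre and scale each row (feature); zero-variance rows map to zeros.
    means = df.mean(axis=1)
    stds = df.std(axis=1).replace(0, 1.0)
    return df.sub(means, axis=0).div(stds, axis=0)
```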
19 changes: 18 additions & 1 deletion utils/method.py
@@ -219,7 +219,24 @@ def generate_null_dist(


def get_trend(sizes, thresholds, plot=True, verbose=True):
# smoothens the trend and retunrs a function min_SNR(size; p-val. cutoff)
"""
Smooths the trend and returns a function min_SNR(size; p-val. cutoff).
Given a set of points (x_i, y_i),
returns a function f(x) that interpolates the data with LOWESS+linear interpolation
Args:
sizes: values of x_i
thresholds: values of y_i
plot: if True, plots the trend
verbose: if True, prints the LOWESS frac
Returns:
get_min_snr: a function that returns the minimal SNR for a given size
"""
assert len(sizes) >= 1
if len(sizes) == 1:
return lambda x: thresholds[0]

lowess = sm.nonparametric.lowess
frac = max(1, min(math.floor(int(0.1 * len(sizes))), 15) / len(sizes))
# if verbose:
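
The new docstring describes `get_trend` as LOWESS smoothing followed by interpolation that yields a `min_SNR(size)` function. A minimal sketch of that idea with statsmodels (illustrative names, not the actual implementation, which also picks `frac` adaptively and handles the single-point case as shown above):

```python
import numpy as np
import statsmodels.api as sm

def fit_min_snr_trend(sizes, thresholds, frac=0.3):
    # LOWESS-smooth the (size, threshold) points, then interpolate linearly.
    smoothed = sm.nonparametric.lowess(thresholds, sizes, frac=frac, return_sorted=True)
    xs, ys = smoothed[:, 0], smoothed[:, 1]
    return lambda size: np.interp(size, xs, ys)

# e.g. min_snr = fit_min_snr_trend([10, 20, 30], [2.5, 3.0, 3.5]); min_snr(25)
```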
