Skip to content

Commit

Permalink
Think about filtering out matching distributions
Browse files Browse the repository at this point in the history
  • Loading branch information
gevtushenko committed May 2, 2023
1 parent b1dae29 commit 5c7b408
Showing 1 changed file with 23 additions and 2 deletions.
25 changes: 23 additions & 2 deletions benchmarks/scripts/analyze.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import mannwhitneyu


def get_bench_columns():
Expand Down Expand Up @@ -146,6 +147,23 @@ def extract_scores(df):
return result.sort_values(by=['score'], ascending=False)


def distributions_are_different(alpha, row):
    """Tell whether a row's two sample sets differ significantly.

    Runs a two-sided Mann-Whitney U test between ``row['base_samples']``
    (the reference) and ``row['samples']`` (the candidate).

    H0: the distributions are not different.
    H1: the distributions are different.

    Args:
        alpha: Significance level; H0 is rejected when the p-value is
            strictly below it.
        row: Mapping-like object (for example a DataFrame row) that provides
            ``'base_samples'`` and ``'samples'`` entries holding sized
            sequences of measurements.

    Returns:
        bool: True when H0 is rejected (``p < alpha``), False otherwise.
        Also False when either sample set is empty, because no difference
        can be demonstrated without data.
    """
    ref_samples = row['base_samples']
    cmp_samples = row['samples']

    # The test is undefined for an empty sample: depending on the SciPy
    # release it either raises or produces a NaN p-value. Answer "not shown
    # to be different" explicitly so the outcome does not depend on the
    # installed version.
    if len(ref_samples) == 0 or len(cmp_samples) == 0:
        return False

    _, p = mannwhitneyu(ref_samples, cmp_samples)

    # Reject H0. A NaN p-value compares False, i.e. H0 is kept.
    return bool(p < alpha)


def remove_matching_distributions(alpha, df):
    """Keep only the rows whose sample distributions differ.

    Evaluates ``distributions_are_different(alpha, row)`` for every row of
    ``df`` and returns the rows for which it is true.

    Args:
        alpha: Significance level forwarded to
            ``distributions_are_different``.
        df: DataFrame whose rows are tested one by one.

    Returns:
        ``df`` indexed by the per-row test results.
    """
    keep_mask = df.apply(
        lambda record: distributions_are_different(alpha, record), axis=1)
    return df[keep_mask]


def iterate_case_dfs(args, callable):
storage = cub.bench.Storage()
pattern = re.compile(args.R)
Expand All @@ -172,13 +190,14 @@ def iterate_case_dfs(args, callable):
callable(algname, point_str, case_df)


def case_top(alpha, N, algname, ct_point_name, case_df):
    """Print the N best-scoring variants of a single benchmark case.

    Rows of ``case_df`` whose samples are not significantly different from
    their base samples (at level ``alpha``) are removed first; the remainder
    is passed through ``extract_complete_variants`` and scored.

    Args:
        alpha: Significance level forwarded to
            ``remove_matching_distributions``.
        N: Number of leading rows of the score table to print.
        algname: Algorithm name, used in the printed header.
        ct_point_name: Point label, printed in brackets after ``algname``.
        case_df: DataFrame holding the measurements of this case.
    """
    print("{}[{}]:".format(algname, ct_point_name))
    # Filter before scoring so that statistically indistinguishable variants
    # never reach the ranking.
    case_df = extract_complete_variants(
        remove_matching_distributions(alpha, case_df))
    print(extract_scores(case_df).head(N))


def top(args):
    """Print the top-scoring variants for each case visited.

    Pre-binds ``args.alpha`` and ``args.top`` as the first two arguments of
    ``case_top`` and hands the resulting callable to ``iterate_case_dfs``,
    which supplies the remaining per-case arguments.

    Args:
        args: Parsed command-line arguments; ``alpha`` and ``top`` are read
            here, and the whole object is forwarded to ``iterate_case_dfs``.
    """
    # case_top takes (alpha, N, ...): both leading arguments must be bound,
    # in this order, for the per-case call to line up.
    iterate_case_dfs(args, functools.partial(case_top, args.alpha, args.top))


def case_coverage(algname, ct_point_name, case_df):
Expand Down Expand Up @@ -280,6 +299,8 @@ def parse_arguments():
'--coverage', action=argparse.BooleanOptionalAction, help="Show variant space coverage.")
parser.add_argument(
'--top', default=7, type=int, action='store', nargs='?', help="Show top N variants with highest score.")
parser.add_argument(
'--alpha', default=0.5, type=float)
parser.add_argument(
'--variants', type=str, help="Show matching variants data.")
return parser.parse_args()
Expand Down

0 comments on commit 5c7b408

Please sign in to comment.