Skip to content

Commit

Permalink
pairwise tests
Browse files Browse the repository at this point in the history
  • Loading branch information
SkBlaz committed Oct 15, 2023
1 parent dd83868 commit fc76b4a
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 1 deletion.
18 changes: 18 additions & 0 deletions benchmarks/generator_naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,10 @@ def generate_random_matrix(num_features, size=2000000):
os.path.join(args.verify_outputs, 'feature_singles.tsv'), sep='\t',
)

# Load the tab-separated pairwise ranking table from the verify_outputs folder.
rankings_pairwise = pd.read_csv(
os.path.join(args.verify_outputs, 'pairwise_ranks.tsv'), sep='\t',
)

# Partial match test
if rankings.iloc[2]['Feature'] != 'f31-(90; 100)' and rankings.iloc[2]['Score MI-numba-randomized'] > 0.9:
raise Exception(
Expand All @@ -83,3 +87,17 @@ def generate_random_matrix(num_features, size=2000000):
logger.info(
f'Identified the appropriate feature in the haystack ({rankings.iloc[1].Feature})',
)


# Tests related to pairwise rankings.
# Sort ascending by 'Score' (ties broken by 'FeatureA') so that the
# highest-scoring pair ends up in the last row.
sorted_by_scores = rankings_pairwise.sort_values(by=['Score', 'FeatureA'])

# Sanity check on the size of the pairwise output.
if len(sorted_by_scores) < 10000:
    # The exception has to be raised; merely constructing it is a no-op and
    # would let an undersized output pass verification silently.
    raise Exception('Number of pairwise comparisons insufficient!')
else:
    logger.info('Found enough pairwise comparisons ..')

# The top-scoring row is expected to pair f45 with itself, with Score > 1.0.
top_pair = sorted_by_scores.iloc[-1]
if (
    top_pair['FeatureA'] == 'f45-(90; 100)'
    and top_pair['FeatureB'] == 'f45-(90; 100)'
    and top_pair['Score'] > 1.0
):
    logger.info('Similarity check passed for f45 ..')
else:
    raise Exception('Most similar features not identified ..')
5 changes: 4 additions & 1 deletion scripts/run_benchmarks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@ then
python generator_naive.py --output_df_name dataset_naive --num_features 100 --size 10000;

# Substantial subsampling must retrieve the needle.
outrank --data_path dataset_naive --data_source csv-raw --subsampling 1 --task all --heuristic MI-numba-randomized --target_ranking_only True --interaction_order 1 --output_folder ./ranking_outputs --minibatch_size 20000;
outrank --data_path dataset_naive --data_source csv-raw --subsampling 1 --task all --heuristic MI-numba-randomized --target_ranking_only False --interaction_order 1 --output_folder ./ranking_outputs --minibatch_size 20000;

python generator_naive.py --verify_outputs ranking_outputs;

rm -r ranking_outputs dataset_naive;

python generator_naive.py --output_df_name dataset_naive --num_features 100 --size 10000;

exit
fi
###################################################################
Expand Down

0 comments on commit fc76b4a

Please sign in to comment.