Skip to content

Commit

Permalink
cache
Browse files Browse the repository at this point in the history
  • Loading branch information
SkBlaz committed Sep 29, 2023
1 parent 97e7adf commit 0eece60
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 3 deletions.
25 changes: 23 additions & 2 deletions outrank/core_ranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,31 @@
# Fixed seed so that calls into the `random` module (e.g. the
# `random.shuffle` of feature combinations in `mixed_rank_graph`) are
# reproducible from run to run.
random.seed(a=123, version=2)
# NOTE(review): presumably per-feature cardinality state filled in elsewhere
# in this module -- confirm against the code that writes to it.
GLOBAL_CARDINALITY_STORAGE: dict[Any, Any] = dict()
# Backed by a Counter, so missing keys read as 0.
# NOTE(review): presumably tallies rarely occurring values -- confirm.
GLOBAL_RARE_VALUE_STORAGE: dict[str, Any] = Counter()

# Maps a feature combination to a running count maintained by
# `update_comb_count_cache`; `prior_combinations_sample` reads it to pick the
# combinations with the lowest counts first.
GLOBAL_PRIOR_COMB_COUNTS: dict[Any, int] = dict()
# NOTE(review): the element type and the code that populates this set are not
# visible here -- confirm before narrowing the annotation.
IGNORED_VALUES: set[Any] = set()
# NOTE(review): presumably the error bound handed to the HyperLogLog
# cardinality estimator -- confirm at the point of use.
HYPERLL_ERROR_BOUND = 0.02

def prior_combinations_sample(combinations: list[tuple[Any, ...]], args: Any) -> list[tuple[Any, ...]]:
    """Select a bounded set of combinations, favouring the least-used ones.

    On the first call (``GLOBAL_PRIOR_COMB_COUNTS`` is empty) every candidate
    is registered in the cache and the leading slice of ``combinations`` is
    returned. On later calls the candidates are taken from the cache itself,
    ordered by ascending count, so combinations selected least often so far
    come first. Every returned combination has its count incremented.

    Args:
        combinations: Candidate feature combinations for the current batch.
        args: Parsed options; only ``combination_number_upper_bound`` is read.

    Returns:
        A new list with at most ``args.combination_number_upper_bound``
        combinations.
    """
    limit = args.combination_number_upper_bound

    if not GLOBAL_PRIOR_COMB_COUNTS:
        # Cold start: register every candidate so later calls can rank them.
        for combination in combinations:
            update_comb_count_cache(combination)
        selected = combinations[:limit]
    else:
        # NOTE(review): this branch draws from the cache only and does not
        # look at `combinations`; it relies on the candidate set being the
        # same on every call -- confirm that holds for all callers.
        # sorted() is stable, so equal counts keep cache insertion order.
        selected = sorted(
            GLOBAL_PRIOR_COMB_COUNTS,
            key=GLOBAL_PRIOR_COMB_COUNTS.__getitem__,
        )[:limit]

    # Record this round's picks so they rank lower on the next call.
    for combination in selected:
        update_comb_count_cache(combination)

    return selected

def update_comb_count_cache(combination: tuple[Any, ...]) -> None:
    """Increment the count stored for ``combination`` in ``GLOBAL_PRIOR_COMB_COUNTS``.

    A combination that is not in the cache yet is inserted with a count of 1.
    """
    previous_count = GLOBAL_PRIOR_COMB_COUNTS.get(combination, 0)
    GLOBAL_PRIOR_COMB_COUNTS[combination] = previous_count + 1

def mixed_rank_graph(
input_dataframe: pd.DataFrame, args: Any, cpu_pool: Any, pbar: Any,
Expand Down Expand Up @@ -91,8 +112,8 @@ def mixed_rank_graph(
if (args.target_ranking_only == 'True') and ('3mr' not in args.heuristic):
combinations = [x for x in combinations if args.label_column in x]

combinations = prior_combinations_sample(combinations, args)
random.shuffle(combinations)
combinations = combinations[: args.combination_number_upper_bound]

if args.heuristic == 'Constant':
final_constant_imp = []
Expand Down
2 changes: 1 addition & 1 deletion outrank/task_selftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def conduct_self_test():
'outrank --task data_generator --num_synthetic_rows 100000', shell=True,
)
subprocess.run(
'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw;',
'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60;',
shell=True,
)

Expand Down

0 comments on commit 0eece60

Please sign in to comment.