From 75d37d2d803ec971fa37a1cd2e0de809745366a4 Mon Sep 17 00:00:00 2001
From: Blaz Mramor <bmramor@outbrain.com>
Date: Thu, 28 Mar 2024 22:33:37 +0100
Subject: [PATCH] fix global variable creation

---
 outrank/core_ranking.py | 26 +++++++++++---------------
 outrank/core_utils.py   |  4 ++--
 outrank/task_summary.py |  7 ++++---
 3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py
index a406c26..832a540 100644
--- a/outrank/core_ranking.py
+++ b/outrank/core_ranking.py
@@ -51,9 +51,10 @@
 def prior_combinations_sample(combinations: list[tuple[Any, ...]], args: Any) -> list[tuple[Any, ...]]:
     """Make sure only relevant subspace of combinations is selected based on prior counts"""
 
-    if len(GLOBAL_PRIOR_COMB_COUNTS) == 0:
-        for combination in combinations:
-            GLOBAL_PRIOR_COMB_COUNTS[combination] += 1
+    missing_combinations = set(set(combinations)).difference(GLOBAL_PRIOR_COMB_COUNTS.keys())
+    if len(missing_combinations) > 0:
+        for combination in missing_combinations:
+            GLOBAL_PRIOR_COMB_COUNTS[combination] = 0
         tmp = combinations[:args.combination_number_upper_bound]
     else:
         tmp = list(x[0] for x in sorted(GLOBAL_PRIOR_COMB_COUNTS.items(), key=lambda x:x[1], reverse=False))[:args.combination_number_upper_bound]
@@ -121,16 +122,9 @@ def mixed_rank_graph(
     if is_prior_heuristic(args):
         reference_model_features = [(" AND ").join(tuple(sorted(item.split(",")))) for item in extract_features_from_reference_JSON(args.reference_model_JSON, all_features=True)]
         combinations = [comb for comb in combinations if comb[0] not in reference_model_features and comb[1] not in reference_model_features]
-        print(combinations)
-        print("\n\n")
 
     combinations = prior_combinations_sample(combinations, args)
-    print(GLOBAL_PRIOR_COMB_COUNTS)
-    print(combinations)
-    print("\n\n")
     random.shuffle(combinations)
-    print(combinations)
-    print("\n\n")
 
     if args.heuristic == 'Constant':
         final_constant_imp = []
@@ -206,10 +200,6 @@ def compute_combined_features(
     model_combinations = []
     full_combination_space = []
 
-    if args.reference_model_JSON != '':
-        model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True)
-        model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations]
-        full_combination_space = model_combinations
 
     if args.interaction_order > 1:
             full_combination_space = list(
@@ -217,6 +207,12 @@ def compute_combined_features(
             )
     full_combination_space = prior_combinations_sample(full_combination_space, args)
 
+    if args.reference_model_JSON != '':
+        model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True)
+        model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations]
+        if not is_prior_heuristic(args):
+            full_combination_space = model_combinations
+
     if is_prior_heuristic(args):
         full_combination_space = full_combination_space + [tuple for tuple in model_combinations if tuple not in full_combination_space]
 
@@ -243,7 +239,7 @@ def compute_combined_features(
     pbar.set_description('Concatenating into final frame ..')
     input_dataframe = pd.concat([input_dataframe, tmp_df], axis=1)
     del tmp_df
-    
+
     return input_dataframe
 
 
diff --git a/outrank/core_utils.py b/outrank/core_utils.py
index 55988d6..336cc35 100644
--- a/outrank/core_utils.py
+++ b/outrank/core_utils.py
@@ -646,8 +646,8 @@ def summarize_rare_counts(
     )
 
 
-def is_prior_heuristic(args: Any):
-    if "-prior" in args.heuristic and args.reference_model_JSON and args.reference_model_JSON != "":
+def is_prior_heuristic(args: Any) -> bool:
+    if "-prior" in args.heuristic and args.reference_model_JSON:
         return True
     return False
 
diff --git a/outrank/task_summary.py b/outrank/task_summary.py
index 38475d9..458c9b3 100644
--- a/outrank/task_summary.py
+++ b/outrank/task_summary.py
@@ -37,9 +37,10 @@ def outrank_task_result_summary(args):
 
     min_score = np.min(final_df[f'Score {args.heuristic}'].values)
     max_score = np.max(final_df[f'Score {args.heuristic}'].values)
-    final_df[f'Score {args.heuristic}'] = (
-        final_df[f'Score {args.heuristic}'] - min_score
-    ) / (max_score - min_score)
+    if "MI" in args.heuristic:
+        final_df[f'Score {args.heuristic}'] = (
+            final_df[f'Score {args.heuristic}'] - min_score
+        ) / (max_score - min_score)
     logging.info(f'Storing summary files to {args.output_folder}')
     pd.set_option('display.max_rows', None, 'display.max_columns', None)
     singles_path = os.path.join(args.output_folder, 'feature_singles.tsv')