outbrain · bmramor · Jan 16, 2024 · Jan 16, 2024 · Jan 16, 2024 · Jan 16, 2024
diff --git a/outrank/__main__.py b/outrank/__main__.py
@@ -165,7 +165,7 @@ def main():
     parser.add_argument(
         '--reference_model_JSON',
         type=str,
-        default='./ranking_outputs/reference_model.json',
+        default='',
         help='Reference model JSON',
     )
 

diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py
@@ -187,11 +187,15 @@ def compute_combined_features(
     join_string = ' AND_REL ' if is_3mr else ' AND '
     interaction_order = 2 if is_3mr else args.interaction_order
 
-    full_combination_space = list(
-        itertools.combinations(all_columns, interaction_order),
-    )
-
-    if args.combination_number_upper_bound:
+    if args.reference_model_JSON:  # interactions are dictated by the reference model
+        combined_features = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only=True)
+        full_combination_space = [combination.split(',') for combination in combined_features]
+    else:
+        full_combination_space = list(
+            itertools.combinations(all_columns, interaction_order),
+        )
+
+    if args.combination_number_upper_bound and not args.reference_model_JSON:  # cap only the exhaustive space
         random.shuffle(full_combination_space)
         full_combination_space = full_combination_space[
             : args.combination_number_upper_bound
@@ -517,7 +521,7 @@ def compute_batch_ranking(
             input_dataframe, logger, args, pbar,
         )
 
-    if args.interaction_order > 1:
+    if args.interaction_order > 1 or args.reference_model_JSON:
         pbar.set_description('Constructing new features')
         input_dataframe = compute_combined_features(
             input_dataframe, logger, args, pbar,

diff --git a/outrank/core_utils.py b/outrank/core_utils.py
@@ -393,7 +393,7 @@ def parse_csv_raw(data_path) -> DatasetInformationStorage:
     )
 
 
-def extract_features_from_reference_JSON(json_path: str) -> set[Any]:
+def extract_features_from_reference_JSON(json_path: str, combined_features_only: bool = False) -> set[Any]:
     """Given a model's JSON, extract unique features"""
 
     with open(json_path) as jp:
@@ -402,7 +402,10 @@ def extract_features_from_reference_JSON(json_path: str) -> set[Any]:
     unique_features = set()
     feature_space = content['desc'].get('features', [])
     fields_space = content['desc'].get('fields', [])
-    joint_space = feature_space + fields_space
+    joint_space = feature_space + fields_space        
+
+    if combined_features_only:
+        return set([feature for feature in feature_space if len(feature.split(","))>1])
 
     for feature_tuple in joint_space:
         for individual_feature in feature_tuple.split(','):

diff --git a/tests/ranking_module_test.py b/tests/ranking_module_test.py
@@ -35,6 +35,7 @@ class args:
     combination_number_upper_bound: int = 1024
     disable_tqdm: bool = False
     mi_stratified_sampling_ratio: float = 1.0
+    reference_model_JSON: str = ""
 
 
 class CompareStrategiesTest(unittest.TestCase):