From 75d37d2d803ec971fa37a1cd2e0de809745366a4 Mon Sep 17 00:00:00 2001 From: Blaz Mramor Date: Thu, 28 Mar 2024 22:33:37 +0100 Subject: [PATCH] fix global variable creation --- outrank/core_ranking.py | 26 +++++++++++--------------- outrank/core_utils.py | 4 ++-- outrank/task_summary.py | 7 ++++--- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index a406c26..832a540 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -51,9 +51,10 @@ def prior_combinations_sample(combinations: list[tuple[Any, ...]], args: Any) -> list[tuple[Any, ...]]: """Make sure only relevant subspace of combinations is selected based on prior counts""" - if len(GLOBAL_PRIOR_COMB_COUNTS) == 0: - for combination in combinations: - GLOBAL_PRIOR_COMB_COUNTS[combination] += 1 + missing_combinations = set(set(combinations)).difference(GLOBAL_PRIOR_COMB_COUNTS.keys()) + if len(missing_combinations) > 0: + for combination in missing_combinations: + GLOBAL_PRIOR_COMB_COUNTS[combination] = 0 tmp = combinations[:args.combination_number_upper_bound] else: tmp = list(x[0] for x in sorted(GLOBAL_PRIOR_COMB_COUNTS.items(), key=lambda x:x[1], reverse=False))[:args.combination_number_upper_bound] @@ -121,16 +122,9 @@ def mixed_rank_graph( if is_prior_heuristic(args): reference_model_features = [(" AND ").join(tuple(sorted(item.split(",")))) for item in extract_features_from_reference_JSON(args.reference_model_JSON, all_features=True)] combinations = [comb for comb in combinations if comb[0] not in reference_model_features and comb[1] not in reference_model_features] - print(combinations) - print("\n\n") combinations = prior_combinations_sample(combinations, args) - print(GLOBAL_PRIOR_COMB_COUNTS) - print(combinations) - print("\n\n") random.shuffle(combinations) - print(combinations) - print("\n\n") if args.heuristic == 'Constant': final_constant_imp = [] @@ -206,10 +200,6 @@ def compute_combined_features( model_combinations = [] full_combination_space = [] - if args.reference_model_JSON != '': - model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) - model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations] - full_combination_space = model_combinations if args.interaction_order > 1: full_combination_space = list( @@ -217,6 +207,12 @@ def compute_combined_features( ) full_combination_space = prior_combinations_sample(full_combination_space, args) + if args.reference_model_JSON != '': + model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) + model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations] + if not is_prior_heuristic(args): + full_combination_space = model_combinations + if is_prior_heuristic(args): full_combination_space = full_combination_space + [tuple for tuple in model_combinations if tuple not in full_combination_space] @@ -243,7 +239,7 @@ def compute_combined_features( pbar.set_description('Concatenating into final frame ..') input_dataframe = pd.concat([input_dataframe, tmp_df], axis=1) del tmp_df - + return input_dataframe diff --git a/outrank/core_utils.py b/outrank/core_utils.py index 55988d6..336cc35 100644 --- a/outrank/core_utils.py +++ b/outrank/core_utils.py @@ -646,8 +646,8 @@ def summarize_rare_counts( ) -def is_prior_heuristic(args: Any): - if "-prior" in args.heuristic and args.reference_model_JSON and args.reference_model_JSON != "": +def is_prior_heuristic(args: Any) -> bool: + if "-prior" in args.heuristic and args.reference_model_JSON: return True return False diff --git a/outrank/task_summary.py b/outrank/task_summary.py index 38475d9..458c9b3 100644 --- a/outrank/task_summary.py +++ b/outrank/task_summary.py @@ -37,9 +37,10 @@ def outrank_task_result_summary(args): min_score = np.min(final_df[f'Score {args.heuristic}'].values) max_score = np.max(final_df[f'Score {args.heuristic}'].values) - final_df[f'Score {args.heuristic}'] = ( - final_df[f'Score {args.heuristic}'] - min_score - ) / (max_score - min_score) + if "MI" in args.heuristic: + final_df[f'Score {args.heuristic}'] = ( + final_df[f'Score {args.heuristic}'] - min_score + ) / (max_score - min_score) logging.info(f'Storing summary files to {args.output_folder}') pd.set_option('display.max_rows', None, 'display.max_columns', None) singles_path = os.path.join(args.output_folder, 'feature_singles.tsv')