From 60a878159bc110237bb3946cbc6ac1bba2dccbc3 Mon Sep 17 00:00:00 2001 From: Blaz Mramor Date: Tue, 12 Mar 2024 15:07:01 +0100 Subject: [PATCH 01/18] LR with priors initial implementation --- outrank/algorithms/importance_estimator.py | 33 ++++++++++++++-------- outrank/core_ranking.py | 6 +++- outrank/core_utils.py | 5 +++- outrank/task_selftest.py | 2 +- 4 files changed, 31 insertions(+), 15 deletions(-) diff --git a/outrank/algorithms/importance_estimator.py b/outrank/algorithms/importance_estimator.py index a228c4a..0b605ea 100644 --- a/outrank/algorithms/importance_estimator.py +++ b/outrank/algorithms/importance_estimator.py @@ -11,7 +11,7 @@ import pandas as pd from scipy.stats import pearsonr from sklearn.feature_selection import mutual_info_classif -from sklearn.linear_model import LogisticRegression +from sklearn.linear_model import LogisticRegression, SGDClassifier from sklearn.metrics import adjusted_mutual_info_score from sklearn.model_selection import cross_val_score from sklearn.preprocessing import OneHotEncoder @@ -38,11 +38,11 @@ def sklearn_MI(vector_first: Any, vector_second: Any) -> float: def sklearn_surrogate( - vector_first: Any, vector_second: Any, surrogate_model: str, + vector_first: Any, vector_second: Any, X: Any, surrogate_model: str ) -> float: - if surrogate_model == 'surrogate-LR': + if 'surrogate-LR' in surrogate_model: clf = LogisticRegression(max_iter=100000) - elif surrogate_model == 'surrogate-SVM': + elif 'surrogate-SVM' in surrogate_model: clf = SVC(gamma='auto', probability=True) transf = OneHotEncoder() @@ -57,17 +57,22 @@ def sklearn_surrogate( unique_values, counts = np.unique(vector_second, return_counts=True) # Establish min support for this type of ranking. - if counts[0] < len(unique_values) * (2**5): - estimate_feature_importance = 0 + # if counts[0] < len(unique_values) * (2**5): + # estimate_feature_importance = 0 - else: + if X.shape[0] == 0 and X.shape[1] == 0: vector_first = transf.fit_transform(vector_first.reshape(-1, 1)) estimate_feature_importance_list = cross_val_score( clf, vector_first, vector_second, scoring='neg_log_loss', cv=4, ) - - estimate_feature_importance = 1 + \ - np.median(estimate_feature_importance_list) + else: + X = np.concatenate((X,vector_first.reshape(-1, 1)), axis=1) + X = transf.fit_transform(X) + estimate_feature_importance_list = cross_val_score( + clf, X, vector_second, scoring='neg_log_loss', cv=4, + ) + estimate_feature_importance = 1 + \ + np.median(estimate_feature_importance_list) return estimate_feature_importance @@ -97,7 +102,7 @@ def sklearn_mi_adj(vector_first, vector_second): return estimate_feature_importance -def get_importances_estimate_pairwise(combination, args, tmp_df): +def get_importances_estimate_pairwise(combination, reference_model_features, args, tmp_df): """A method for parallel importances estimation. 
As interaction scoring is independent, individual scores can be computed in parallel.""" feature_one = combination[0] @@ -122,8 +127,12 @@ def get_importances_estimate_pairwise(combination, args, tmp_df): estimate_feature_importance = sklearn_MI(vector_first, vector_second) elif 'surrogate-' in args.heuristic: + X = np.array(float) + if ('-prior' in args.heuristic) and (len(reference_model_features) > 0): + X = tmp_df[reference_model_features].values + estimate_feature_importance = sklearn_surrogate( - vector_first, vector_second, args.heuristic, + vector_first, vector_second, X, args.heuristic ) elif 'MI-numba' in args.heuristic: diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index 39843f7..4806128 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -130,9 +130,13 @@ def mixed_rank_graph( # Map the scoring calls to the worker pool pbar.set_description('Allocating thread pool') + reference_model_features = {} + if 'prior' in args.heuristic: + reference_model_features = [(" AND ").join(item.split(",")) for item in extract_features_from_reference_JSON(args.reference_model_JSON, full_feature_space = True)] + # starmap is an alternative that is slower unfortunately (but nicer) def get_grounded_importances_estimate(combination: tuple[str]) -> Any: - return get_importances_estimate_pairwise(combination, args, tmp_df=tmp_df) + return get_importances_estimate_pairwise(combination, reference_model_features, args, tmp_df=tmp_df) start_enc_timer = timer() with cpu_pool as p: diff --git a/outrank/core_utils.py b/outrank/core_utils.py index 0136d42..0845b59 100644 --- a/outrank/core_utils.py +++ b/outrank/core_utils.py @@ -393,7 +393,7 @@ def parse_csv_raw(data_path) -> DatasetInformationStorage: ) -def extract_features_from_reference_JSON(json_path: str, combined_features_only = False) -> set[Any]: +def extract_features_from_reference_JSON(json_path: str, combined_features_only = False, full_feature_space = False) -> set[Any]: """Given a model's JSON, extract unique features""" with open(json_path) as jp: @@ -401,6 +401,9 @@ def extract_features_from_reference_JSON(json_path: str, combined_features_only unique_features = set() feature_space = content['desc'].get('features', []) + if full_feature_space: + return set(feature_space) + fields_space = content['desc'].get('fields', []) joint_space = feature_space + fields_space diff --git a/outrank/task_selftest.py b/outrank/task_selftest.py index 9b5ff6f..2cb188e 100644 --- a/outrank/task_selftest.py +++ b/outrank/task_selftest.py @@ -22,7 +22,7 @@ def conduct_self_test(): 'outrank --task data_generator --num_synthetic_rows 100000', shell=True, ) subprocess.run( - 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60;', + 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60 --heuristic surrogate-LR-prior --reference_model_JSON tests/test_ref_model.json;', shell=True, ) From 7630de496eb4120e5195c1a8aaae5845c8e257dc Mon Sep 17 00:00:00 2001 From: Blaz Mramor Date: Wed, 13 Mar 2024 09:10:07 +0100 Subject: [PATCH 02/18] add sgd --- outrank/algorithms/importance_estimator.py | 2 ++ outrank/task_selftest.py | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/outrank/algorithms/importance_estimator.py b/outrank/algorithms/importance_estimator.py index 0b605ea..02777ce 100644 --- a/outrank/algorithms/importance_estimator.py +++ b/outrank/algorithms/importance_estimator.py @@ -44,6 +44,8 @@ 
def sklearn_surrogate( clf = LogisticRegression(max_iter=100000) elif 'surrogate-SVM' in surrogate_model: clf = SVC(gamma='auto', probability=True) + elif 'surrogate-SGD' in surrogate_model: + clf = SGDClassifier(max_iter=100000, loss='log_loss') transf = OneHotEncoder() diff --git a/outrank/task_selftest.py b/outrank/task_selftest.py index 2cb188e..e27819b 100644 --- a/outrank/task_selftest.py +++ b/outrank/task_selftest.py @@ -22,7 +22,7 @@ def conduct_self_test(): 'outrank --task data_generator --num_synthetic_rows 100000', shell=True, ) subprocess.run( - 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60 --heuristic surrogate-LR-prior --reference_model_JSON tests/test_ref_model.json;', + 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60 --heuristic surrogate-SGD-prior --reference_model_JSON tests/test_ref_model.json;', shell=True, ) @@ -40,3 +40,7 @@ def conduct_self_test(): shutil.rmtree(path) logger.info('All tests passed, OutRank seems in shape \N{winking face}') + +if __name__ == '__main__': + conduct_self_test() + From 0e6e204aea63467369e3e0b987e7b946b590922a Mon Sep 17 00:00:00 2001 From: Blaz Mramor Date: Fri, 15 Mar 2024 11:45:40 +0100 Subject: [PATCH 03/18] adding reference model json for tests --- tests/test_ref_model.json | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 tests/test_ref_model.json diff --git a/tests/test_ref_model.json b/tests/test_ref_model.json new file mode 100644 index 0000000..6c36715 --- /dev/null +++ b/tests/test_ref_model.json @@ -0,0 +1,5 @@ +{ + "desc": { + "features": ["f0","f1","f0,f1"] + } +} \ No newline at end of file From 289f8eb9cfa4a112fbefaf3be9edebafc59531c8 Mon Sep 17 00:00:00 2001 From: bmramor Date: Tue, 19 Mar 2024 10:36:34 +0000 Subject: [PATCH 04/18] cleaning up --- examples/run_ranking_prior.sh | 21 ++++++++++++ outrank/algorithms/importance_estimator.py | 39 +++++++++++++--------- outrank/core_ranking.py | 3 +- outrank/core_utils.py | 7 ++++ outrank/task_selftest.py | 2 +- 5 files changed, 55 insertions(+), 17 deletions(-) create mode 100644 examples/run_ranking_prior.sh diff --git a/examples/run_ranking_prior.sh b/examples/run_ranking_prior.sh new file mode 100644 index 0000000..15a8642 --- /dev/null +++ b/examples/run_ranking_prior.sh @@ -0,0 +1,21 @@ +########################################################################################################## +# A very generic OutRank invocation (default). It includes visualizations and other relevant statistics. # +########################################################################################################## + +# This run compares features "one-at-a-time" and summarizes, visualizes the outputs. 
+# hint - if unsure what parameters do, you can always run "outrank --help" + +outrank \ + --task all \ + --data_path $PATH_TO_YOUR_DATA \ + --data_source ob-csv \ + --heuristic surrogate-SGD-prior \ + --target_ranking_only True \ + --interaction_order 1 \ + --combination_number_upper_bound 2048 \ + --num_threads 12 \ + --output_folder ./some_output_folder \ + --subsampling 1 \ + --minibatch_size 10000 \ + --label_column info_click_valid \ + --reference_model_JSON $PATH_TO_YOUR_REFERENCE_MODEL diff --git a/outrank/algorithms/importance_estimator.py b/outrank/algorithms/importance_estimator.py index 02777ce..b9c2e52 100644 --- a/outrank/algorithms/importance_estimator.py +++ b/outrank/algorithms/importance_estimator.py @@ -17,9 +17,14 @@ from sklearn.preprocessing import OneHotEncoder from sklearn.svm import SVC +from outrank.core_utils import is_prior_heuristic + + logger = logging.getLogger('syn-logger') logger.setLevel(logging.DEBUG) +num_folds = 4 + try: from outrank.algorithms.feature_ranking import ranking_mi_numba @@ -40,13 +45,9 @@ def sklearn_MI(vector_first: Any, vector_second: Any) -> float: def sklearn_surrogate( vector_first: Any, vector_second: Any, X: Any, surrogate_model: str ) -> float: - if 'surrogate-LR' in surrogate_model: - clf = LogisticRegression(max_iter=100000) - elif 'surrogate-SVM' in surrogate_model: - clf = SVC(gamma='auto', probability=True) - elif 'surrogate-SGD' in surrogate_model: - clf = SGDClassifier(max_iter=100000, loss='log_loss') - + + clf = initialize_classifier(surrogate_model) + transf = OneHotEncoder() # They do not commute, swap if needed @@ -58,20 +59,16 @@ def sklearn_surrogate( unique_values, counts = np.unique(vector_second, return_counts=True) - # Establish min support for this type of ranking. - # if counts[0] < len(unique_values) * (2**5): - # estimate_feature_importance = 0 - if X.shape[0] == 0 and X.shape[1] == 0: vector_first = transf.fit_transform(vector_first.reshape(-1, 1)) estimate_feature_importance_list = cross_val_score( - clf, vector_first, vector_second, scoring='neg_log_loss', cv=4, + clf, vector_first, vector_second, scoring='neg_log_loss', cv=num_folds, ) else: - X = np.concatenate((X,vector_first.reshape(-1, 1)), axis=1) + X = np.concatenate((X, vector_first.reshape(-1, 1)), axis=1) X = transf.fit_transform(X) estimate_feature_importance_list = cross_val_score( - clf, X, vector_second, scoring='neg_log_loss', cv=4, + clf, X, vector_second, scoring='neg_log_loss', cv=num_folds, ) estimate_feature_importance = 1 + \ np.median(estimate_feature_importance_list) @@ -130,7 +127,7 @@ def get_importances_estimate_pairwise(combination, reference_model_features, arg elif 'surrogate-' in args.heuristic: X = np.array(float) - if ('-prior' in args.heuristic) and (len(reference_model_features) > 0): + if is_prior_heuristic(args) and (len(reference_model_features) > 0): X = tmp_df[reference_model_features].values estimate_feature_importance = sklearn_surrogate( @@ -224,3 +221,15 @@ def get_importances_estimate_nonmyopic(args: Any, tmp_df: pd.DataFrame): # TODO - nonmyopic algorithms - tmp_df \ args.label vs. 
label # TODO - this is to be executed directly on df - no need for parallel kernel(s) pass + + +def initialize_classifier(surrogate_model: string): + if 'surrogate-LR' in surrogate_model: + return LogisticRegression(max_iter=100000) + elif 'surrogate-SVM' in surrogate_model: + return SVC(gamma='auto', probability=True) + elif 'surrogate-SGD' in surrogate_model: + return SGDClassifier(max_iter=100000, loss='log_loss') + else: + logging.warning(f'The chosen surrogate model {surrogate_model} is not supported, falling back to surrogate-SGD') + return SGDClassifier(max_iter=100000, loss='log_loss') diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index 4806128..1722991 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -32,6 +32,7 @@ from outrank.core_utils import internal_hash from outrank.core_utils import NominalFeatureSummary from outrank.core_utils import NumericFeatureSummary +from outrank.core_utils import is_prior_heuristic from outrank.feature_transformations.ranking_transformers import FeatureTransformerGeneric from outrank.feature_transformations.ranking_transformers import FeatureTransformerNoise @@ -131,7 +132,7 @@ def mixed_rank_graph( pbar.set_description('Allocating thread pool') reference_model_features = {} - if 'prior' in args.heuristic: + if is_prior_heuristic(args): reference_model_features = [(" AND ").join(item.split(",")) for item in extract_features_from_reference_JSON(args.reference_model_JSON, full_feature_space = True)] # starmap is an alternative that is slower unfortunately (but nicer) diff --git a/outrank/core_utils.py b/outrank/core_utils.py index 0845b59..0680008 100644 --- a/outrank/core_utils.py +++ b/outrank/core_utils.py @@ -644,3 +644,10 @@ def summarize_rare_counts( final_df.to_csv( f'{args.output_folder}/feature_sparsity_summary.tsv', index=False, sep='\t', ) + + +def is_prior_heuristic(args: Any): + if "-prior" in args.heuristic and args.reference_model_JSON and args.reference_model_JSON != "": + return True + return False + diff --git a/outrank/task_selftest.py b/outrank/task_selftest.py index e27819b..0e36d48 100644 --- a/outrank/task_selftest.py +++ b/outrank/task_selftest.py @@ -22,7 +22,7 @@ def conduct_self_test(): 'outrank --task data_generator --num_synthetic_rows 100000', shell=True, ) subprocess.run( - 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60 --heuristic surrogate-SGD-prior --reference_model_JSON tests/test_ref_model.json;', + 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60;', shell=True, ) From 54dd256f95979a8ed3becbb41725afbdd300397a Mon Sep 17 00:00:00 2001 From: bmramor Date: Tue, 19 Mar 2024 10:45:13 +0000 Subject: [PATCH 05/18] typing bug --- outrank/algorithms/importance_estimator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/outrank/algorithms/importance_estimator.py b/outrank/algorithms/importance_estimator.py index b9c2e52..4cd0fbc 100644 --- a/outrank/algorithms/importance_estimator.py +++ b/outrank/algorithms/importance_estimator.py @@ -223,7 +223,7 @@ def get_importances_estimate_nonmyopic(args: Any, tmp_df: pd.DataFrame): pass -def initialize_classifier(surrogate_model: string): +def initialize_classifier(surrogate_model: str): if 'surrogate-LR' in surrogate_model: return LogisticRegression(max_iter=100000) elif 'surrogate-SVM' in surrogate_model: From c632690531c8132c20f3d1218d49b2a8e2f9bb6e Mon Sep 17 00:00:00 2001 From: bmramor Date: 
Tue, 19 Mar 2024 14:16:44 +0000 Subject: [PATCH 06/18] support for combined features ranking --- examples/run_ranking_prior.sh | 4 ++-- outrank/core_ranking.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/examples/run_ranking_prior.sh b/examples/run_ranking_prior.sh index 15a8642..4421d0a 100644 --- a/examples/run_ranking_prior.sh +++ b/examples/run_ranking_prior.sh @@ -11,11 +11,11 @@ outrank \ --data_source ob-csv \ --heuristic surrogate-SGD-prior \ --target_ranking_only True \ - --interaction_order 1 \ + --interaction_order 2 \ --combination_number_upper_bound 2048 \ --num_threads 12 \ --output_folder ./some_output_folder \ - --subsampling 1 \ + --subsampling 100 \ --minibatch_size 10000 \ --label_column info_click_valid \ --reference_model_JSON $PATH_TO_YOUR_REFERENCE_MODEL diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index 1722991..60aa90a 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -197,12 +197,14 @@ def compute_combined_features( if args.reference_model_JSON != '': combined_features = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) full_combination_space = [combination.split(',') for combination in combined_features] + if is_prior_heuristic(args): + full_combination_space = list(set(full_combination_space) | set(itertools.combinations(all_columns, interaction_order))) else: full_combination_space = list( itertools.combinations(all_columns, interaction_order), ) - if args.combination_number_upper_bound and args.reference_model_JSON != '': + if args.combination_number_upper_bound: random.shuffle(full_combination_space) full_combination_space = full_combination_space[ : args.combination_number_upper_bound From 5e5380305e6a0b49446632a5a66f4a2101970179 Mon Sep 17 00:00:00 2001 From: bmramor Date: Wed, 20 Mar 2024 13:28:19 +0000 Subject: [PATCH 07/18] combinations for priors --- outrank/algorithms/importance_estimator.py | 2 +- outrank/core_ranking.py | 39 +++++++++++++++------- outrank/task_selftest.py | 2 +- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/outrank/algorithms/importance_estimator.py b/outrank/algorithms/importance_estimator.py index 4cd0fbc..dd28823 100644 --- a/outrank/algorithms/importance_estimator.py +++ b/outrank/algorithms/importance_estimator.py @@ -59,7 +59,7 @@ def sklearn_surrogate( unique_values, counts = np.unique(vector_second, return_counts=True) - if X.shape[0] == 0 and X.shape[1] == 0: + if X.size <= 1: vector_first = transf.fit_transform(vector_first.reshape(-1, 1)) estimate_feature_importance_list = cross_val_score( clf, vector_first, vector_second, scoring='neg_log_loss', cv=num_folds, diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index 60aa90a..2976df9 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -119,6 +119,10 @@ def mixed_rank_graph( combinations = prior_combinations_sample(combinations, args) random.shuffle(combinations) + reference_model_features = {} + if is_prior_heuristic(args): + reference_model_features = [(" AND ").join(tuple(sorted(item.split(",")))) for item in extract_features_from_reference_JSON(args.reference_model_JSON, full_feature_space = True)] + if args.heuristic == 'Constant': final_constant_imp = [] for c1, c2 in combinations: @@ -131,10 +135,6 @@ def mixed_rank_graph( # Map the scoring calls to the worker pool pbar.set_description('Allocating thread pool') - reference_model_features = {} - if is_prior_heuristic(args): - reference_model_features = [(" AND 
").join(item.split(",")) for item in extract_features_from_reference_JSON(args.reference_model_JSON, full_feature_space = True)] - # starmap is an alternative that is slower unfortunately (but nicer) def get_grounded_importances_estimate(combination: tuple[str]) -> Any: return get_importances_estimate_pairwise(combination, reference_model_features, args, tmp_df=tmp_df) @@ -194,21 +194,36 @@ def compute_combined_features( join_string = ' AND_REL ' if is_3mr else ' AND ' interaction_order = 2 if is_3mr else args.interaction_order - if args.reference_model_JSON != '': - combined_features = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) - full_combination_space = [combination.split(',') for combination in combined_features] - if is_prior_heuristic(args): - full_combination_space = list(set(full_combination_space) | set(itertools.combinations(all_columns, interaction_order))) + model_combinations = [] + if is_prior_heuristic(args): + model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) + model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations] + feature_combination_space = [] + if args.interaction_order > 1: + feature_combination_space = list( + itertools.combinations(all_columns, interaction_order), + ) + + full_combination_space = feature_combination_space + [tuple for tuple in model_combinations if tuple not in feature_combination_space] + del feature_combination_space else: - full_combination_space = list( - itertools.combinations(all_columns, interaction_order), - ) + if args.reference_model_JSON != '': + model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) + model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations] + full_combination_space = [combination.split(',') for combination in model_combinations] + else: + full_combination_space = list( + itertools.combinations(all_columns, interaction_order), + ) if args.combination_number_upper_bound: random.shuffle(full_combination_space) full_combination_space = full_combination_space[ : args.combination_number_upper_bound ] + if is_prior_heuristic(args): + full_combination_space = full_combination_space + [tuple for tuple in model_combinations if tuple not in full_combination_space] + com_counter = 0 new_feature_hash = {} diff --git a/outrank/task_selftest.py b/outrank/task_selftest.py index 0e36d48..a63abd5 100644 --- a/outrank/task_selftest.py +++ b/outrank/task_selftest.py @@ -22,7 +22,7 @@ def conduct_self_test(): 'outrank --task data_generator --num_synthetic_rows 100000', shell=True, ) subprocess.run( - 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60;', + 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60 --heuristic surrogate-SGD-prior --reference_model_JSON tests/test_ref_model.json --interaction_order 2;', shell=True, ) From 92408bb0240d8c09c68152ddd6d3f4c7c9c63fa7 Mon Sep 17 00:00:00 2001 From: bmramor Date: Thu, 21 Mar 2024 09:40:41 +0000 Subject: [PATCH 08/18] remove a bug for non-prior surrogate --- .../algorithms/synthetic_data_generators/generator_naive.py | 3 ++- outrank/core_ranking.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/outrank/algorithms/synthetic_data_generators/generator_naive.py 
b/outrank/algorithms/synthetic_data_generators/generator_naive.py index 23c5f8a..d0606e6 100644 --- a/outrank/algorithms/synthetic_data_generators/generator_naive.py +++ b/outrank/algorithms/synthetic_data_generators/generator_naive.py @@ -13,7 +13,8 @@ def generate_random_matrix(num_features=100, size=20000): target = sample[:, 30] # Some noise - target[target < 20] = 0 + target[target < 40] = 0 + target[target > 39] = 0 return sample, target diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index 2976df9..c2b70da 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -210,7 +210,7 @@ def compute_combined_features( if args.reference_model_JSON != '': model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations] - full_combination_space = [combination.split(',') for combination in model_combinations] + full_combination_space = model_combinations else: full_combination_space = list( itertools.combinations(all_columns, interaction_order), @@ -707,6 +707,7 @@ def estimate_importances_minibatches( logger, local_pbar, ) + print(importances_batch) bounds_storage_batch.append(bounds_storage) memory_storage_batch.append(memory_storage) From 31d3dd575b0e4388e431e7275d43097b36ff4562 Mon Sep 17 00:00:00 2001 From: bmramor Date: Thu, 21 Mar 2024 21:36:38 +0000 Subject: [PATCH 09/18] some more bug handling --- outrank/core_ranking.py | 13 ++++--------- outrank/task_selftest.py | 14 +++++--------- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index c2b70da..dc56aaa 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -116,8 +116,8 @@ def mixed_rank_graph( out_time_struct['encoding_columns'] = end_enc_timer - start_enc_timer combinations = get_combinations_from_columns(all_columns, args) - combinations = prior_combinations_sample(combinations, args) - random.shuffle(combinations) + #combinations = prior_combinations_sample(combinations, args) + #random.shuffle(combinations) reference_model_features = {} if is_prior_heuristic(args): @@ -195,17 +195,14 @@ def compute_combined_features( interaction_order = 2 if is_3mr else args.interaction_order model_combinations = [] + full_combination_space = [] if is_prior_heuristic(args): model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations] - feature_combination_space = [] if args.interaction_order > 1: - feature_combination_space = list( + full_combination_space = list( itertools.combinations(all_columns, interaction_order), ) - - full_combination_space = feature_combination_space + [tuple for tuple in model_combinations if tuple not in feature_combination_space] - del feature_combination_space else: if args.reference_model_JSON != '': model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) @@ -247,7 +244,6 @@ def compute_combined_features( pbar.set_description('Concatenating into final frame ..') input_dataframe = pd.concat([input_dataframe, tmp_df], axis=1) del tmp_df - return input_dataframe @@ -707,7 +703,6 @@ def estimate_importances_minibatches( logger, local_pbar, ) - print(importances_batch) bounds_storage_batch.append(bounds_storage) memory_storage_batch.append(memory_storage) diff 
--git a/outrank/task_selftest.py b/outrank/task_selftest.py index a63abd5..9335843 100644 --- a/outrank/task_selftest.py +++ b/outrank/task_selftest.py @@ -1,38 +1,31 @@ # helper set of methods that enable anywhere verification of core functions from __future__ import annotations - import logging import os import shutil import subprocess - import pandas as pd - logging.basicConfig( format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S', ) logger = logging.getLogger('syn-logger') logger.setLevel(logging.DEBUG) - - def conduct_self_test(): # Simulate full flow, ranking only subprocess.run( 'outrank --task data_generator --num_synthetic_rows 100000', shell=True, ) subprocess.run( - 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60 --heuristic surrogate-SGD-prior --reference_model_JSON tests/test_ref_model.json --interaction_order 2;', + 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60;', shell=True, ) dfx = pd.read_csv('ranking_outputs/pairwise_ranks.tsv', sep='\t') - logger.info("Verifying output's properties ..") assert dfx.shape[0] == 120 assert dfx.shape[1] == 3 assert dfx['FeatureA'].values.tolist().pop() == 'label-(81; 100)' or dfx['FeatureB'].values.tolist().pop() == 'label-(81; 100)' - to_remove = ['ranking_outputs', 'test_data_synthetic'] for path in to_remove: if os.path.exists(path) and os.path.isdir(path): @@ -41,6 +34,9 @@ def conduct_self_test(): logger.info('All tests passed, OutRank seems in shape \N{winking face}') + shutil.rmtree(path) + + logger.info('All tests passed, OutRank seems in shape \N{winking face}') + if __name__ == '__main__': conduct_self_test() - From 344be92c2227877f382da2b57d21b2831d828def Mon Sep 17 00:00:00 2001 From: bmramor Date: Thu, 21 Mar 2024 21:39:04 +0000 Subject: [PATCH 10/18] formatting --- outrank/task_selftest.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/outrank/task_selftest.py b/outrank/task_selftest.py index 9335843..c0cb54a 100644 --- a/outrank/task_selftest.py +++ b/outrank/task_selftest.py @@ -1,16 +1,21 @@ # helper set of methods that enable anywhere verification of core functions from __future__ import annotations + import logging import os import shutil import subprocess + import pandas as pd + logging.basicConfig( format='%(asctime)s - %(message)s', datefmt='%d-%b-%y %H:%M:%S', ) logger = logging.getLogger('syn-logger') logger.setLevel(logging.DEBUG) + + def conduct_self_test(): # Simulate full flow, ranking only subprocess.run( @@ -22,10 +27,12 @@ def conduct_self_test(): ) dfx = pd.read_csv('ranking_outputs/pairwise_ranks.tsv', sep='\t') + logger.info("Verifying output's properties ..") assert dfx.shape[0] == 120 assert dfx.shape[1] == 3 assert dfx['FeatureA'].values.tolist().pop() == 'label-(81; 100)' or dfx['FeatureB'].values.tolist().pop() == 'label-(81; 100)' + to_remove = ['ranking_outputs', 'test_data_synthetic'] for path in to_remove: if os.path.exists(path) and os.path.isdir(path): From bcd128c622d0e43586be55b3eb8fcda26f5cba3e Mon Sep 17 00:00:00 2001 From: bmramor Date: Thu, 21 Mar 2024 21:39:54 +0000 Subject: [PATCH 11/18] formatting --- outrank/task_selftest.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/outrank/task_selftest.py b/outrank/task_selftest.py index c0cb54a..78b11d1 100644 --- a/outrank/task_selftest.py +++ b/outrank/task_selftest.py @@ -41,9 +41,5 @@ def conduct_self_test(): logger.info('All tests passed, OutRank seems in shape \N{winking face}') - 
shutil.rmtree(path) - - logger.info('All tests passed, OutRank seems in shape \N{winking face}') - if __name__ == '__main__': conduct_self_test() From 9ef5117832c21aeb337bf462c345b82a3b73a355 Mon Sep 17 00:00:00 2001 From: bmramor Date: Fri, 22 Mar 2024 09:48:14 +0000 Subject: [PATCH 12/18] fix tests --- .../synthetic_data_generators/generator_naive.py | 2 +- outrank/core_ranking.py | 1 + outrank/task_selftest.py | 7 +++---- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/outrank/algorithms/synthetic_data_generators/generator_naive.py b/outrank/algorithms/synthetic_data_generators/generator_naive.py index d0606e6..7404360 100644 --- a/outrank/algorithms/synthetic_data_generators/generator_naive.py +++ b/outrank/algorithms/synthetic_data_generators/generator_naive.py @@ -14,7 +14,7 @@ def generate_random_matrix(num_features=100, size=20000): # Some noise target[target < 40] = 0 - target[target > 39] = 0 + target[target > 39] = 1 return sample, target diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index dc56aaa..6d1e855 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -244,6 +244,7 @@ def compute_combined_features( pbar.set_description('Concatenating into final frame ..') input_dataframe = pd.concat([input_dataframe, tmp_df], axis=1) del tmp_df + return input_dataframe diff --git a/outrank/task_selftest.py b/outrank/task_selftest.py index 78b11d1..744120a 100644 --- a/outrank/task_selftest.py +++ b/outrank/task_selftest.py @@ -1,6 +1,5 @@ # helper set of methods that enable anywhere verification of core functions from __future__ import annotations - import logging import os import shutil @@ -22,16 +21,16 @@ def conduct_self_test(): 'outrank --task data_generator --num_synthetic_rows 100000', shell=True, ) subprocess.run( - 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw --combination_number_upper_bound 60;', + 'outrank --task ranking --data_path test_data_synthetic --data_source csv-raw;', shell=True, ) dfx = pd.read_csv('ranking_outputs/pairwise_ranks.tsv', sep='\t') logger.info("Verifying output's properties ..") - assert dfx.shape[0] == 120 + assert dfx.shape[0] == 201 assert dfx.shape[1] == 3 - assert dfx['FeatureA'].values.tolist().pop() == 'label-(81; 100)' or dfx['FeatureB'].values.tolist().pop() == 'label-(81; 100)' + assert dfx['FeatureA'].values.tolist().pop() == 'label-(2; 100)' or dfx['FeatureB'].values.tolist().pop() == 'label-(2; 100)' to_remove = ['ranking_outputs', 'test_data_synthetic'] for path in to_remove: From 41c097527d948aa4c49dd951129588d39665f3b1 Mon Sep 17 00:00:00 2001 From: Blaz Mramor Date: Thu, 28 Mar 2024 22:02:05 +0100 Subject: [PATCH 13/18] debugging --- outrank/algorithms/importance_estimator.py | 16 ++++++---------- outrank/core_utils.py | 4 ++-- 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/outrank/algorithms/importance_estimator.py b/outrank/algorithms/importance_estimator.py index dd28823..e37ba1c 100644 --- a/outrank/algorithms/importance_estimator.py +++ b/outrank/algorithms/importance_estimator.py @@ -57,19 +57,15 @@ def sklearn_surrogate( vector_first = vector_third del vector_third - unique_values, counts = np.unique(vector_second, return_counts=True) - if X.size <= 1: - vector_first = transf.fit_transform(vector_first.reshape(-1, 1)) - estimate_feature_importance_list = cross_val_score( - clf, vector_first, vector_second, scoring='neg_log_loss', cv=num_folds, - ) + X = vector_first.reshape(-1, 1) else: X = np.concatenate((X, vector_first.reshape(-1, 
1)), axis=1) - X = transf.fit_transform(X) - estimate_feature_importance_list = cross_val_score( - clf, X, vector_second, scoring='neg_log_loss', cv=num_folds, - ) + + X = transf.fit_transform(X) + estimate_feature_importance_list = cross_val_score( + clf, X, vector_second, scoring='neg_log_loss', cv=num_folds, + ) estimate_feature_importance = 1 + \ np.median(estimate_feature_importance_list) diff --git a/outrank/core_utils.py b/outrank/core_utils.py index 0680008..55988d6 100644 --- a/outrank/core_utils.py +++ b/outrank/core_utils.py @@ -393,7 +393,7 @@ def parse_csv_raw(data_path) -> DatasetInformationStorage: ) -def extract_features_from_reference_JSON(json_path: str, combined_features_only = False, full_feature_space = False) -> set[Any]: +def extract_features_from_reference_JSON(json_path: str, combined_features_only = False, all_features = False) -> set[Any]: """Given a model's JSON, extract unique features""" with open(json_path) as jp: @@ -401,7 +401,7 @@ def extract_features_from_reference_JSON(json_path: str, combined_features_only unique_features = set() feature_space = content['desc'].get('features', []) - if full_feature_space: + if all_features: return set(feature_space) fields_space = content['desc'].get('fields', []) From bfbe096c0dfa2ec8446533a8b2a9ce5139fdc516 Mon Sep 17 00:00:00 2001 From: Blaz Mramor Date: Thu, 28 Mar 2024 22:03:36 +0100 Subject: [PATCH 14/18] debugging --- outrank/core_ranking.py | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index 6d1e855..a406c26 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -116,12 +116,21 @@ def mixed_rank_graph( out_time_struct['encoding_columns'] = end_enc_timer - start_enc_timer combinations = get_combinations_from_columns(all_columns, args) - #combinations = prior_combinations_sample(combinations, args) - #random.shuffle(combinations) reference_model_features = {} if is_prior_heuristic(args): - reference_model_features = [(" AND ").join(tuple(sorted(item.split(",")))) for item in extract_features_from_reference_JSON(args.reference_model_JSON, full_feature_space = True)] + reference_model_features = [(" AND ").join(tuple(sorted(item.split(",")))) for item in extract_features_from_reference_JSON(args.reference_model_JSON, all_features=True)] + combinations = [comb for comb in combinations if comb[0] not in reference_model_features and comb[1] not in reference_model_features] + print(combinations) + print("\n\n") + + combinations = prior_combinations_sample(combinations, args) + print(GLOBAL_PRIOR_COMB_COUNTS) + print(combinations) + print("\n\n") + random.shuffle(combinations) + print(combinations) + print("\n\n") if args.heuristic == 'Constant': final_constant_imp = [] @@ -196,30 +205,20 @@ def compute_combined_features( model_combinations = [] full_combination_space = [] - if is_prior_heuristic(args): + + if args.reference_model_JSON != '': model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations] - if args.interaction_order > 1: - full_combination_space = list( - itertools.combinations(all_columns, interaction_order), - ) - else: - if args.reference_model_JSON != '': - model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) - model_combinations = 
[tuple(sorted(combination.split(','))) for combination in model_combinations] - full_combination_space = model_combinations - else: + full_combination_space = model_combinations + + if args.interaction_order > 1: full_combination_space = list( itertools.combinations(all_columns, interaction_order), ) + full_combination_space = prior_combinations_sample(full_combination_space, args) - if args.combination_number_upper_bound: - random.shuffle(full_combination_space) - full_combination_space = full_combination_space[ - : args.combination_number_upper_bound - ] - if is_prior_heuristic(args): - full_combination_space = full_combination_space + [tuple for tuple in model_combinations if tuple not in full_combination_space] + if is_prior_heuristic(args): + full_combination_space = full_combination_space + [tuple for tuple in model_combinations if tuple not in full_combination_space] com_counter = 0 From 75d37d2d803ec971fa37a1cd2e0de809745366a4 Mon Sep 17 00:00:00 2001 From: Blaz Mramor Date: Thu, 28 Mar 2024 22:33:37 +0100 Subject: [PATCH 15/18] fix global variable creation --- outrank/core_ranking.py | 26 +++++++++++--------------- outrank/core_utils.py | 4 ++-- outrank/task_summary.py | 7 ++++--- 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index a406c26..832a540 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -51,9 +51,10 @@ def prior_combinations_sample(combinations: list[tuple[Any, ...]], args: Any) -> list[tuple[Any, ...]]: """Make sure only relevant subspace of combinations is selected based on prior counts""" - if len(GLOBAL_PRIOR_COMB_COUNTS) == 0: - for combination in combinations: - GLOBAL_PRIOR_COMB_COUNTS[combination] += 1 + missing_combinations = set(set(combinations)).difference(GLOBAL_PRIOR_COMB_COUNTS.keys()) + if len(missing_combinations) > 0: + for combination in missing_combinations: + GLOBAL_PRIOR_COMB_COUNTS[combination] = 0 tmp = combinations[:args.combination_number_upper_bound] else: tmp = list(x[0] for x in sorted(GLOBAL_PRIOR_COMB_COUNTS.items(), key=lambda x:x[1], reverse=False))[:args.combination_number_upper_bound] @@ -121,16 +122,9 @@ def mixed_rank_graph( if is_prior_heuristic(args): reference_model_features = [(" AND ").join(tuple(sorted(item.split(",")))) for item in extract_features_from_reference_JSON(args.reference_model_JSON, all_features=True)] combinations = [comb for comb in combinations if comb[0] not in reference_model_features and comb[1] not in reference_model_features] - print(combinations) - print("\n\n") combinations = prior_combinations_sample(combinations, args) - print(GLOBAL_PRIOR_COMB_COUNTS) - print(combinations) - print("\n\n") random.shuffle(combinations) - print(combinations) - print("\n\n") if args.heuristic == 'Constant': final_constant_imp = [] @@ -206,10 +200,6 @@ def compute_combined_features( model_combinations = [] full_combination_space = [] - if args.reference_model_JSON != '': - model_combinations = extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) - model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations] - full_combination_space = model_combinations if args.interaction_order > 1: full_combination_space = list( @@ -217,6 +207,12 @@ def compute_combined_features( ) full_combination_space = prior_combinations_sample(full_combination_space, args) + if args.reference_model_JSON != '': + model_combinations = 
extract_features_from_reference_JSON(args.reference_model_JSON, combined_features_only = True) + model_combinations = [tuple(sorted(combination.split(','))) for combination in model_combinations] + if not is_prior_heuristic(args): + full_combination_space = model_combinations + if is_prior_heuristic(args): full_combination_space = full_combination_space + [tuple for tuple in model_combinations if tuple not in full_combination_space] @@ -243,7 +239,7 @@ def compute_combined_features( pbar.set_description('Concatenating into final frame ..') input_dataframe = pd.concat([input_dataframe, tmp_df], axis=1) del tmp_df - + return input_dataframe diff --git a/outrank/core_utils.py b/outrank/core_utils.py index 55988d6..336cc35 100644 --- a/outrank/core_utils.py +++ b/outrank/core_utils.py @@ -646,8 +646,8 @@ def summarize_rare_counts( ) -def is_prior_heuristic(args: Any): - if "-prior" in args.heuristic and args.reference_model_JSON and args.reference_model_JSON != "": +def is_prior_heuristic(args: Any) -> bool: + if "-prior" in args.heuristic and args.reference_model_JSON: return True return False diff --git a/outrank/task_summary.py b/outrank/task_summary.py index 38475d9..458c9b3 100644 --- a/outrank/task_summary.py +++ b/outrank/task_summary.py @@ -37,9 +37,10 @@ def outrank_task_result_summary(args): min_score = np.min(final_df[f'Score {args.heuristic}'].values) max_score = np.max(final_df[f'Score {args.heuristic}'].values) - final_df[f'Score {args.heuristic}'] = ( - final_df[f'Score {args.heuristic}'] - min_score - ) / (max_score - min_score) + if "MI" in args.heuristic: + final_df[f'Score {args.heuristic}'] = ( + final_df[f'Score {args.heuristic}'] - min_score + ) / (max_score - min_score) logging.info(f'Storing summary files to {args.output_folder}') pd.set_option('display.max_rows', None, 'display.max_columns', None) singles_path = os.path.join(args.output_folder, 'feature_singles.tsv') From cf305ae273632f1881b9c88a0ac713e021219d7c Mon Sep 17 00:00:00 2001 From: Blaz Mramor Date: Sun, 31 Mar 2024 21:04:33 +0200 Subject: [PATCH 16/18] prior combinations fix --- outrank/core_ranking.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/outrank/core_ranking.py b/outrank/core_ranking.py index 832a540..5633ada 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -51,13 +51,15 @@ def prior_combinations_sample(combinations: list[tuple[Any, ...]], args: Any) -> list[tuple[Any, ...]]: """Make sure only relevant subspace of combinations is selected based on prior counts""" + if len(combinations) == 0: + return [] + missing_combinations = set(set(combinations)).difference(GLOBAL_PRIOR_COMB_COUNTS.keys()) if len(missing_combinations) > 0: for combination in missing_combinations: GLOBAL_PRIOR_COMB_COUNTS[combination] = 0 - tmp = combinations[:args.combination_number_upper_bound] - else: - tmp = list(x[0] for x in sorted(GLOBAL_PRIOR_COMB_COUNTS.items(), key=lambda x:x[1], reverse=False))[:args.combination_number_upper_bound] + + tmp = sorted(combinations, key=GLOBAL_PRIOR_COMB_COUNTS.get, reverse=False)[:args.combination_number_upper_bound] for combination in tmp: GLOBAL_PRIOR_COMB_COUNTS[combination] += 1 From 6d650ddb7694d0dfafc06f5c247bc525e84cb0cf Mon Sep 17 00:00:00 2001 From: Blaz Mramor Date: Sun, 31 Mar 2024 21:40:21 +0200 Subject: [PATCH 17/18] remove logger from function --- outrank/core_ranking.py | 5 ++--- tests/ranking_module_test.py | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/outrank/core_ranking.py 
b/outrank/core_ranking.py index 5633ada..30f892c 100644 --- a/outrank/core_ranking.py +++ b/outrank/core_ranking.py @@ -186,7 +186,6 @@ def enrich_with_transformations( def compute_combined_features( input_dataframe: pd.DataFrame, - logger: Any, args: Any, pbar: Any, is_3mr: bool = False, @@ -547,7 +546,7 @@ def compute_batch_ranking( if args.interaction_order > 1 or args.reference_model_JSON: pbar.set_description('Constructing new features') input_dataframe = compute_combined_features( - input_dataframe, logger, args, pbar, + input_dataframe, args, pbar, ) # in case of 3mr we compute the score of combinations against the target @@ -556,7 +555,7 @@ def compute_batch_ranking( 'Constructing features for computing relations in 3mr', ) input_dataframe = compute_combined_features( - input_dataframe, logger, args, pbar, True, + input_dataframe, args, pbar, True, ) if args.include_noise_baseline_features == 'True' and args.heuristic != 'Constant': diff --git a/tests/ranking_module_test.py b/tests/ranking_module_test.py index e49880c..fd99092 100644 --- a/tests/ranking_module_test.py +++ b/tests/ranking_module_test.py @@ -82,7 +82,7 @@ def test_compute_combinations(self): random_df.columns = ['F1', 'F2', 'F3'] local_pbar = tqdm.tqdm(total=100, position=0) transformed_df = compute_combined_features( - random_df, None, args, local_pbar, + random_df, args, local_pbar, ) self.assertEqual(transformed_df.shape[1], 4) @@ -91,7 +91,7 @@ def test_compute_combinations(self): random_df = pd.DataFrame(random_matrix) random_df.columns = ['F1', 'F2', 'F3'] transformed_df = compute_combined_features( - random_df, None, args, local_pbar, + random_df, args, local_pbar, ) self.assertEqual(transformed_df.shape[1], 6) From 5dd9dd9d79424f272556c27181ab02b78893353b Mon Sep 17 00:00:00 2001 From: Blaz Mramor Date: Tue, 2 Apr 2024 14:48:14 +0200 Subject: [PATCH 18/18] double line space --- outrank/task_selftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/outrank/task_selftest.py b/outrank/task_selftest.py index 744120a..cd1cb45 100644 --- a/outrank/task_selftest.py +++ b/outrank/task_selftest.py @@ -40,5 +40,6 @@ def conduct_self_test(): logger.info('All tests passed, OutRank seems in shape \N{winking face}') + if __name__ == '__main__': conduct_self_test()
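
For readers following the series: below is a minimal, self-contained sketch of the prior-aware surrogate scoring these patches converge on. The function name and signature are illustrative, not the OutRank API (the real logic is split across sklearn_surrogate and initialize_classifier in outrank/algorithms/importance_estimator.py); only standard scikit-learn calls are used.

from __future__ import annotations

import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OneHotEncoder


def surrogate_score_with_prior(
    candidate: np.ndarray,            # shape (n,), categorical codes for one feature
    label: np.ndarray,                # shape (n,), binary target
    prior: np.ndarray | None = None,  # shape (n, k), reference-model feature columns
    num_folds: int = 4,
) -> float:
    # Score the candidate by the lift it gives *on top of* the reference model:
    # the design matrix is [prior features + candidate] when a prior is given,
    # and the candidate alone otherwise (the non-prior surrogate path).
    X = candidate.reshape(-1, 1)
    if prior is not None:
        X = np.concatenate((prior, X), axis=1)
    X = OneHotEncoder().fit_transform(X)
    clf = SGDClassifier(max_iter=100_000, loss='log_loss')
    scores = cross_val_score(clf, X, label, scoring='neg_log_loss', cv=num_folds)
    # neg_log_loss is <= 0, so "1 + median" keeps a higher-is-better ordering,
    # mirroring the estimate returned by sklearn_surrogate.
    return 1 + float(np.median(scores))

As in PATCH 13/18, the encoder is fit on the full matrix before cross-validation; that trades a small amount of leakage for speed and avoids unseen-category errors across folds.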
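
The combination sampling reworked in PATCH 15/18 and 16/18 amounts to a least-scored-first round robin over a module-level counter. A condensed sketch (argument names adapted; the original reads the bound from args.combination_number_upper_bound):

GLOBAL_PRIOR_COMB_COUNTS: dict[tuple, int] = {}


def prior_combinations_sample(
    combinations: list[tuple],
    upper_bound: int,
) -> list[tuple]:
    if not combinations:
        return []
    # Seed unseen pairs with a zero count so dict.get is always defined.
    for combination in combinations:
        GLOBAL_PRIOR_COMB_COUNTS.setdefault(combination, 0)
    # Take the least-scored pairs first, so successive minibatches sweep the
    # whole combination space instead of resampling the same subset.
    chosen = sorted(combinations, key=GLOBAL_PRIOR_COMB_COUNTS.get)[:upper_bound]
    for combination in chosen:
        GLOBAL_PRIOR_COMB_COUNTS[combination] += 1
    return chosen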
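
Finally, a sketch of how a reference model JSON such as tests/test_ref_model.json (PATCH 03/18) is interpreted, assuming the schema shown in that file: comma-joined entries like "f0,f1" denote combined features, which PATCH 07/18 renames to OutRank's "f0 AND f1" column convention before using them as the prior feature set.

import json


def load_reference_features(path: str) -> set[str]:
    with open(path) as jp:
        content = json.load(jp)
    # "f0,f1" in the JSON is a combined feature; rename it to the "f0 AND f1"
    # form that compute_combined_features assigns to combined columns.
    return {
        ' AND '.join(sorted(feature.split(',')))
        for feature in content['desc'].get('features', [])
    }

# load_reference_features('tests/test_ref_model.json')
# -> {'f0', 'f1', 'f0 AND f1'}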