From 25dee2d5ab9dbe2e58eba90ffcb60cea66b836ef Mon Sep 17 00:00:00 2001 From: Konstantinos Azas Date: Wed, 9 Oct 2024 20:24:03 +0300 Subject: [PATCH] Update unit tests --- mlxtend/feature_selection/column_selector.py | 2 +- .../tests/test_association_rules.py | 82 ++++++++++++------- 2 files changed, 53 insertions(+), 31 deletions(-) diff --git a/mlxtend/feature_selection/column_selector.py b/mlxtend/feature_selection/column_selector.py index e09c35fdc..39fcc2816 100644 --- a/mlxtend/feature_selection/column_selector.py +++ b/mlxtend/feature_selection/column_selector.py @@ -78,7 +78,7 @@ def transform(self, X, y=None): # We use the loc or iloc accessor if the input is a pandas dataframe if hasattr(X, "loc") or hasattr(X, "iloc"): - if type(self.cols) == tuple: + if isinstance(self.cols, tuple): self.cols = list(self.cols) types = {type(i) for i in self.cols} if len(types) > 1: diff --git a/mlxtend/frequent_patterns/tests/test_association_rules.py b/mlxtend/frequent_patterns/tests/test_association_rules.py index 1035183c9..309bc47b8 100644 --- a/mlxtend/frequent_patterns/tests/test_association_rules.py +++ b/mlxtend/frequent_patterns/tests/test_association_rules.py @@ -42,19 +42,20 @@ "consequent support", "support", "confidence", + "representativity", "lift", "leverage", "conviction", "zhangs_metric", "jaccard", "certainty", - "kulczynski", + "kulczynski" ] # fmt: off def test_default(): - res_df = association_rules(df_freq_items) + res_df = association_rules(df_freq_items, df, len(df)) res_df["antecedents"] = res_df["antecedents"].apply(lambda x: str(frozenset(x))) res_df["consequents"] = res_df["consequents"].apply(lambda x: str(frozenset(x))) res_df.sort_values(columns_ordered, inplace=True) @@ -85,7 +86,7 @@ def test_default(): def test_datatypes(): - res_df = association_rules(df_freq_items) + res_df = association_rules(df_freq_items, df, len(df)) for i in res_df["antecedents"]: assert isinstance(i, frozenset) is True @@ -100,7 +101,7 @@ def test_datatypes(): lambda x: set(x) ) - res_df = association_rules(df_freq_items) + res_df = association_rules(df_freq_items, df, len(df)) for i in res_df["antecedents"]: assert isinstance(i, frozenset) is True @@ -110,16 +111,16 @@ def test_datatypes(): def test_no_support_col(): df_no_support_col = df_freq_items.loc[:, ["itemsets"]] - numpy_assert_raises(ValueError, association_rules, df_no_support_col) + numpy_assert_raises(ValueError, association_rules, df_no_support_col, df, len(df)) def test_no_itemsets_col(): df_no_itemsets_col = df_freq_items.loc[:, ["support"]] - numpy_assert_raises(ValueError, association_rules, df_no_itemsets_col) + numpy_assert_raises(ValueError, association_rules, df_no_itemsets_col, df, len(df)) def test_wrong_metric(): - numpy_assert_raises(ValueError, association_rules, df_freq_items, "unicorn") + numpy_assert_raises(ValueError, association_rules, df_freq_items, df, len(df), False, "unicorn") def test_empty_result(): @@ -131,6 +132,7 @@ def test_empty_result(): "consequent support", "support", "confidence", + "representativity", "lift", "leverage", "conviction", @@ -140,82 +142,100 @@ def test_empty_result(): "kulczynski", ] ) - res_df = association_rules(df_freq_items, min_threshold=2) + res_df = association_rules(df_freq_items, df, len(df), min_threshold=2) assert res_df.equals(expect) def test_leverage(): - res_df = association_rules(df_freq_items, min_threshold=0.1, metric="leverage") + res_df = association_rules( + df_freq_items, df, len(df), min_threshold=0.1, metric="leverage" + ) assert res_df.values.shape[0] == 6 res_df = association_rules( - df_freq_items_with_colnames, min_threshold=0.1, metric="leverage" + df_freq_items_with_colnames, df, len(df), min_threshold=0.1, metric="leverage" ) assert res_df.values.shape[0] == 6 def test_conviction(): - res_df = association_rules(df_freq_items, min_threshold=1.5, metric="conviction") + res_df = association_rules( + df_freq_items, df, len(df), min_threshold=1.5, metric="conviction" + ) assert res_df.values.shape[0] == 11 res_df = association_rules( - df_freq_items_with_colnames, min_threshold=1.5, metric="conviction" + df_freq_items_with_colnames, df, len(df), min_threshold=1.5, metric="conviction" ) assert res_df.values.shape[0] == 11 def test_lift(): - res_df = association_rules(df_freq_items, min_threshold=1.1, metric="lift") + res_df = association_rules( + df_freq_items, df, len(df), min_threshold=1.1, metric="lift" + ) assert res_df.values.shape[0] == 6 res_df = association_rules( - df_freq_items_with_colnames, min_threshold=1.1, metric="lift" + df_freq_items_with_colnames, df, len(df), min_threshold=1.1, metric="lift" ) assert res_df.values.shape[0] == 6 def test_confidence(): - res_df = association_rules(df_freq_items, min_threshold=0.8, metric="confidence") + res_df = association_rules( + df_freq_items, df, len(df), min_threshold=0.8, metric="confidence" + ) assert res_df.values.shape[0] == 9 res_df = association_rules( - df_freq_items_with_colnames, min_threshold=0.8, metric="confidence" + df_freq_items_with_colnames, df, len(df), min_threshold=0.8, metric="confidence" ) assert res_df.values.shape[0] == 9 +def test_representativity(): + res_df = association_rules(df_freq_items, df, len(df), min_threshold=1.0, metric="representativity") + assert res_df.values.shape[0] == 16 + + res_df = association_rules( + df_freq_items_with_colnames, df, len(df), min_threshold=1.0, metric="representativity" + ) + assert res_df.values.shape[0] == 16 + + def test_jaccard(): - res_df = association_rules(df_freq_items, min_threshold=0.7, metric="jaccard") + res_df = association_rules(df_freq_items, df, len(df), min_threshold=0.7, metric="jaccard") assert res_df.values.shape[0] == 8 res_df = association_rules( - df_freq_items_with_colnames, min_threshold=0.7, metric="jaccard" + df_freq_items_with_colnames, df, len(df), min_threshold=0.7, metric="jaccard" ) assert res_df.values.shape[0] == 8 def test_certainty(): - res_df = association_rules(df_freq_items, metric="certainty", min_threshold=0.6) + res_df = association_rules(df_freq_items, df, len(df), metric="certainty", min_threshold=0.6) assert res_df.values.shape[0] == 3 res_df = association_rules( - df_freq_items_with_colnames, metric="certainty", min_threshold=0.6 + df_freq_items_with_colnames, df, len(df), metric="certainty", min_threshold=0.6 ) assert res_df.values.shape[0] == 3 def test_kulczynski(): - res_df = association_rules(df_freq_items, metric="kulczynski", min_threshold=0.9) + res_df = association_rules(df_freq_items, df, len(df), metric="kulczynski", min_threshold=0.9) assert res_df.values.shape[0] == 2 res_df = association_rules( - df_freq_items_with_colnames, metric="kulczynski", min_threshold=0.6 + df_freq_items_with_colnames, df, len(df), metric="kulczynski", min_threshold=0.6 ) assert res_df.values.shape[0] == 16 def test_frozenset_selection(): - res_df = association_rules(df_freq_items) + res_df = association_rules(df_freq_items, df, len(df)) sel = res_df[res_df["consequents"] == frozenset((3, 5))] assert sel.values.shape[0] == 1 @@ -231,17 +251,19 @@ def test_frozenset_selection(): def test_override_metric_with_support(): - res_df = association_rules(df_freq_items_with_colnames, min_threshold=0.8) + res_df = association_rules( + df_freq_items_with_colnames, df, len(df), min_threshold=0.8 + ) # default metric is confidence assert res_df.values.shape[0] == 9 res_df = association_rules( - df_freq_items_with_colnames, min_threshold=0.8, metric="support" + df_freq_items_with_colnames, df, len(df), min_threshold=0.8, metric="support" ) assert res_df.values.shape[0] == 2 res_df = association_rules( - df_freq_items_with_colnames, min_threshold=0.8, support_only=True + df_freq_items_with_colnames, df, len(df), min_threshold=0.8, support_only=True ) assert res_df.values.shape[0] == 2 @@ -274,7 +296,7 @@ def test_on_df_with_missing_entries(): df = pd.DataFrame(dict) - numpy_assert_raises(KeyError, association_rules, df) + numpy_assert_raises(KeyError, association_rules, df , df, len(df)) def test_on_df_with_missing_entries_support_only(): @@ -304,13 +326,13 @@ def test_on_df_with_missing_entries_support_only(): } df = pd.DataFrame(dict) - df_result = association_rules(df, support_only=True, min_threshold=0.1) + df_result = association_rules(df, df, len(df), support_only=True, min_threshold=0.1) assert df_result["support"].shape == (18,) assert int(np.isnan(df_result["support"].values).any()) != 1 def test_with_empty_dataframe(): - df = df_freq_items_with_colnames.iloc[:0] + df_freq = df_freq_items_with_colnames.iloc[:0] with pytest.raises(ValueError): - association_rules(df) + association_rules(df_freq, df, len(df))