From 1813e965ddfdc954283a1e75db8b1ec1831e1e39 Mon Sep 17 00:00:00 2001
From: Mavs
Date: Sun, 3 Dec 2023 20:19:57 +0100
Subject: [PATCH] reduce test time

---
 atom/data_cleaning.py        |  7 ++--
 atom/plots/predictionplot.py | 15 +++----
 atom/utils/utils.py          |  2 +-
 pyproject.toml               |  1 -
 tests/conftest.py            |  8 ++++
 tests/test_basemodel.py      |  4 +-
 tests/test_basetrainer.py    | 35 +++++++---------
 tests/test_models.py         | 14 +++----
 tests/test_plots.py          | 80 ++++++++++++++++++++++++++----------
 tests/test_utils.py          |  6 +--
 10 files changed, 102 insertions(+), 70 deletions(-)

diff --git a/atom/data_cleaning.py b/atom/data_cleaning.py
index 3f35703e2..8108985e6 100644
--- a/atom/data_cleaning.py
+++ b/atom/data_cleaning.py
@@ -38,12 +38,11 @@
 from scipy.stats import zscore
 from sklearn.base import BaseEstimator, _clone_parametrized
 from sklearn.compose import ColumnTransformer
-from sklearn.experimental import enable_iterative_imputer
+from sklearn.experimental import enable_iterative_imputer  # noqa: F401
 from sklearn.impute import IterativeImputer, KNNImputer
 from typing_extensions import Self
 
 from atom.basetransformer import BaseTransformer
-from atom.pipeline import Pipeline
 from atom.utils.constants import CAT_TYPES, DEFAULT_MISSING
 from atom.utils.types import (
     Bins, Bool, CategoricalStrats, DataFrame, DiscretizerStrats, Engine,
@@ -53,8 +52,8 @@
     dataframe_t, sequence_t, series_t,
 )
 from atom.utils.utils import (
-    bk, check_is_fitted, composed, crash, get_col_order, get_cols, it, lst,
-    merge, method_to_log, n_cols, replace_missing, sign, to_df, to_series,
+    bk, composed, crash, get_col_order, get_cols, it, lst, merge,
+    method_to_log, n_cols, replace_missing, sign, to_df, to_series,
     variable_return, wrap_methods,
 )
 
diff --git a/atom/plots/predictionplot.py b/atom/plots/predictionplot.py
index a1d30c3bc..5a7c3a817 100644
--- a/atom/plots/predictionplot.py
+++ b/atom/plots/predictionplot.py
@@ -1699,21 +1699,16 @@ class is always the positive one.
             for ds in ("train", "test"):
                 # Calculating shap values is computationally expensive,
                 # therefore, select a random subsample for large data sets
-                if len(data := getattr(m, ds)) > 500:
+                if len(data := getattr(m, f"X_{ds}")) > 500:
                     data = data.sample(500, random_state=self.random_state)
 
-                # Replace data with the calculated shap values
-                explanation = m._shap.get_explanation(data[m.branch.features], target_c)
-                data[m.branch.features] = explanation.values
+                explanation = m._shap.get_explanation(data, target_c)
+                shap = bk.DataFrame(explanation.values, columns=m.branch.features)
 
                 parshap[ds] = pd.Series(index=fxs, dtype=float)
                 for fx in fxs:
-                    # All other features are covariates
-                    covariates = [f for f in data.columns[:-1] if f != fx]
-                    cols = [fx, data.columns[-1], *covariates]
-
-                    # Compute covariance
-                    V = data[cols].cov()
+                    # Compute covariance (other variables are covariates)
+                    V = shap[[c for c in shap if c != fx]].cov()
 
                     # Inverse covariance matrix
                     Vi = np.linalg.pinv(V, hermitian=True)
diff --git a/atom/utils/utils.py b/atom/utils/utils.py
index 412a2b1cc..c76e51a76 100644
--- a/atom/utils/utils.py
+++ b/atom/utils/utils.py
@@ -1008,7 +1008,7 @@ def get_explanation(
             )
 
         # Remember shap values in the _shap_values attribute
-        self._shap_values = pd.concat(
+        self._shap_values = bk.concat(
             [
                 self._shap_values,
                 bk.Series(list(self._explanation.values), index=calculate.index),
diff --git a/pyproject.toml b/pyproject.toml
index 83f0ca00f..149cc75bb 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -118,7 +118,6 @@ ignore = [
 ]
 per-file-ignores = [
     "__init__.py: F401",  # Imported but unused
-    "data_cleaning.py: F401",  # Imported but unused (import experimental)
 ]
 
 [tool.isort]
diff --git a/tests/conftest.py b/tests/conftest.py
index c9ebbb474..832d61b74 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -11,6 +11,7 @@
 
 from pathlib import Path
 from typing import Any
+from unittest.mock import patch
 
 import numpy as np
 import pandas as pd
@@ -89,6 +90,13 @@ def change_current_dir(tmp_path: Path, monkeypatch: MonkeyPatch):
     monkeypatch.chdir(tmp_path)
 
 
+@pytest.fixture(autouse=True)
+def mock_mlflow_log_model():
+    """Mock mlflow's log_model function."""
+    with patch("mlflow.sklearn.log_model"):
+        yield
+
+
 def get_train_test(
     X: XSelector | None,
     y: Sequence[Any] | DataFrame,
diff --git a/tests/test_basemodel.py b/tests/test_basemodel.py
index 4b15b3ce3..26b2a820a 100644
--- a/tests/test_basemodel.py
+++ b/tests/test_basemodel.py
@@ -291,8 +291,8 @@ def test_nested_runs_to_mlflow(mlflow):
     """Assert that the trials are logged to mlflow as nested runs."""
     atom = ATOMClassifier(X_bin, y_bin, experiment="test", random_state=1)
     atom.log_ht = True
-    atom.run("Tree", n_trials=3)
-    assert mlflow.call_count == 4  # n_trials + fit
+    atom.run("Tree", n_trials=1, errors="raise")
+    assert mlflow.call_count == 2  # n_trials + fit
 
 
 @patch("mlflow.log_params")
diff --git a/tests/test_basetrainer.py b/tests/test_basetrainer.py
index c0dee116e..233e808f2 100644
--- a/tests/test_basetrainer.py
+++ b/tests/test_basetrainer.py
@@ -71,16 +71,16 @@ def test_invalid_model_name():
 
 def test_multiple_models_with_add():
     """Assert that you can add model names to select them."""
-    trainer = DirectClassifier("gnb+lr+lr_2", random_state=1)
+    trainer = DirectClassifier("Dummy+tree+tree_2", random_state=1)
     trainer.run(bin_train, bin_test)
-    assert trainer.models == ["GNB", "LR", "LR_2"]
+    assert trainer.models == ["Dummy", "Tree", "Tree_2"]
 
 
 def test_multiple_same_models():
     """Assert that the same model can be used with different names."""
-    trainer = DirectClassifier(["lr", "lr_2", "lr_3"], random_state=1)
+    trainer = DirectClassifier(["Tree", "Tree_2", "Tree_3"], random_state=1)
     trainer.run(bin_train, bin_test)
-    assert trainer.models == ["LR", "LR_2", "LR_3"]
+    assert trainer.models == ["Tree", "Tree_2", "Tree_3"]
 
 
 def test_only_task_models():
@@ -378,21 +378,25 @@ def test_errors_keep():
     assert trainer._models == [trainer.lda]
 
 
-def test_parallel_with_ray():
+@patch("atom.basetransformer.ray")
+@patch("atom.basetrainer.ray")
+def test_parallel_with_ray(_, __):
     """Assert that parallel runs successfully with ray backend."""
     trainer = DirectClassifier(
         models=["LR", "LDA"],
         parallel=True,
-        n_jobs=2,
+        n_jobs=1,
         backend="ray",
         random_state=1,
     )
-    trainer.run(bin_train, bin_test)
-    assert trainer._models == [trainer.lr, trainer.lda]
+    # Fails because Mock returns empty list
+    with pytest.raises(RuntimeError, match=".*All models failed.*"):
+        trainer.run(bin_train, bin_test)
     ray.shutdown()
 
 
-def test_parallel():
+@patch("atom.basetrainer.Parallel")
+def test_parallel(_):
     """Assert that parallel runs successfully."""
     trainer = DirectClassifier(
         models=["LR", "LDA"],
@@ -400,17 +404,6 @@
         parallel=True,
         n_jobs=2,
         random_state=1,
     )
-    trainer.run(bin_train, bin_test)
-    assert trainer._models == [trainer.lr, trainer.lda]
-
-
-def test_all_models_failed():
-    """Assert that an error is raised when all models failed."""
-    trainer = DirectClassifier(
-        models=["LR", "RF"],
-        n_trials=1,
-        ht_params={"distributions": "test"},
-        random_state=1,
-    )
+    # Fails because Mock returns empty list
     with pytest.raises(RuntimeError, match=".*All models failed.*"):
         trainer.run(bin_train, bin_test)
diff --git a/tests/test_models.py b/tests/test_models.py
index 0f0fdba69..668951f83 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -8,7 +8,7 @@
 """
 
 from platform import machine
-from unittest.mock import MagicMock, patch
+from unittest.mock import Mock, patch
 
 import numpy as np
 import pandas as pd
@@ -130,9 +130,9 @@ def test_models_sklearnex_regression():
 @patch.dict(
     "sys.modules",
     {
-        "cuml": MagicMock(spec=["__spec__"]),
-        "cuml.common.device_selection": MagicMock(spec=["set_global_device_type"]),
-        "cuml.internals.memory_utils": MagicMock(spec=["set_global_output_type"]),
+        "cuml": Mock(spec=["__spec__"]),
+        "cuml.common.device_selection": Mock(spec=["set_global_device_type"]),
+        "cuml.internals.memory_utils": Mock(spec=["set_global_output_type"]),
     }
 )
 def test_models_cuml_classification():
@@ -159,9 +159,9 @@ def test_models_cuml_classification():
 @patch.dict(
     "sys.modules",
     {
-        "cuml": MagicMock(spec=["__spec__"]),
-        "cuml.common.device_selection": MagicMock(spec=["set_global_device_type"]),
-        "cuml.internals.memory_utils": MagicMock(spec=["set_global_output_type"]),
+        "cuml": Mock(spec=["__spec__"]),
+        "cuml.common.device_selection": Mock(spec=["set_global_device_type"]),
+        "cuml.internals.memory_utils": Mock(spec=["set_global_output_type"]),
     }
 )
 def test_models_cuml_regression():
diff --git a/tests/test_plots.py b/tests/test_plots.py
index a06a9ab38..6f69d07a3 100644
--- a/tests/test_plots.py
+++ b/tests/test_plots.py
@@ -9,10 +9,13 @@
 
 import glob
 from pathlib import Path
-from unittest.mock import patch
+from unittest.mock import Mock, patch
 
+import numpy as np
 import pandas as pd
 import pytest
+from optuna.visualization._terminator_improvement import _ImprovementInfo
+from shap.plots._force import AdditiveForceVisualizer
 from sklearn.metrics import f1_score, get_scorer
 
 from atom import ATOMClassifier, ATOMForecaster, ATOMRegressor
@@ -337,13 +340,13 @@ def test_plot_relationships():
 @pytest.mark.parametrize("scoring", [None, "auc"])
 def test_plot_rfecv(scoring):
     """Assert that the plot_rfecv method works."""
-    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
+    atom = ATOMClassifier(X_bin, y_bin, n_rows=0.1, random_state=1)
 
     # Didn't run RFECV
     with pytest.raises(PermissionError, match=".*using the 'rfecv' strategy.*"):
         atom.plot_rfecv(display=False)
 
-    atom.feature_selection("rfecv", solver="lr", n_features=20, scoring=scoring)
+    atom.feature_selection("rfecv", solver="tree", n_features=20, scoring=scoring)
     atom.plot_rfecv(display=False)
@@ -357,6 +360,14 @@ def test_plot_wordcloud():
 
 # Test HyperparameterTuningPlot ==================================== >>
 
+def test_check_hyperparams():
+    """Assert that an error is raised when models didn't run HT."""
+    atom = ATOMClassifier(X_bin, y_bin, random_state=1)
+    atom.run("Tree")
+    with pytest.raises(PermissionError, match=".*models that ran hyperparameter.*"):
+        atom._check_hyperparams([atom.tree])
+
+
 def test_get_hyperparams():
     """Assert that hyperparameters can be retrieved."""
     atom = ATOMClassifier(X_bin, y_bin, random_state=1)
@@ -396,8 +407,8 @@ def test_plot_edf():
 def test_plot_hyperparameter_importance():
     """Assert that the plot_hyperparameter_importance method works."""
     atom = ATOMRegressor(X_reg, y_reg, random_state=1)
-    atom.run("lasso", n_trials=3)
-    atom.plot_hyperparameter_importance(display=False)
+    atom.run("lasso", metric=["mse", "r2"], n_trials=3)
+    atom.plot_hyperparameter_importance(metric=1, display=False)
 
 
 def test_plot_hyperparameters():
@@ -438,8 +449,11 @@ def test_plot_slice():
     atom.plot_slice(display=False)
 
 
-def test_plot_terminator_improvements():
+@patch("atom.plots.hyperparametertuningplot._get_improvement_info")
+def test_plot_terminator_improvements(improvement):
     """Assert that the plot_terminator_improvement method works."""
+    improvement.return_value = _ImprovementInfo([], [], [])
+
     atom = ATOMClassifier(X_class, y_class, random_state=1)
     atom.run("tree", n_trials=1)
@@ -478,12 +492,12 @@ def test_plot_confusion_matrix():
     """Assert that the plot_confusion_matrix method works."""
     # For binary classification tasks
     atom = ATOMClassifier(X_bin, y_bin, random_state=1)
-    atom.run(["RF", "LGB"])
+    atom.run(["RF", "LGB"], est_params={"n_estimators": 5})
     atom.plot_confusion_matrix(threshold=0.2, display=False)
 
     # For multiclass classification tasks
     atom = ATOMClassifier(X_class, y_class, random_state=1)
-    atom.run(["RF", "LGB"])
+    atom.run(["RF", "LGB"], est_params={"n_estimators": 5})
 
     # Not available for multiclass
     with pytest.raises(NotImplementedError, match=".*not support the comparison.*"):
@@ -495,7 +509,7 @@ def test_plot_det():
     """Assert that the plot_det method works."""
     atom = ATOMClassifier(X_bin, y_bin, random_state=1)
-    atom.run(["LGB", "SVM"])
+    atom.run(["LGB", "SVM"], est_params={"LGB": {"n_estimators": 5}})
     atom.plot_det(display=False)
@@ -509,7 +523,14 @@ def test_plot_errors():
 def test_plot_evals():
     """Assert that the plot_evals method works."""
     atom = ATOMClassifier(X_bin, y_bin, random_state=1)
-    atom.run(["LR", "LGB", "MLP"], metric="f1")
+    atom.run(
+        models=["LR", "LGB", "MLP"],
+        metric="f1",
+        est_params={
+            "LGB": {"n_estimators": 5},
+            "MLP": {"hidden_layer_sizes": (5,), "max_iter": 5},
+        },
+    )
 
     # No in-training validation
     with pytest.raises(ValueError, match=".*no in-training validation.*"):
@@ -541,7 +562,7 @@ def test_plot_gains():
 def test_plot_learning_curve():
     """Assert that the plot_learning_curve method works."""
     atom = ATOMRegressor(X_reg, y_reg, random_state=1)
-    atom.train_sizing(["Tree", "LGB"], errors="raise", n_bootstrap=4)
+    atom.train_sizing(["Dummy", "Tree"], n_bootstrap=4)
     atom.plot_learning_curve(display=False)
@@ -556,12 +577,14 @@ def test_plot_parshap():
     """Assert that the plot_parshap method works."""
     atom = ATOMClassifier(X_bin, y_bin, random_state=1)
     atom.balance("smote")  # To get samples over 500
-    atom.run(["GNB", "LR"])
+    atom.run(["Dummy", "Tree"])
     atom.plot_parshap(display=False)  # With colorbar
-    atom.gnb.plot_parshap(display=False)  # Without colorbar
+    atom.dummy.plot_parshap(display=False)  # Without colorbar
 
 
-def test_plot_partial_dependence():
+@patch("atom.plots.predictionplot.Parallel")
+@patch("atom.plots.predictionplot.partial_dependence")
+def test_plot_partial_dependence(_, __):
     """Assert that the plot_partial_dependence method works."""
     atom = ATOMClassifier(X_label, y=y_label, stratify=False, random_state=1)
     atom.run("Tree")
@@ -569,7 +592,7 @@
     atom.plot_partial_dependence(display=False)
 
     atom = ATOMClassifier(X_bin, y_bin, n_jobs=-1, random_state=1)
-    atom.run(["KNN", "LGB"])
+    atom.run(["KNN", "LGB"], est_params={"LGB": {"n_estimators": 5}})
 
     # Pair for multimodel
     with pytest.raises(ValueError, match=".*when plotting multiple models.*"):
@@ -588,8 +611,11 @@
     atom.tree.plot_partial_dependence(columns=[0, 1], pair=2, display=False)
 
 
-def test_plot_permutation_importance():
+@patch("atom.plots.predictionplot.permutation_importance")
+def test_plot_permutation_importance(importances):
     """Assert that the plot_permutation_importance method works."""
+    importances.return_value = {"importances": np.array(range(len(X_bin)))}
+
     atom = ATOMClassifier(X_bin, y_bin, random_state=1)
     atom.run("Tree", metric="f1")
     atom.plot_permutation_importance(display=False)
@@ -691,7 +717,11 @@ def test_plot_roc():
 def test_plot_successive_halving():
     """Assert that the plot_successive_halving method works."""
     atom = ATOMClassifier(X_bin, y_bin, random_state=1)
-    atom.successive_halving(["Tree", "Bag", "RF", "LGB"], n_bootstrap=4)
+    atom.successive_halving(
+        models=["Bag", "RF", "LGB"],
+        est_params={"n_estimators": 5},
+        n_bootstrap=3,
+    )
     atom.plot_successive_halving(display=False)
@@ -737,8 +767,8 @@ def test_plot_shap_bar():
 def test_plot_shap_beeswarm():
     """Assert that the plot_shap_beeswarm method works."""
-    atom = ATOMClassifier(X_class, y_class, random_state=1)
-    atom.run("LR", metric="f1_macro")
+    atom = ATOMClassifier(X_class, y_class, n_rows=0.1, random_state=1)
+    atom.run("GNB", metric="f1_macro")
     atom.plot_shap_beeswarm(display=False)
@@ -749,10 +779,18 @@ def test_plot_shap_decision():
     atom.lr.plot_shap_decision(display=False)
 
 
-def test_plot_shap_force():
+@patch("shap.force_plot")
+def test_plot_shap_force(plot):
     """Assert that the plot_shap_force method works."""
+    plot.return_value = Mock(spec=AdditiveForceVisualizer)
+    plot.return_value.html.return_value = ""
+
     atom = ATOMClassifier(X_class, y_class, random_state=1)
-    atom.run(["LR", "MLP"], metric="MSE")
+    atom.run(
+        models=["LR", "MLP"],
+        metric="MSE",
+        est_params={"MLP": {"hidden_layer_sizes": (5,), "max_iter": 5}},
+    )
 
     # Expected value from Explainer
     atom.lr.plot_shap_force(rows=100, matplotlib=True, display=False)
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 856fea50e..3d3e84305 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -8,7 +8,7 @@
 """
 
 from datetime import timedelta
-from unittest.mock import MagicMock
+from unittest.mock import Mock
 
 import pandas as pd
 import pytest
@@ -52,8 +52,8 @@ def test_time_to_string():
 
 def test_to_pandas_with_cuml():
     """Assert that cuML objects use the to_pandas method."""
-    to_df(MagicMock(spec=["to_pandas"]), columns=[0, 1])
-    to_series(MagicMock(spec=["to_pandas"]))
+    to_df(Mock(spec=["to_pandas"]), columns=[0, 1])
+    to_series(Mock(spec=["to_pandas"]))
 
 
 def test_check_is_fitted_with_pandas():