From 1080e37ea6936e9e8510ab854218e144ae2c5d84 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Franz=20Kir=C3=A1ly?= <f.kiraly@ucl.ac.uk>
Date: Sat, 27 Jul 2024 21:07:04 +0100
Subject: [PATCH] [ENH] differential testing for CI tests (#435)

This PR introduces differential testing for the CI tests, i.e., running
tests only if the object or module they test has been changed.

* runs tests in `benchmarking` only if anything in that module, or in
`metrics`, has changed
* runs tests in `datatypes` only if anything in the module has changed
* runs tests in `utils` only if anything in the module has changed

Also makes the following changes

* moves `test_polars` to `datatypes`, the module it tests - FYI
@julian-fong
---
 skpro/benchmarking/tests/test_evaluate.py     |  5 ++
 skpro/datatypes/tests/test_check.py           | 26 ++++++++++
 skpro/datatypes/tests/test_convert.py         |  7 +++
 skpro/datatypes/tests/test_convert_to.py      | 15 ++++++
 skpro/datatypes/tests/test_lookup.py          | 13 +++++
 skpro/{ => datatypes}/tests/test_polars.py    | 13 +++--
 .../tests/test_base_default_methods.py        |  6 +++
 skpro/distributions/tests/test_base_scalar.py | 10 ++++
 skpro/distributions/tests/test_empirical.py   |  6 +++
 skpro/distributions/tests/test_proba_basic.py |  5 ++
 skpro/distributions/tests/test_qpd.py         |  4 ++
 skpro/tests/test_ensemble.py                  | 50 -------------------
 skpro/utils/tests/test_plots.py               | 10 ++--
 13 files changed, 113 insertions(+), 57 deletions(-)
 rename skpro/{ => datatypes}/tests/test_polars.py (89%)
 delete mode 100644 skpro/tests/test_ensemble.py

diff --git a/skpro/benchmarking/tests/test_evaluate.py b/skpro/benchmarking/tests/test_evaluate.py
index 3f4ad2a0b..e2c301d52 100644
--- a/skpro/benchmarking/tests/test_evaluate.py
+++ b/skpro/benchmarking/tests/test_evaluate.py
@@ -15,6 +15,7 @@
 from skpro.benchmarking.evaluate import evaluate
 from skpro.metrics import CRPS, EmpiricalCoverage, LogLoss, PinballLoss
 from skpro.regression.residual import ResidualDouble
+from skpro.tests.test_switch import run_test_module_changed
 from skpro.utils.validation._dependencies import _check_soft_dependencies
 
 
@@ -70,6 +71,10 @@ def _get_pred_method(scoring):
 METRICS = [CRPS, EmpiricalCoverage, LogLoss, PinballLoss]
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed(["skpro.benchmarking", "skpro.metrics"]),
+    reason="Test only if skpro.benchmarking or skpro.metrics has been changed",
+)
 @pytest.mark.parametrize("cv", CVs)
 @pytest.mark.parametrize("scoring", METRICS)
 @pytest.mark.parametrize("backend", [None, "dask", "loky", "threading"])
diff --git a/skpro/datatypes/tests/test_check.py b/skpro/datatypes/tests/test_check.py
index 5fdc0d1cb..b703cd413 100644
--- a/skpro/datatypes/tests/test_check.py
+++ b/skpro/datatypes/tests/test_check.py
@@ -3,6 +3,7 @@
 __author__ = ["fkiraly"]
 
 import numpy as np
+import pytest
 
 from skpro.datatypes._check import (
     AMBIGUOUS_MTYPES,
@@ -14,6 +15,7 @@
 from skpro.datatypes._check import scitype as infer_scitype
 from skpro.datatypes._examples import get_examples
 from skpro.datatypes._registry import SCITYPE_LIST, scitype_to_mtype
+from skpro.tests.test_switch import run_test_module_changed
 
 SCITYPES = SCITYPE_LIST
 
@@ -104,6 +106,10 @@ def pytest_generate_tests(metafunc):
         metafunc.parametrize("scitype,mtype", keys, ids=ids)
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.datatypes"),
+    reason="Test only if skpro.datatypes has been changed",
+)
 def test_check_positive(scitype, mtype, fixture_index):
     """Tests that check_is_mtype correctly confirms the mtype of examples.
 
@@ -152,6 +158,10 @@ def test_check_positive(scitype, mtype, fixture_index):
         assert check_result[0], msg
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.datatypes"),
+    reason="Test only if skpro.datatypes has been changed",
+)
 def test_check_positive_check_scitype(scitype, mtype, fixture_index):
     """Tests that check_is_scitype correctly confirms the scitype of examples.
 
@@ -201,6 +211,10 @@ def test_check_positive_check_scitype(scitype, mtype, fixture_index):
         assert check_result[2]["mtype"] == mtype
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.datatypes"),
+    reason="Test only if skpro.datatypes has been changed",
+)
 def test_check_metadata_inference(scitype, mtype, fixture_index):
     """Tests that check_is_mtype correctly infers metadata of examples.
 
@@ -304,6 +318,10 @@ def test_check_metadata_inference(scitype, mtype, fixture_index):
                 assert metadata[metadata_key] == expected_metadata[metadata_key], msg
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.datatypes"),
+    reason="Test only if skpro.datatypes has been changed",
+)
 def test_check_negative(scitype, mtype):
     """Tests that check_is_mtype correctly identifies wrong mtypes of examples.
 
@@ -369,6 +387,10 @@ def test_check_negative(scitype, mtype):
                 )
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.datatypes"),
+    reason="Test only if skpro.datatypes has been changed",
+)
 def test_mtype_infer(scitype, mtype, fixture_index):
     """Tests that mtype correctly infers the mtype of examples.
 
@@ -417,6 +439,10 @@ def test_mtype_infer(scitype, mtype, fixture_index):
 SCITYPES_FOR_INFER_TEST = list(set(SCITYPE_LIST).difference(SKIP_SCITYPES))
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.datatypes"),
+    reason="Test only if skpro.datatypes has been changed",
+)
 def test_scitype_infer(scitype, mtype, fixture_index):
     """Tests that scitype correctly infers the mtype of examples.
 
diff --git a/skpro/datatypes/tests/test_convert.py b/skpro/datatypes/tests/test_convert.py
index 6a0f82af6..89db16cf0 100644
--- a/skpro/datatypes/tests/test_convert.py
+++ b/skpro/datatypes/tests/test_convert.py
@@ -2,9 +2,12 @@
 
 __author__ = ["fkiraly"]
 
+import pytest
+
 from skpro.datatypes import SCITYPE_REGISTER, scitype_to_mtype
 from skpro.datatypes._convert import _conversions_defined, convert
 from skpro.datatypes._examples import get_examples
+from skpro.tests.test_switch import run_test_module_changed
 from skpro.utils import deep_equals
 
 SCITYPES = [sci[0] for sci in SCITYPE_REGISTER]
@@ -71,6 +74,10 @@ def pytest_generate_tests(metafunc):
     metafunc.parametrize("scitype,from_mtype,to_mtype,fixture_index", keys, ids=ids)
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed(["skpro.datatypes", "skpro.utils"]),
+    reason="Test only if skpro.datatypes has been changed",
+)
 def test_convert(scitype, from_mtype, to_mtype, fixture_index):
     """Tests that conversions for scitype agree with from/to example fixtures.
 
diff --git a/skpro/datatypes/tests/test_convert_to.py b/skpro/datatypes/tests/test_convert_to.py
index fb133e026..1c5287dbd 100644
--- a/skpro/datatypes/tests/test_convert_to.py
+++ b/skpro/datatypes/tests/test_convert_to.py
@@ -2,8 +2,11 @@
 
 __author__ = ["fkiraly"]
 
+import pytest
+
 from skpro.datatypes._convert import convert_to
 from skpro.datatypes._examples import get_examples
+from skpro.tests.test_switch import run_test_module_changed
 from skpro.utils import deep_equals
 
 # hard-coded scitypes/mtypes to use in test_convert_to
@@ -13,6 +16,10 @@
 MTYPES_TABLE = ["list_of_dict", "pd_Series_Table", "numpy2D"]
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed(["skpro.datatypes", "skpro.utils"]),
+    reason="Test only if skpro.datatypes has been changed",
+)
 def test_convert_to_simple():
     """Testing convert_to basic call works."""
     scitype = SCITYPES[0]
@@ -29,6 +36,10 @@ def test_convert_to_simple():
     assert deep_equals(converted, exp_fixt), msg
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed(["skpro.datatypes", "skpro.utils"]),
+    reason="Test only if skpro.datatypes has been changed",
+)
 def test_convert_to_without_scitype():
     """Testing convert_to call without scitype specification."""
     scitype = SCITYPES[0]
@@ -45,6 +56,10 @@ def test_convert_to_without_scitype():
     assert deep_equals(converted, exp_fixt), msg
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed(["skpro.datatypes", "skpro.utils"]),
+    reason="Test only if skpro.datatypes has been changed",
+)
 def test_convert_to_mtype_list():
     """Testing convert_to call to_type being a list, of same scitype."""
     # convert_to list
diff --git a/skpro/datatypes/tests/test_lookup.py b/skpro/datatypes/tests/test_lookup.py
index b3283fffc..36629b96f 100644
--- a/skpro/datatypes/tests/test_lookup.py
+++ b/skpro/datatypes/tests/test_lookup.py
@@ -10,11 +10,16 @@
     mtype_to_scitype,
     scitype_to_mtype,
 )
+from skpro.tests.test_switch import run_test_module_changed
 from skpro.utils.validation._dependencies import _check_soft_dependencies
 
 MTYPE_SCITYPE_PAIRS = [(k[0], k[1]) for k in MTYPE_REGISTER]
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.datatypes"),
+    reason="Test only if skpro.datatypes has been changed",
+)
 @pytest.mark.parametrize("mtype, scitype", MTYPE_SCITYPE_PAIRS)
 def test_mtype_to_scitype(mtype, scitype):
     """Tests that mtype_to_scitype yields the correct output for a string.
@@ -37,6 +42,10 @@ def test_mtype_to_scitype(mtype, scitype):
     assert result == scitype, msg
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.datatypes"),
+    reason="Test only if skpro.datatypes has been changed",
+)
 def test_mtype_to_scitype_list():
     """Tests that mtype_to_scitype yields the correct output for a list.
 
@@ -60,6 +69,10 @@ def test_mtype_to_scitype_list():
     assert result == expected_scitype_list, msg
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.datatypes"),
+    reason="Test only if skpro.datatypes has been changed",
+)
 @pytest.mark.parametrize("mtype, scitype", MTYPE_SCITYPE_PAIRS)
 def test_scitype_to_mtype(mtype, scitype):
     """Tests that scitype_to_mtype yields the correct output for a string.
diff --git a/skpro/tests/test_polars.py b/skpro/datatypes/tests/test_polars.py
similarity index 89%
rename from skpro/tests/test_polars.py
rename to skpro/datatypes/tests/test_polars.py
index 796454c94..acc72d2e6 100644
--- a/skpro/tests/test_polars.py
+++ b/skpro/datatypes/tests/test_polars.py
@@ -5,6 +5,7 @@
 from sklearn.datasets import load_diabetes
 from sklearn.model_selection import train_test_split
 
+from skpro.tests.test_switch import run_test_module_changed
 from skpro.utils.validation._dependencies import _check_soft_dependencies
 
 if _check_soft_dependencies(["polars", "pyarrow"], severity="none"):
@@ -53,7 +54,8 @@ def polars_load_diabetes_polars(polars_load_diabetes_pandas):
 
 
 @pytest.mark.skipif(
-    not _check_soft_dependencies(["polars", "pyarrow"], severity="none"),
+    not run_test_module_changed("skpro.datatypes")
+    or not _check_soft_dependencies(["polars", "pyarrow"], severity="none"),
     reason="skip test if polars/pyarrow is not installed in environment",
 )
 def test_polars_eager_conversion_methods(
@@ -76,7 +78,8 @@ def test_polars_eager_conversion_methods(
 
 
 @pytest.mark.skipif(
-    not _check_soft_dependencies(["polars", "pyarrow"], severity="none"),
+    not run_test_module_changed("skpro.datatypes")
+    or not _check_soft_dependencies(["polars", "pyarrow"], severity="none"),
     reason="skip test if polars/pyarrow is not installed in environment",
 )
 def test_polars_eager_regressor_in_fit_predict(
@@ -112,7 +115,8 @@ def test_polars_eager_regressor_in_fit_predict(
 
 
 @pytest.mark.skipif(
-    not _check_soft_dependencies(["polars", "pyarrow"], severity="none"),
+    not run_test_module_changed("skpro.datatypes")
+    or not _check_soft_dependencies(["polars", "pyarrow"], severity="none"),
     reason="skip test if polars/pyarrow is not installed in environment",
 )
 def test_polars_eager_regressor_in_predict_interval(
@@ -129,7 +133,8 @@ def test_polars_eager_regressor_in_predict_interval(
 
 
 @pytest.mark.skipif(
-    not _check_soft_dependencies(["polars", "pyarrow"], severity="none"),
+    not run_test_module_changed("skpro.datatypes")
+    or not _check_soft_dependencies(["polars", "pyarrow"], severity="none"),
     reason="skip test if polars/pyarrow is not installed in environment",
 )
 def test_polars_eager_regressor_in_predict_quantiles(
diff --git a/skpro/distributions/tests/test_base_default_methods.py b/skpro/distributions/tests/test_base_default_methods.py
index 5defa2677..d7b23dca9 100644
--- a/skpro/distributions/tests/test_base_default_methods.py
+++ b/skpro/distributions/tests/test_base_default_methods.py
@@ -14,9 +14,11 @@
 
 import numpy as np
 import pandas as pd
+import pytest
 from scipy.special import erfinv
 
 from skpro.distributions.base import BaseDistribution
+from skpro.tests.test_switch import run_test_module_changed
 from skpro.utils.estimator_checks import check_estimator
 
 
@@ -92,6 +94,10 @@ def get_test_params(cls, parameter_set="default"):
         return [params1, params2, params3]
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.distributions"),
+    reason="run only if skpro.distributions has been changed",
+)
 def test_base_default():
     """Test default methods.
 
diff --git a/skpro/distributions/tests/test_base_scalar.py b/skpro/distributions/tests/test_base_scalar.py
index a0efe3063..dd603cd97 100644
--- a/skpro/distributions/tests/test_base_scalar.py
+++ b/skpro/distributions/tests/test_base_scalar.py
@@ -12,10 +12,16 @@
 
 import numpy as np
 import pandas as pd
+import pytest
 
 from skpro.distributions.normal import Normal
+from skpro.tests.test_switch import run_test_module_changed
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.distributions"),
+    reason="run only if skpro.distributions has been changed",
+)
 def test_scalar_distribution():
     """Test scalar distribution logic."""
     # test params
@@ -47,6 +53,10 @@ def test_scalar_distribution():
     assert spl_mult.index.equals(pd.RangeIndex(5))
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.distributions"),
+    reason="run only if skpro.distributions has been changed",
+)
 def test_broadcast_ambiguous():
     """Test broadcasting in cases of ambiguous parameter dimensions."""
     mu = [1]
diff --git a/skpro/distributions/tests/test_empirical.py b/skpro/distributions/tests/test_empirical.py
index 77a2587aa..b0cb452d6 100644
--- a/skpro/distributions/tests/test_empirical.py
+++ b/skpro/distributions/tests/test_empirical.py
@@ -1,10 +1,16 @@
 """Tests for Empirical distributions."""
 
 import pandas as pd
+import pytest
 
 from skpro.distributions.empirical import Empirical
+from skpro.tests.test_switch import run_test_module_changed
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.distributions"),
+    reason="run only if skpro.distributions has been changed",
+)
 def test_empirical_iat_index():
     """Test that the index is correctly set after iat call."""
     spl_idx = pd.MultiIndex.from_product([[0, 1], [0, 1, 2]], names=["sample", "time"])
diff --git a/skpro/distributions/tests/test_proba_basic.py b/skpro/distributions/tests/test_proba_basic.py
index e3901aa1c..56a2f0140 100644
--- a/skpro/distributions/tests/test_proba_basic.py
+++ b/skpro/distributions/tests/test_proba_basic.py
@@ -8,9 +8,14 @@
 import pandas as pd
 import pytest
 
+from skpro.tests.test_switch import run_test_module_changed
 from skpro.utils.validation._dependencies import _check_soft_dependencies
 
 
+@pytest.mark.skipif(
+    not run_test_module_changed("skpro.distributions"),
+    reason="run only if skpro.distributions has been changed",
+)
 def test_proba_example():
     """Test one subsetting case for BaseDistribution."""
     from skpro.distributions.normal import Normal
diff --git a/skpro/distributions/tests/test_qpd.py b/skpro/distributions/tests/test_qpd.py
index 0bf070516..5c4245cb3 100644
--- a/skpro/distributions/tests/test_qpd.py
+++ b/skpro/distributions/tests/test_qpd.py
@@ -25,6 +25,10 @@ def test_qpd_b_simple_use():
     qpd.mean()
 
 
+@pytest.mark.skipif(
+    not run_test_for_class(QPD_B),
+    reason="run test only if softdeps are present and incrementally (if requested)",  #
+)
 def test_qpd_b_pdf():
     """Test pdf of qpd with bounded mode."""
     # these parameters should produce a uniform on -0.5, 0.5
diff --git a/skpro/tests/test_ensemble.py b/skpro/tests/test_ensemble.py
deleted file mode 100644
index 5a17b9712..000000000
--- a/skpro/tests/test_ensemble.py
+++ /dev/null
@@ -1,50 +0,0 @@
-"""LEGACY MODULE - TODO: remove or refactor."""
-
-import pytest
-from sklearn.ensemble import BaggingRegressor as ClassicBaggingRegressor
-from sklearn.metrics import mean_squared_error as mse
-from sklearn.tree import DecisionTreeRegressor
-
-from skpro.workflow.manager import DataManager
-
-
-def prediction(model, data):
-    return model.fit(data.X_train, data.y_train).predict(data.X_test)
-
-
-@pytest.mark.skip(reason="loss assert fails sporadically")
-def test_bagging_wrapper():
-    data = DataManager("boston")
-
-    # Run classic sklearn bagging mechanism to ensure we have
-    # the correct parameters in place
-    baseline_classic = prediction(DecisionTreeRegressor(), data)
-
-    bagged_classic = prediction(ClassicBaggingRegressor(DecisionTreeRegressor()), data)
-    #
-    # # Does the bagging reduce the loss?
-    assert mse(data.y_test, baseline_classic) > mse(data.y_test, bagged_classic)
-
-    # Run corresponding skpro bagging mechanism
-
-    # clf = DecisionTreeRegressor()
-    #
-    # baseline_prediction = prediction(
-    #     ParametricEstimator(point=clf),
-    #     data
-    # )
-    #
-    # skpro_bagging_prediction = prediction(
-    #     SkproBaggingRegressor(
-    #         ParametricEstimator(point=clf),
-    #         n_estimators=10,
-    #         n_jobs=-1
-    #     ),
-    #     data
-    # )
-    #
-    # l1, l2 = loss(data.y_test, baseline_prediction),\
-    #          loss(data.y_test, skpro_bagging_prediction)
-    #
-    # # Does the bagging reduce the loss?
-    # assert l1 > l2
diff --git a/skpro/utils/tests/test_plots.py b/skpro/utils/tests/test_plots.py
index ed287efec..89484d3e1 100644
--- a/skpro/utils/tests/test_plots.py
+++ b/skpro/utils/tests/test_plots.py
@@ -3,11 +3,13 @@
 
 import pytest
 
+from skpro.tests.test_switch import run_test_module_changed
 from skpro.utils.validation._dependencies import _check_soft_dependencies
 
 
 @pytest.mark.skipif(
-    not _check_soft_dependencies("matplotlib", severity="none"),
+    not run_test_module_changed("skpro.utils")
+    or not _check_soft_dependencies("matplotlib", severity="none"),
     reason="skip test if required soft dependency for matplotlib not available",
 )
 def test_plot_crossplot_interval():
@@ -37,7 +39,8 @@ def test_plot_crossplot_interval():
 
 
 @pytest.mark.skipif(
-    not _check_soft_dependencies("matplotlib", severity="none"),
+    not run_test_module_changed("skpro.utils")
+    or not _check_soft_dependencies("matplotlib", severity="none"),
     reason="skip test if required soft dependency for matplotlib not available",
 )
 def test_plot_crossplot_std():
@@ -66,7 +69,8 @@ def test_plot_crossplot_std():
 
 
 @pytest.mark.skipif(
-    not _check_soft_dependencies("matplotlib", severity="none"),
+    not run_test_module_changed("skpro.utils")
+    or not _check_soft_dependencies("matplotlib", severity="none"),
     reason="skip test if required soft dependency for matplotlib not available",
 )
 def test_plot_crossplot_loss():