From 8159936206043d30a22c7db1b26013def2b0f6d8 Mon Sep 17 00:00:00 2001
From: Matthew Middlehurst
Date: Mon, 12 Aug 2024 21:25:38 +0100
Subject: [PATCH] [MNT] Unit testing revamp part 3: regression (#1875)

* classification checks in progress

* rework yield checks

* rework yield checks to allow for class input

* fixes

* fix

* pr testing split

* classification fixes

* regressor tests

* exclude tapnet

* missing value test data

* fix

---
 .../test_saving_loading_deep_learning_cls.py  |  83 -------
 .../tests/test_random_state_deep_regressor.py |  60 -----
 .../test_saving_loading_deep_learning_cls.py  |  79 ------
 aeon/regression/tests/test_all_regressors.py  | 118 ---------
 .../_yield_classification_checks.py           | 108 +++++++--
 .../_yield_estimator_checks.py                |   9 +
 .../_yield_regression_checks.py               | 227 ++++++++++++++++++
 aeon/testing/test_all_estimators.py           |   2 +-
 aeon/testing/test_config.py                   |   3 +-
 aeon/testing/tests/test_all_estimators.py     |   2 +-
 10 files changed, 334 insertions(+), 357 deletions(-)
 delete mode 100644 aeon/classification/deep_learning/tests/test_saving_loading_deep_learning_cls.py
 delete mode 100644 aeon/regression/deep_learning/tests/test_random_state_deep_regressor.py
 delete mode 100644 aeon/regression/deep_learning/tests/test_saving_loading_deep_learning_cls.py
 delete mode 100644 aeon/regression/tests/test_all_regressors.py
 create mode 100644 aeon/testing/estimator_checking/_yield_regression_checks.py

diff --git a/aeon/classification/deep_learning/tests/test_saving_loading_deep_learning_cls.py b/aeon/classification/deep_learning/tests/test_saving_loading_deep_learning_cls.py
deleted file mode 100644
index d90393a369..0000000000
--- a/aeon/classification/deep_learning/tests/test_saving_loading_deep_learning_cls.py
+++ /dev/null
@@ -1,83 +0,0 @@
-"""Unit tests for classifiers deep learners save/load functionalities."""
-
-import inspect
-import os
-import tempfile
-import time
-
-import numpy as np
-import pytest
-
-from aeon.classification import deep_learning
-from aeon.testing.data_generation import make_example_3d_numpy
-from aeon.utils.validation._dependencies import _check_soft_dependencies
-
-__maintainer__ = ["hadifawaz1999"]
-
-
-_deep_cls_classes = [
-    member[1] for member in inspect.getmembers(deep_learning, inspect.isclass)
-]
-
-
-@pytest.mark.skipif(
-    not _check_soft_dependencies(["tensorflow"], severity="none"),
-    reason="skip test if required soft dependency not available",
-)
-@pytest.mark.parametrize("deep_cls", _deep_cls_classes)
-def test_saving_loading_deep_learning_cls(deep_cls):
-    """Test Deep Classifier saving."""
-    with tempfile.TemporaryDirectory() as tmp:
-        if not (
-            deep_cls.__name__
-            in [
-                "BaseDeepClassifier",
-                "InceptionTimeClassifier",
-                "LITETimeClassifier",
-                "TapNetClassifier",
-            ]
-        ):
-            if tmp[-1] != "/":
-                tmp = tmp + "/"
-            curr_time = str(time.time_ns())
-            last_file_name = curr_time + "last"
-            best_file_name = curr_time + "best"
-            init_file_name = curr_time + "init"
-
-            X, y = make_example_3d_numpy()
-
-            deep_cls_train = deep_cls(
-                n_epochs=2,
-                save_best_model=True,
-                save_last_model=True,
-                save_init_model=True,
-                best_file_name=best_file_name,
-                last_file_name=last_file_name,
-                init_file_name=init_file_name,
-                file_path=tmp,
-            )
-            deep_cls_train.fit(X, y)
-
-            deep_cls_best = deep_cls()
-            deep_cls_best.load_model(
-                model_path=os.path.join(tmp, best_file_name + ".keras"),
-                classes=np.unique(y),
-            )
-            ypred_best = deep_cls_best.predict(X)
-            assert len(ypred_best) == len(y)
-
-            deep_cls_last = deep_cls()
-            deep_cls_last.load_model(
-                model_path=os.path.join(tmp, last_file_name + ".keras"),
-                classes=np.unique(y),
-            )
-            ypred_last = deep_cls_last.predict(X)
-            assert len(ypred_last) == len(y)
-
-            deep_cls_init = deep_cls()
-            deep_cls_init.load_model(
-                model_path=os.path.join(tmp, init_file_name + ".keras"),
-                classes=np.unique(y),
-            )
-            ypred_init = deep_cls_init.predict(X)
-            assert len(ypred_init) == len(y)
diff --git a/aeon/regression/deep_learning/tests/test_random_state_deep_regressor.py b/aeon/regression/deep_learning/tests/test_random_state_deep_regressor.py
deleted file mode 100644
index 3c78367348..0000000000
--- a/aeon/regression/deep_learning/tests/test_random_state_deep_regressor.py
+++ /dev/null
@@ -1,60 +0,0 @@
-"""Unit tests for regressors deep learning random_state functionality."""
-
-import inspect
-
-import numpy as np
-import pytest
-
-from aeon.regression import deep_learning
-from aeon.testing.data_generation import make_example_3d_numpy
-from aeon.utils.validation._dependencies import _check_soft_dependencies
-
-__maintainer__ = ["hadifawaz1999"]
-
-
-@pytest.mark.skipif(
-    not _check_soft_dependencies(["tensorflow"], severity="none"),
-    reason="skip test if required soft dependency not available",
-)
-def test_random_state_deep_learning_rgs():
-    """Test Deep Regressor seeding."""
-    random_state = 42
-
-    X, y = make_example_3d_numpy(random_state=random_state)
-
-    deep_rgs_classes = [
-        member[1] for member in inspect.getmembers(deep_learning, inspect.isclass)
-    ]
-
-    for i in range(len(deep_rgs_classes)):
-        if (
-            "BaseDeepRegressor" in str(deep_rgs_classes[i])
-            or "InceptionTimeRegressor" in str(deep_rgs_classes[i])
-            or "LITETimeRegressor" in str(deep_rgs_classes[i])
-            or "TapNetRegressor" in str(deep_rgs_classes[i])
-        ):
-            continue
-
-        deep_rgs1 = deep_rgs_classes[i](random_state=random_state, n_epochs=4)
-        deep_rgs1.fit(X, y)
-
-        layers1 = deep_rgs1.training_model_.layers[1:]
-
-        deep_rgs2 = deep_rgs_classes[i](random_state=random_state, n_epochs=4)
-        deep_rgs2.fit(X, y)
-
-        layers2 = deep_rgs2.training_model_.layers[1:]
-
-        assert len(layers1) == len(layers2)
-
-        for i in range(len(layers1)):
-            weights1 = layers1[i].get_weights()
-            weights2 = layers2[i].get_weights()
-
-            assert len(weights1) == len(weights2)
-
-            for j in range(len(weights1)):
-                _weight1 = np.asarray(weights1[j])
-                _weight2 = np.asarray(weights2[j])
-
-                np.testing.assert_almost_equal(_weight1, _weight2, 4)
diff --git a/aeon/regression/deep_learning/tests/test_saving_loading_deep_learning_cls.py b/aeon/regression/deep_learning/tests/test_saving_loading_deep_learning_cls.py
deleted file mode 100644
index 736d99baf3..0000000000
--- a/aeon/regression/deep_learning/tests/test_saving_loading_deep_learning_cls.py
+++ /dev/null
@@ -1,79 +0,0 @@
-"""Unit tests for regressors deep learners save/load functionalities."""
-
-import inspect
-import os
-import tempfile
-import time
-
-import pytest
-
-from aeon.regression import deep_learning
-from aeon.testing.data_generation import make_example_3d_numpy
-from aeon.utils.validation._dependencies import _check_soft_dependencies
-
-__maintainer__ = ["hadifawaz1999"]
-
-
-_deep_rgs_classes = [
-    member[1] for member in inspect.getmembers(deep_learning, inspect.isclass)
-]
-
-
-@pytest.mark.skipif(
-    not _check_soft_dependencies(["tensorflow"], severity="none"),
-    reason="skip test if required soft dependency not available",
-)
-@pytest.mark.parametrize("deep_rgs", _deep_rgs_classes)
-def test_saving_loading_deep_learning_rgs(deep_rgs):
-    """Test Deep Regressor saving."""
-    with tempfile.TemporaryDirectory() as tmp:
-        if not (
-            deep_rgs.__name__
-            in [
-                "BaseDeepRegressor",
-                "InceptionTimeRegressor",
-                "LITETimeRegressor",
-                "TapNetRegressor",
-            ]
-        ):
-            if tmp[-1] != "/":
-                tmp = tmp + "/"
-            curr_time = str(time.time_ns())
-            last_file_name = curr_time + "last"
-            best_file_name = curr_time + "best"
-            init_file_name = curr_time + "init"
-
-            X, y = make_example_3d_numpy()
-
-            deep_rgs_train = deep_rgs(
-                n_epochs=2,
-                save_best_model=True,
-                save_last_model=True,
-                save_init_model=True,
-                best_file_name=best_file_name,
-                last_file_name=last_file_name,
-                init_file_name=init_file_name,
-                file_path=tmp,
-            )
-            deep_rgs_train.fit(X, y)
-
-            deep_rgs_best = deep_rgs()
-            deep_rgs_best.load_model(
-                model_path=os.path.join(tmp, best_file_name + ".keras"),
-            )
-            ypred_best = deep_rgs_best.predict(X)
-            assert len(ypred_best) == len(y)
-
-            deep_rgs_last = deep_rgs()
-            deep_rgs_last.load_model(
-                model_path=os.path.join(tmp, last_file_name + ".keras"),
-            )
-            ypred_last = deep_rgs_last.predict(X)
-            assert len(ypred_last) == len(y)
-
-            deep_rgs_init = deep_rgs()
-            deep_rgs_init.load_model(
-                model_path=os.path.join(tmp, init_file_name + ".keras"),
-            )
-            ypred_init = deep_rgs_init.predict(X)
-            assert len(ypred_init) == len(y)
diff --git a/aeon/regression/tests/test_all_regressors.py b/aeon/regression/tests/test_all_regressors.py
deleted file mode 100644
index f8e4670c94..0000000000
--- a/aeon/regression/tests/test_all_regressors.py
+++ /dev/null
@@ -1,118 +0,0 @@
-"""Unit tests for all time series regressors."""
-
-__maintainer__ = []
-
-from sys import platform
-
-import numpy as np
-from sklearn.utils._testing import set_random_state
-
-from aeon.datasets import load_cardano_sentiment, load_covid_3month
-from aeon.testing.expected_results.expected_regressor_outputs import (
-    cardano_sentiment_preds,
-    covid_3month_preds,
-)
-from aeon.testing.test_all_estimators import BaseFixtureGenerator, QuickTester
-from aeon.testing.utils.estimator_checks import _assert_array_almost_equal
-
-
-class RegressorFixtureGenerator(BaseFixtureGenerator):
-    """Fixture generator for regression tests.
-
-    Fixtures parameterized
-    ----------------------
-    estimator_class: estimator inheriting from BaseObject
-        ranges over estimator classes not excluded by EXCLUDE_ESTIMATORS, EXCLUDED_TESTS
-    estimator_instance: instance of estimator inheriting from BaseObject
-        ranges over estimator classes not excluded by EXCLUDE_ESTIMATORS, EXCLUDED_TESTS
-        instances are generated by create_test_instance class method
-    scenario: instance of TestScenario
-        ranges over all scenarios returned by retrieve_scenarios
-    """
-
-    # note: this should be separate from TestAllRegressors
-    # additional fixtures, parameters, etc should be added here
-    # TestAllRegressors should contain the tests only
-
-    estimator_type_filter = "regressor"
-
-
-class TestAllRegressors(RegressorFixtureGenerator, QuickTester):
-    """Module level tests for all aeon regressors."""
-
-    def test_regressor_against_expected_results(self, estimator_class):
-        """Test classifier against stored results."""
-        # we only use the first estimator instance for testing
-        classname = estimator_class.__name__
-
-        # We cannot guarantee same results on ARM macOS
-        if platform == "darwin":
-            return None
-
-        for data_name, data_dict, data_loader, data_seed in [
-            ["Covid3Month", covid_3month_preds, load_covid_3month, 0],
-            ["CardanoSentiment", cardano_sentiment_preds, load_cardano_sentiment, 0],
-        ]:
-            # retrieve expected predict output, and skip test if not available
-            if classname in data_dict.keys():
-                expected_preds = data_dict[classname]
-            else:
-                # skip test if no expected preds are registered
-                continue
-
-            # we only use the first estimator instance for testing
-            estimator_instance = estimator_class.create_test_instance(
-                parameter_set="results_comparison"
-            )
-            # set random seed if possible
-            set_random_state(estimator_instance, 0)
-
-            # load test data
-            X_train, y_train = data_loader(split="train")
-            X_test, y_test = data_loader(split="test")
-            indices_train = np.random.RandomState(data_seed).choice(
-                len(y_train), 10, replace=False
-            )
-            indices_test = np.random.RandomState(data_seed).choice(
-                len(y_test), 10, replace=False
-            )
-
-            # train regressor and predict
-            estimator_instance.fit(X_train[indices_train], y_train[indices_train])
-            y_pred = estimator_instance.predict(X_test[indices_test])
-
-            # assert predictions are the same
-            _assert_array_almost_equal(
-                y_pred,
-                expected_preds,
-                decimal=2,
-                err_msg=f"Failed to reproduce results for {classname} on {data_name}",
-            )
-
-    def test_regressor_tags_consistent(self, estimator_class):
-        """Test the tag X_inner_type is consistent with capability:unequal_length."""
-        valid_types = {"np-list", "df-list", "pd-multivariate", "nested_univ"}
-        unequal = estimator_class.get_class_tag("capability:unequal_length")
-        if unequal:  # one of X_inner_types must be capable of storing unequal length
-            internal_types = estimator_class.get_class_tag("X_inner_type")
-            if isinstance(internal_types, str):
-                assert internal_types in valid_types
-            else:  # must be a list
-                assert bool(set(internal_types) & valid_types)
-        # Test can actually fit/predict with multivariate if tag is set
-        multivariate = estimator_class.get_class_tag("capability:multivariate")
-        if multivariate:
-            X = np.random.random((10, 2, 20))
-            y = np.random.random(10)
-            inst = estimator_class.create_test_instance(parameter_set="default")
-            inst.fit(X, y)
-            inst.predict(X)
-
-    def test_does_not_override_final_methods(self, estimator_class):
-        """Test does not override final methods."""
-        if "fit" in estimator_class.__dict__:
-            raise ValueError(f"Classifier {estimator_class} overrides the method fit")
-        if "predict" in estimator_class.__dict__:
-            raise ValueError(
-                f"Classifier {estimator_class} overrides the method " f"predict"
-            )
diff --git a/aeon/testing/estimator_checking/_yield_classification_checks.py b/aeon/testing/estimator_checking/_yield_classification_checks.py
index b670adbbdf..54fc89ffd0 100644
--- a/aeon/testing/estimator_checking/_yield_classification_checks.py
+++ b/aeon/testing/estimator_checking/_yield_classification_checks.py
@@ -1,4 +1,9 @@
+"""Tests for all classifiers."""
+
 import inspect
+import os
+import tempfile
+import time
 
 from functools import partial
 from sys import platform
@@ -21,15 +26,25 @@ def _yield_classification_checks(estimator_class, estimator_instances, datatypes
     """Yield all classification checks for an aeon classifier."""
     # only class required
     yield partial(
-        test_classifier_against_expected_results, estimator_class=estimator_class
+        check_classifier_against_expected_results, estimator_class=estimator_class
+    )
+    yield partial(check_classifier_tags_consistent, estimator_class=estimator_class)
+    yield partial(
+        check_classifier_does_not_override_final_methods,
+        estimator_class=estimator_class,
     )
-    yield partial(test_classifier_tags_consistent, estimator_class=estimator_class)
-    yield partial(test_does_not_override_final_methods, estimator_class=estimator_class)
 
     # data type irrelevant
     if _get_tag(estimator_class, "capability:contractable", raise_error=True):
         yield partial(
-            test_contracted_classifier,
+            check_contracted_classifier,
+            estimator_class=estimator_class,
+            datatype=datatypes[0][0],
+        )
+
+    if issubclass(estimator_class, BaseDeepClassifier):
+        yield partial(
+            check_classifier_saving_loading_deep_learning,
             estimator_class=estimator_class,
             datatype=datatypes[0][0],
         )
@@ -39,14 +54,14 @@ def _yield_classification_checks(estimator_class, estimator_instances, datatypes
         # data type irrelevant
         if _get_tag(estimator_class, "capability:train_estimate", raise_error=True):
             yield partial(
-                test_classifier_train_estimate,
+                check_classifier_train_estimate,
                 estimator=estimator,
                 datatype=datatypes[0][0],
             )
 
         if isinstance(estimator, BaseDeepClassifier):
             yield partial(
-                check_random_state_deep_learning,
+                check_classifier_random_state_deep_learning,
                 estimator=estimator,
                 datatype=datatypes[i][0],
             )
@@ -54,11 +69,11 @@ def _yield_classification_checks(estimator_class, estimator_instances, datatypes
         # test all data types
         for datatype in datatypes[i]:
             yield partial(
-                test_classifier_output, estimator=estimator, datatype=datatype
+                check_classifier_output, estimator=estimator, datatype=datatype
             )
 
 
-def test_classifier_against_expected_results(estimator_class):
+def check_classifier_against_expected_results(estimator_class):
     """Test classifier against stored results."""
     # we only use the first estimator instance for testing
     class_name = estimator_class.__name__
@@ -111,7 +126,7 @@
     )
 
 
-def test_classifier_tags_consistent(estimator_class):
+def check_classifier_tags_consistent(estimator_class):
     """Test the tag X_inner_type is consistent with capability:unequal_length."""
     valid_types = {"np-list", "df-list", "pd-multivariate", "nested_univ"}
     unequal = estimator_class.get_class_tag("capability:unequal_length")
@@ -132,7 +147,7 @@
         inst.predict_proba(X)
 
 
-def test_does_not_override_final_methods(estimator_class):
+def check_classifier_does_not_override_final_methods(estimator_class):
     """Test does not override final methods."""
     final_methods = [
         "fit",
@@ -149,7 +164,7 @@
         )
 
 
-def test_contracted_classifier(estimator_class, datatype):
+def check_contracted_classifier(estimator_class, datatype):
     """Test classifiers that can be contracted."""
     estimator_instance = estimator_class.create_test_instance(
         parameter_set="contracting"
@@ -200,7 +215,72 @@
     )
 
 
-def test_classifier_train_estimate(estimator, datatype):
+def check_classifier_saving_loading_deep_learning(estimator_class, datatype):
+    """Test Deep Classifier saving."""
+    with tempfile.TemporaryDirectory() as tmp:
+        if not (
+            estimator_class.__name__
+            in [
+                "BaseDeepClassifier",
+                "InceptionTimeClassifier",
+                "LITETimeClassifier",
+                "TapNetClassifier",
+            ]
+        ):
+            if tmp[-1] != "/":
+                tmp = tmp + "/"
+            curr_time = str(time.time_ns())
+            last_file_name = curr_time + "last"
+            best_file_name = curr_time + "best"
+            init_file_name = curr_time + "init"
+
+            deep_cls_train = estimator_class(
+                n_epochs=2,
+                save_best_model=True,
+                save_last_model=True,
+                save_init_model=True,
+                best_file_name=best_file_name,
+                last_file_name=last_file_name,
+                init_file_name=init_file_name,
+                file_path=tmp,
+            )
+            deep_cls_train.fit(
+                FULL_TEST_DATA_DICT[datatype]["train"][0],
+                FULL_TEST_DATA_DICT[datatype]["train"][1],
+            )
+
+            deep_cls_best = estimator_class()
+            deep_cls_best.load_model(
+                model_path=os.path.join(tmp, best_file_name + ".keras"),
+                classes=np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]),
+            )
+            ypred_best = deep_cls_best.predict(
+                FULL_TEST_DATA_DICT[datatype]["train"][0]
+            )
+            assert len(ypred_best) == len(FULL_TEST_DATA_DICT[datatype]["train"][1])
+
+            deep_cls_last = estimator_class()
+            deep_cls_last.load_model(
+                model_path=os.path.join(tmp, last_file_name + ".keras"),
+                classes=np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]),
+            )
+            ypred_last = deep_cls_last.predict(
+                FULL_TEST_DATA_DICT[datatype]["train"][0]
+            )
+            assert len(ypred_last) == len(FULL_TEST_DATA_DICT[datatype]["train"][1])
+
+            deep_cls_init = estimator_class()
+            deep_cls_init.load_model(
+                model_path=os.path.join(tmp, init_file_name + ".keras"),
+                classes=np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]),
+            )
+            ypred_init = deep_cls_init.predict(
+                FULL_TEST_DATA_DICT[datatype]["train"][0]
+            )
+            assert len(ypred_init) == len(FULL_TEST_DATA_DICT[datatype]["train"][1])
+
+
+def check_classifier_train_estimate(estimator, datatype):
     """Test classifiers that can produce train set probability estimates."""
     estimator = _clone_estimator(estimator)
     estimator_class = type(estimator)
@@ -241,7 +321,7 @@ def check_classifier_train_estimate(estimator, datatype):
     np.testing.assert_almost_equal(train_proba.sum(axis=1), 1, decimal=4)
 
 
-def check_random_state_deep_learning(estimator, datatype):
+def check_classifier_random_state_deep_learning(estimator, datatype):
     """Test Deep Classifier seeding."""
     random_state = 42
 
@@ -276,7 +356,7 @@ def check_classifier_random_state_deep_learning(estimator, datatype):
     np.testing.assert_almost_equal(_weight1, _weight2, 4)
 
 
-def test_classifier_output(estimator, datatype):
+def check_classifier_output(estimator, datatype):
     """Test classifier outputs the correct data types and values.
 
     Test predict produces a np.array or pd.Series with only values seen in the train
diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py
index 7426482c56..3cc212af8b 100644
--- a/aeon/testing/estimator_checking/_yield_estimator_checks.py
+++ b/aeon/testing/estimator_checking/_yield_estimator_checks.py
@@ -16,10 +16,14 @@
 from aeon.classification import BaseClassifier
 from aeon.classification.deep_learning.base import BaseDeepClassifier
 from aeon.clustering.deep_learning.base import BaseDeepClusterer
+from aeon.regression import BaseRegressor
 from aeon.regression.deep_learning.base import BaseDeepRegressor
 from aeon.testing.estimator_checking._yield_classification_checks import (
     _yield_classification_checks,
 )
+from aeon.testing.estimator_checking._yield_regression_checks import (
+    _yield_regression_checks,
+)
 from aeon.testing.test_config import (
     NON_STATE_CHANGING_METHODS,
     NON_STATE_CHANGING_METHODS_ARRAYLIKE,
@@ -79,6 +83,11 @@ def _yield_all_aeon_checks(
             estimator_class, estimator_instances, datatypes
         )
 
+    if issubclass(estimator_class, BaseRegressor):
+        yield from _yield_regression_checks(
+            estimator_class, estimator_instances, datatypes
+        )
+
 
 def _yield_estimator_checks(estimator_class, estimator_instances, datatypes):
     """Yield all general checks for an aeon estimator."""
diff --git a/aeon/testing/estimator_checking/_yield_regression_checks.py b/aeon/testing/estimator_checking/_yield_regression_checks.py
new file mode 100644
index 0000000000..192a60fb00
--- /dev/null
+++ b/aeon/testing/estimator_checking/_yield_regression_checks.py
@@ -0,0 +1,227 @@
+"""Tests for all regressors."""
+
+import os
+import tempfile
+import time
+from functools import partial
+from sys import platform
+
+import numpy as np
+from sklearn.utils._testing import set_random_state
+
+from aeon.base._base import _clone_estimator
+from aeon.datasets import load_cardano_sentiment, load_covid_3month
+from aeon.regression.deep_learning import BaseDeepRegressor
+from aeon.testing.expected_results.expected_regressor_outputs import (
+    cardano_sentiment_preds,
+    covid_3month_preds,
+)
+from aeon.testing.testing_data import FULL_TEST_DATA_DICT
+from aeon.testing.utils.estimator_checks import _assert_array_almost_equal
+
+
+def _yield_regression_checks(estimator_class, estimator_instances, datatypes):
+    """Yield all regression checks for an aeon regressor."""
+    # only class required
+    yield partial(
+        check_regressor_against_expected_results, estimator_class=estimator_class
+    )
+    yield partial(check_regressor_tags_consistent, estimator_class=estimator_class)
+    yield partial(
+        check_regressor_does_not_override_final_methods, estimator_class=estimator_class
+    )
+
+    # data type irrelevant
+    if issubclass(estimator_class, BaseDeepRegressor):
+        yield partial(
+            check_regressor_saving_loading_deep_learning,
+            estimator_class=estimator_class,
+            datatype=datatypes[0][0],
+        )
+
+    # test class instances
+    for i, estimator in enumerate(estimator_instances):
+        # data type irrelevant
+        if isinstance(estimator, BaseDeepRegressor):
+            yield partial(
+                check_regressor_random_state_deep_learning,
+                estimator=estimator,
+                datatype=datatypes[i][0],
+            )
+
+
+def check_regressor_against_expected_results(estimator_class):
+    """Test regressor against stored results."""
+    # we only use the first estimator instance for testing
+    classname = estimator_class.__name__
+
+    # We cannot guarantee same results on ARM macOS
+    if platform == "darwin":
+        return None
+
+    for data_name, data_dict, data_loader, data_seed in [
+        ["Covid3Month", covid_3month_preds, load_covid_3month, 0],
+        ["CardanoSentiment", cardano_sentiment_preds, load_cardano_sentiment, 0],
+    ]:
+        # retrieve expected predict output, and skip test if not available
+        if classname in data_dict.keys():
+            expected_preds = data_dict[classname]
+        else:
+            # skip test if no expected preds are registered
+            continue
+
+        # we only use the first estimator instance for testing
+        estimator_instance = estimator_class.create_test_instance(
+            parameter_set="results_comparison"
+        )
+        # set random seed if possible
+        set_random_state(estimator_instance, 0)
+
+        # load test data
+        X_train, y_train = data_loader(split="train")
+        X_test, y_test = data_loader(split="test")
+        indices_train = np.random.RandomState(data_seed).choice(
+            len(y_train), 10, replace=False
+        )
+        indices_test = np.random.RandomState(data_seed).choice(
+            len(y_test), 10, replace=False
+        )
+
+        # train regressor and predict
+        estimator_instance.fit(X_train[indices_train], y_train[indices_train])
+        y_pred = estimator_instance.predict(X_test[indices_test])
+
+        # assert predictions are the same
+        _assert_array_almost_equal(
+            y_pred,
+            expected_preds,
+            decimal=2,
+            err_msg=f"Failed to reproduce results for {classname} on {data_name}",
+        )
+
+
+def check_regressor_tags_consistent(estimator_class):
+    """Test the tag X_inner_type is consistent with capability:unequal_length."""
+    valid_types = {"np-list", "df-list", "pd-multivariate", "nested_univ"}
+    unequal = estimator_class.get_class_tag("capability:unequal_length")
+    if unequal:  # one of X_inner_types must be capable of storing unequal length
+        internal_types = estimator_class.get_class_tag("X_inner_type")
+        if isinstance(internal_types, str):
+            assert internal_types in valid_types
+        else:  # must be a list
+            assert bool(set(internal_types) & valid_types)
+    # Test can actually fit/predict with multivariate if tag is set
+    multivariate = estimator_class.get_class_tag("capability:multivariate")
+    if multivariate:
+        X = np.random.random((10, 2, 20))
+        y = np.random.random(10)
+        inst = estimator_class.create_test_instance(parameter_set="default")
+        inst.fit(X, y)
+        inst.predict(X)
+
+
+def check_regressor_does_not_override_final_methods(estimator_class):
+    """Test does not override final methods."""
+    if "fit" in estimator_class.__dict__:
+        raise ValueError(f"Regressor {estimator_class} overrides the method fit")
+    if "predict" in estimator_class.__dict__:
+        raise ValueError(
+            f"Regressor {estimator_class} overrides the method predict"
+        )
+
+
+def check_regressor_saving_loading_deep_learning(estimator_class, datatype):
+    """Test Deep Regressor saving."""
+    with tempfile.TemporaryDirectory() as tmp:
+        if not (
+            estimator_class.__name__
+            in [
+                "BaseDeepRegressor",
+                "InceptionTimeRegressor",
+                "LITETimeRegressor",
+                "TapNetRegressor",
+            ]
+        ):
+            if tmp[-1] != "/":
+                tmp = tmp + "/"
+            curr_time = str(time.time_ns())
+            last_file_name = curr_time + "last"
+            best_file_name = curr_time + "best"
+            init_file_name = curr_time + "init"
+
+            deep_rgs_train = estimator_class(
+                n_epochs=2,
+                save_best_model=True,
+                save_last_model=True,
+                save_init_model=True,
+                best_file_name=best_file_name,
+                last_file_name=last_file_name,
+                init_file_name=init_file_name,
+                file_path=tmp,
+            )
+            deep_rgs_train.fit(
+                FULL_TEST_DATA_DICT[datatype]["train"][0],
+                FULL_TEST_DATA_DICT[datatype]["train"][1],
+            )
+
+            deep_rgs_best = estimator_class()
+            deep_rgs_best.load_model(
+                model_path=os.path.join(tmp, best_file_name + ".keras"),
+            )
+            ypred_best = deep_rgs_best.predict(
+                FULL_TEST_DATA_DICT[datatype]["train"][0]
+            )
+            assert len(ypred_best) == len(FULL_TEST_DATA_DICT[datatype]["train"][1])
+
+            deep_rgs_last = estimator_class()
+            deep_rgs_last.load_model(
+                model_path=os.path.join(tmp, last_file_name + ".keras"),
+            )
+            ypred_last = deep_rgs_last.predict(
+                FULL_TEST_DATA_DICT[datatype]["train"][0]
+            )
+            assert len(ypred_last) == len(FULL_TEST_DATA_DICT[datatype]["train"][1])
+
+            deep_rgs_init = estimator_class()
+            deep_rgs_init.load_model(
+                model_path=os.path.join(tmp, init_file_name + ".keras"),
+            )
+            ypred_init = deep_rgs_init.predict(
+                FULL_TEST_DATA_DICT[datatype]["train"][0]
+            )
+            assert len(ypred_init) == len(FULL_TEST_DATA_DICT[datatype]["train"][1])
+
+
+def check_regressor_random_state_deep_learning(estimator, datatype):
+    """Test Deep Regressor seeding."""
+    random_state = 42
+
+    deep_rgs1 = _clone_estimator(estimator, random_state=random_state)
+    deep_rgs1.fit(
+        FULL_TEST_DATA_DICT[datatype]["train"][0],
+        FULL_TEST_DATA_DICT[datatype]["train"][1],
+    )
+
+    layers1 = deep_rgs1.training_model_.layers[1:]
+
+    deep_rgs2 = _clone_estimator(estimator, random_state=random_state)
+    deep_rgs2.fit(
+        FULL_TEST_DATA_DICT[datatype]["train"][0],
+        FULL_TEST_DATA_DICT[datatype]["train"][1],
+    )
+
+    layers2 = deep_rgs2.training_model_.layers[1:]
+
+    assert len(layers1) == len(layers2)
+
+    for i in range(len(layers1)):
+        weights1 = layers1[i].get_weights()
+        weights2 = layers2[i].get_weights()
+
+        assert len(weights1) == len(weights2)
+
+        for j in range(len(weights1)):
+            _weight1 = np.asarray(weights1[j])
+            _weight2 = np.asarray(weights2[j])
+
+            np.testing.assert_almost_equal(_weight1, _weight2, 4)
diff --git a/aeon/testing/test_all_estimators.py b/aeon/testing/test_all_estimators.py
index a6dd584487..b65276b9dc 100644
--- a/aeon/testing/test_all_estimators.py
+++ b/aeon/testing/test_all_estimators.py
@@ -205,7 +205,7 @@ def _all_estimators(self):
             estimator_types=getattr(self, "estimator_type_filter", None),
             return_names=False,
             exclude_estimators=EXCLUDE_ESTIMATORS,
-            exclude_estimator_types=["classifier"],
+            exclude_estimator_types=["classifier", "regressor"],
         )
 
         # subsample estimators by OS & python version
diff --git a/aeon/testing/test_config.py b/aeon/testing/test_config.py
index cb11f3df59..0079ec6814 100644
--- a/aeon/testing/test_config.py
+++ b/aeon/testing/test_config.py
@@ -69,7 +69,8 @@
     # has a keras fail, unknown reason, see #1387
     "LearningShapeletClassifier": ["check_fit_deterministic"],
    # does not fit structure for test, needs investigation
-    "TapNetClassifier": ["check_random_state_deep_learning"],
+    "TapNetClassifier": ["check_classifier_random_state_deep_learning"],
+    "TapNetRegressor": ["check_regressor_random_state_deep_learning"],
     # needs investigation
     "SASTClassifier": ["check_fit_deterministic"],
     "RSASTClassifier": ["check_fit_deterministic"],
diff --git a/aeon/testing/tests/test_all_estimators.py b/aeon/testing/tests/test_all_estimators.py
index 454eb13c1e..a356c7eb0c 100644
--- a/aeon/testing/tests/test_all_estimators.py
+++ b/aeon/testing/tests/test_all_estimators.py
@@ -9,7 +9,7 @@
 from aeon.utils.sampling import random_partition
 
 ALL_ESTIMATORS = all_estimators(
-    estimator_types=["classifier"],
+    estimator_types=["classifier", "regressor"],
     return_names=False,
 )