diff --git a/aeon/anomaly_detection/tests/test_all_anomaly_detectors.py b/aeon/anomaly_detection/tests/test_all_anomaly_detectors.py deleted file mode 100644 index 5255e1bf60..0000000000 --- a/aeon/anomaly_detection/tests/test_all_anomaly_detectors.py +++ /dev/null @@ -1,88 +0,0 @@ -"""Tests for all anomaly detectors.""" - -__maintainer__ = ["MatthewMiddlehurst"] - -import numpy as np -import pytest - -from aeon.base._base_series import VALID_INNER_TYPES -from aeon.registry import all_estimators -from aeon.testing.data_generation._legacy import make_series - -ALL_ANOMALY_DETECTORS = all_estimators( - estimator_types="anomaly-detector", - return_names=False, -) - -labels = np.zeros(15, dtype=np.int_) -labels[np.random.choice(15, 5)] = 1 -uv_series = make_series(n_timepoints=15, return_numpy=True, random_state=0) -uv_series[labels == 1] += 1 -mv_series = make_series( - n_timepoints=15, n_columns=2, return_numpy=True, random_state=0 -).T -mv_series[:, labels == 1] += 1 - - -@pytest.mark.parametrize("anomaly_detector", ALL_ANOMALY_DETECTORS) -def test_anomaly_detector_univariate(anomaly_detector): - """Test the anomaly detector on univariate data.""" - try: - ad = anomaly_detector.create_test_instance() - except ModuleNotFoundError: - return None - - if anomaly_detector.get_class_tag(tag_name="capability:univariate"): - pred = ad.fit_predict(uv_series, labels) - assert isinstance(pred, np.ndarray) - assert pred.shape == (15,) - assert issubclass(pred.dtype.type, (np.integer, np.floating, np.bool_)) - else: - with pytest.raises(ValueError, match="Univariate data not supported"): - ad.fit_predict(uv_series, labels) - - -@pytest.mark.parametrize("anomaly_detector", ALL_ANOMALY_DETECTORS) -def test_anomaly_detector_multivariate(anomaly_detector): - """Test the anomaly detector on multivariate data.""" - try: - ad = anomaly_detector.create_test_instance() - except ModuleNotFoundError: - return None - - if anomaly_detector.get_class_tag(tag_name="capability:multivariate"): - pred = ad.fit_predict(mv_series, labels) - assert isinstance(pred, np.ndarray) - assert pred.shape == (15,) - assert issubclass(pred.dtype.type, (np.integer, np.floating, np.bool_)) - else: - with pytest.raises(ValueError, match="Multivariate data not supported"): - ad.fit_predict(mv_series, labels) - - -@pytest.mark.parametrize("anomaly_detector", ALL_ANOMALY_DETECTORS) -def test_anomaly_detector_overrides_and_tags(anomaly_detector): - """Test compliance with the anomaly detector base class contract.""" - # Test they don't override final methods, because Python does not enforce this - assert "fit" not in anomaly_detector.__dict__ - assert "predict" not in anomaly_detector.__dict__ - assert "fit_predict" not in anomaly_detector.__dict__ - - # Test that all anomaly detectors implement abstract predict. 
- assert "_predict" in anomaly_detector.__dict__ - - # axis class parameter is for internal use only - assert "axis" not in anomaly_detector.__dict__ - - # Test that fit_is_empty is correctly set - fit_is_empty = anomaly_detector.get_class_tag(tag_name="fit_is_empty") - assert not fit_is_empty == "_fit" not in anomaly_detector.__dict__ - - # Test valid tag for X_inner_type - X_inner_type = anomaly_detector.get_class_tag(tag_name="X_inner_type") - assert X_inner_type in VALID_INNER_TYPES - - # Must have at least one set to True - multi = anomaly_detector.get_class_tag(tag_name="capability:multivariate") - uni = anomaly_detector.get_class_tag(tag_name="capability:univariate") - assert multi or uni diff --git a/aeon/benchmarking/tests/test_experiments.py b/aeon/benchmarking/tests/test_experiments.py index 82a0fa8f99..3a7ffa09fa 100644 --- a/aeon/benchmarking/tests/test_experiments.py +++ b/aeon/benchmarking/tests/test_experiments.py @@ -15,7 +15,7 @@ from aeon.classification import DummyClassifier from aeon.clustering import TimeSeriesKMeans from aeon.datasets import load_unit_test -from aeon.testing.test_config import PR_TESTING +from aeon.testing.testing_config import PR_TESTING @pytest.mark.skipif( diff --git a/aeon/benchmarking/tests/test_results_loaders.py b/aeon/benchmarking/tests/test_results_loaders.py index efdfd3da4d..5faf6940ae 100644 --- a/aeon/benchmarking/tests/test_results_loaders.py +++ b/aeon/benchmarking/tests/test_results_loaders.py @@ -16,7 +16,7 @@ get_estimator_results_as_array, ) from aeon.datasets._data_loaders import CONNECTION_ERRORS -from aeon.testing.test_config import PR_TESTING +from aeon.testing.testing_config import PR_TESTING cls = ["HC2", "FreshPRINCE", "InceptionT"] data = ["Chinatown", "Tools"] diff --git a/aeon/classification/early_classification/tests/test_all_early_classifiers.py b/aeon/classification/early_classification/tests/test_all_early_classifiers.py deleted file mode 100644 index 5a3058f40b..0000000000 --- a/aeon/classification/early_classification/tests/test_all_early_classifiers.py +++ /dev/null @@ -1,122 +0,0 @@ -"""Unit tests for early classifier input output.""" - -__maintainer__ = [] - -from sys import platform - -import numpy as np -from sklearn.utils._testing import set_random_state - -from aeon.datasets import load_basic_motions, load_unit_test -from aeon.testing.expected_results.expected_classifier_outputs import ( - basic_motions_proba, - unit_test_proba, -) -from aeon.testing.test_all_estimators import BaseFixtureGenerator, QuickTester -from aeon.testing.utils.estimator_checks import _assert_array_almost_equal - - -class EarlyClassifierFixtureGenerator(BaseFixtureGenerator): - """Fixture generator for early classifier tests. 
- - Fixtures parameterized - ---------------------- - estimator_class: estimator inheriting from BaseObject - ranges over estimator classes not excluded by EXCLUDE_ESTIMATORS, EXCLUDED_TESTS - estimator_instance: instance of estimator inheriting from BaseObject - ranges over estimator classes not excluded by EXCLUDE_ESTIMATORS, EXCLUDED_TESTS - instances are generated by create_test_instance class method - scenario: instance of TestScenario - ranges over all scenarios returned by retrieve_scenarios - """ - - # note: this should be separate from TestAllEarlyClassifiers - # additional fixtures, parameters, etc should be added here - # TestAllEarlyClassifiers should contain the tests only - - estimator_type_filter = "early_classifier" - - -class TestAllEarlyClassifiers(EarlyClassifierFixtureGenerator, QuickTester): - """Module level tests for all aeon classifiers.""" - - def test_classifier_output(self, estimator_instance, scenario): - """Test classifier outputs the correct data types and values. - - Test predict produces a np.array or pd.Series with only values seen in the train - data, and that predict_proba probability estimates add up to one. - """ - n_classes = scenario.get_tag("n_classes") - X_new = scenario.args["predict"]["X"] - y_train = scenario.args["fit"]["y"] - y_pred, decisions = scenario.run( - estimator_instance, method_sequence=["fit", "predict"] - ) - - # check predict - assert isinstance(y_pred, np.ndarray) - assert y_pred.shape == (X_new.shape[0],) - assert np.all(np.isin(np.unique(y_pred), np.unique(y_train))) - assert isinstance(decisions, np.ndarray) - assert decisions.shape == (X_new.shape[0],) - assert decisions.dtype == bool - - # predict and update methods should update the state info as an array - assert isinstance(estimator_instance.get_state_info(), np.ndarray) - - # check predict proba (all classifiers have predict_proba by default) - y_proba, decisions = scenario.run( - estimator_instance, method_sequence=["predict_proba"] - ) - assert isinstance(y_proba, np.ndarray) - assert y_proba.shape == (X_new.shape[0], n_classes) - np.testing.assert_allclose(y_proba.sum(axis=1), 1) - assert isinstance(decisions, np.ndarray) - assert decisions.shape == (X_new.shape[0],) - assert decisions.dtype == bool - - def test_early_classifier_against_expected_results(self, estimator_class): - """Test early classifier against stored results.""" - # we only use the first estimator instance for testing - classname = estimator_class.__name__ - - # We cannot guarantee same results on ARM macOS - if platform == "darwin": - return None - - for data_name, data_dict, data_loader, data_seed in [ - ["UnitTest", unit_test_proba, load_unit_test, 0], - ["BasicMotions", basic_motions_proba, load_basic_motions, 4], - ]: - # retrieve expected predict_proba output, and skip test if not available - if classname in data_dict.keys(): - expected_probas = data_dict[classname] - else: - # skip test if no expected probas are registered - continue - - # we only use the first estimator instance for testing - estimator_instance = estimator_class.create_test_instance( - parameter_set="results_comparison" - ) - # set random seed if possible - set_random_state(estimator_instance, 0) - - # load test data - X_train, y_train = data_loader(split="train") - X_test, _ = data_loader(split="test") - indices = np.random.RandomState(data_seed).choice( - len(y_train), 10, replace=False - ) - - # train classifier and predict probas - estimator_instance.fit(X_train[indices], y_train[indices]) - y_proba, _ = 
estimator_instance.predict_proba(X_test[indices]) - - # assert probabilities are the same - _assert_array_almost_equal( - y_proba, - expected_probas, - decimal=2, - err_msg=f"Failed to reproduce results for {classname} on {data_name}", - ) diff --git a/aeon/classification/hybrid/tests/test_hc.py b/aeon/classification/hybrid/tests/test_hc.py index 5e50cb6bf7..23861f4f01 100644 --- a/aeon/classification/hybrid/tests/test_hc.py +++ b/aeon/classification/hybrid/tests/test_hc.py @@ -5,7 +5,7 @@ from aeon.classification.hybrid import HIVECOTEV1, HIVECOTEV2 from aeon.testing.data_generation import make_example_3d_numpy -from aeon.testing.test_config import PR_TESTING +from aeon.testing.testing_config import PR_TESTING @pytest.mark.skipif(PR_TESTING, reason="slow test, run overnight only") diff --git a/aeon/datasets/tests/test_data_loaders.py b/aeon/datasets/tests/test_data_loaders.py index 6844367da0..e227a14f77 100644 --- a/aeon/datasets/tests/test_data_loaders.py +++ b/aeon/datasets/tests/test_data_loaders.py @@ -31,7 +31,7 @@ _load_tsc_dataset, download_dataset, ) -from aeon.testing.test_config import PR_TESTING +from aeon.testing.testing_config import PR_TESTING @pytest.mark.skipif( diff --git a/aeon/datasets/tests/test_data_writers.py b/aeon/datasets/tests/test_data_writers.py index 0c225925cc..bf26cedc04 100644 --- a/aeon/datasets/tests/test_data_writers.py +++ b/aeon/datasets/tests/test_data_writers.py @@ -22,7 +22,7 @@ make_example_3d_numpy_list, make_example_nested_dataframe, ) -from aeon.testing.test_config import PR_TESTING +from aeon.testing.testing_config import PR_TESTING @pytest.mark.skipif( diff --git a/aeon/datasets/tests/test_dataframe_loaders.py b/aeon/datasets/tests/test_dataframe_loaders.py index 3a16999e9b..d886df0045 100644 --- a/aeon/datasets/tests/test_dataframe_loaders.py +++ b/aeon/datasets/tests/test_dataframe_loaders.py @@ -11,7 +11,7 @@ load_from_tsfile_to_dataframe, load_from_ucr_tsv_to_dataframe, ) -from aeon.testing.test_config import PR_TESTING +from aeon.testing.testing_config import PR_TESTING @pytest.mark.skipif( diff --git a/aeon/datasets/tests/test_dataset_collections.py b/aeon/datasets/tests/test_dataset_collections.py index 611f2e7804..883f53f26e 100644 --- a/aeon/datasets/tests/test_dataset_collections.py +++ b/aeon/datasets/tests/test_dataset_collections.py @@ -8,7 +8,7 @@ get_available_tsf_datasets, get_downloaded_tsf_datasets, ) -from aeon.testing.test_config import PR_TESTING +from aeon.testing.testing_config import PR_TESTING @pytest.mark.skipif( diff --git a/aeon/datasets/tests/test_load_forecasting.py b/aeon/datasets/tests/test_load_forecasting.py index 7f0043800c..cf90835307 100644 --- a/aeon/datasets/tests/test_load_forecasting.py +++ b/aeon/datasets/tests/test_load_forecasting.py @@ -8,7 +8,7 @@ import aeon from aeon.datasets import load_forecasting, load_from_tsf_file, load_uschange -from aeon.testing.test_config import PR_TESTING +from aeon.testing.testing_config import PR_TESTING _CHECKS = { "uschange": { diff --git a/aeon/datasets/tests/test_tss_data_loader.py b/aeon/datasets/tests/test_tss_data_loader.py index f0d3e9cdcf..3d90b644df 100644 --- a/aeon/datasets/tests/test_tss_data_loader.py +++ b/aeon/datasets/tests/test_tss_data_loader.py @@ -12,7 +12,7 @@ ) from aeon.datasets.tests.test_data_loaders import CONNECTION_ERRORS from aeon.segmentation import ClaSPSegmenter -from aeon.testing.test_config import PR_TESTING +from aeon.testing.testing_config import PR_TESTING @pytest.mark.skipif( diff --git 
a/aeon/segmentation/tests/test_all_segmenters.py b/aeon/segmentation/tests/test_all_segmenters.py deleted file mode 100644 index 8696766d12..0000000000 --- a/aeon/segmentation/tests/test_all_segmenters.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Tests for all segmenters.""" - -import numpy as np -import pytest - -from aeon.base._base_series import VALID_INNER_TYPES -from aeon.registry import all_estimators - -ALL_SEGMENTERS = all_estimators( - estimator_types="segmenter", - return_names=False, -) - - -@pytest.mark.parametrize("segmenter", ALL_SEGMENTERS) -def test_segmenter_base_functionality(segmenter): - """Test compliance with the base class contract.""" - # Test they dont override final methods, because python does not enforce this - assert "fit" not in segmenter.__dict__ - assert "predict" not in segmenter.__dict__ - assert "fit_predict" not in segmenter.__dict__ - # Test that all segmenters implement abstract predict. - assert "_predict" in segmenter.__dict__ - # Test that fit_is_empty is correctly set - fit_is_empty = segmenter.get_class_tag(tag_name="fit_is_empty") - assert not fit_is_empty == "_fit" not in segmenter.__dict__ - # Test valid tag for X_inner_type - X_inner_type = segmenter.get_class_tag(tag_name="X_inner_type") - assert X_inner_type in VALID_INNER_TYPES - # Must have at least one set to True - multi = segmenter.get_class_tag(tag_name="capability:multivariate") - uni = segmenter.get_class_tag(tag_name="capability:univariate") - assert multi or uni - - -def _assert_output(output, dense, length): - """Assert the properties of the segmenter output.""" - assert isinstance(output, np.ndarray) - if dense: # Change points returned - assert len(output) < length - assert max(output) < length - assert min(output) >= 0 - # Test in ascending order - assert all(output[i] <= output[i + 1] for i in range(len(output) - 1)) - else: # Segment labels returned, must be same length sas series - assert len(output) == length - - -@pytest.mark.parametrize("segmenter", ALL_SEGMENTERS) -def test_segmenter_instance(segmenter): - """Test segmenters.""" - try: - instance = segmenter.create_test_instance() - except ModuleNotFoundError: - return - - multivariate = segmenter.get_class_tag(tag_name="capability:multivariate") - X = np.random.random(size=(5, 20)) - # Also tests does not fail if y is passed - y = np.array([0, 0, 0, 1, 1]) - # Test that capability:multivariate is correctly set - dense = segmenter.get_class_tag(tag_name="returns_dense") - if multivariate: - output = instance.fit_predict(X, y, axis=1) - _assert_output(output, dense, X.shape[1]) - else: - with pytest.raises(ValueError, match="Multivariate data not supported"): - instance.fit_predict(X, y, axis=1) - # Test that output is correct type - X = np.random.random(size=(20)) - uni = segmenter.get_class_tag(tag_name="capability:univariate") - if uni: - output = instance.fit_predict(X, y=X) - _assert_output(output, dense, len(X)) - else: - with pytest.raises(ValueError, match="Univariate data not supported"): - instance.fit_predict(X) diff --git a/aeon/testing/estimator_checking/_estimator_checking.py b/aeon/testing/estimator_checking/_estimator_checking.py index b00705f2b2..eb15557934 100644 --- a/aeon/testing/estimator_checking/_estimator_checking.py +++ b/aeon/testing/estimator_checking/_estimator_checking.py @@ -18,7 +18,7 @@ from aeon.testing.estimator_checking._yield_estimator_checks import ( _yield_all_aeon_checks, ) -from aeon.testing.test_config import EXCLUDE_ESTIMATORS, EXCLUDED_TESTS +from aeon.testing.testing_config import 
EXCLUDE_ESTIMATORS, EXCLUDED_TESTS from aeon.utils.validation._dependencies import ( _check_estimator_deps, _check_soft_dependencies, @@ -293,7 +293,7 @@ def _should_be_skipped(estimator, check, has_dependencies): check_name = check.func.__name__ if isinstance(check, partial) else check.__name__ # check estimator dependencies - if not has_dependencies: + if not has_dependencies and "softdep" not in check_name: return True, "Incompatible dependencies or Python version", check_name # check aeon exclude lists @@ -344,6 +344,8 @@ def _get_check_estimator_ids(obj): elif hasattr(obj, "get_params"): with config_context(print_changed_only=True): s = re.sub(r"\s", "", str(obj)) - return re.sub(r"", "func", s) + s = re.sub(r"", "func", s) + s = re.sub(r"", "boundmethod", s) + return s else: return obj diff --git a/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py b/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py new file mode 100644 index 0000000000..3686f6e0b9 --- /dev/null +++ b/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py @@ -0,0 +1,89 @@ +"""Tests for all anomaly detectors.""" + +from functools import partial + +import numpy as np +import pytest + +from aeon.base._base import _clone_estimator +from aeon.base._base_series import VALID_INNER_TYPES +from aeon.testing.data_generation._legacy import make_series + + +def _yield_anomaly_detection_checks(estimator_class, estimator_instances, datatypes): + """Yield all anomaly detection checks for an aeon anomaly detector.""" + # only class required + yield partial( + check_anomaly_detector_overrides_and_tags, estimator_class=estimator_class + ) + + # test class instances + for _, estimator in enumerate(estimator_instances): + # no data needed + yield partial(check_anomaly_detector_univariate, estimator=estimator) + yield partial(check_anomaly_detector_multivariate, estimator=estimator) + + +def check_anomaly_detector_overrides_and_tags(estimator_class): + """Test compliance with the anomaly detector base class contract.""" + # Test they don't override final methods, because Python does not enforce this + assert "fit" not in estimator_class.__dict__ + assert "predict" not in estimator_class.__dict__ + assert "fit_predict" not in estimator_class.__dict__ + + # Test that all anomaly detectors implement abstract predict. 
+ assert "_predict" in estimator_class.__dict__ + + # axis class parameter is for internal use only + assert "axis" not in estimator_class.__dict__ + + # Test that fit_is_empty is correctly set + fit_is_empty = estimator_class.get_class_tag(tag_name="fit_is_empty") + assert not fit_is_empty == "_fit" not in estimator_class.__dict__ + + # Test valid tag for X_inner_type + X_inner_type = estimator_class.get_class_tag(tag_name="X_inner_type") + assert X_inner_type in VALID_INNER_TYPES + + # Must have at least one set to True + multi = estimator_class.get_class_tag(tag_name="capability:multivariate") + uni = estimator_class.get_class_tag(tag_name="capability:univariate") + assert multi or uni + + +labels = np.zeros(15, dtype=np.int_) +labels[np.random.choice(15, 5)] = 1 +uv_series = make_series(n_timepoints=15, return_numpy=True, random_state=0) +uv_series[labels == 1] += 1 +mv_series = make_series( + n_timepoints=15, n_columns=2, return_numpy=True, random_state=0 +).T +mv_series[:, labels == 1] += 1 + + +def check_anomaly_detector_univariate(estimator): + """Test the anomaly detector on univariate data.""" + estimator = _clone_estimator(estimator) + + if estimator.get_class_tag(tag_name="capability:univariate"): + pred = estimator.fit_predict(uv_series, labels) + assert isinstance(pred, np.ndarray) + assert pred.shape == (15,) + assert issubclass(pred.dtype.type, (np.integer, np.floating, np.bool_)) + else: + with pytest.raises(ValueError, match="Univariate data not supported"): + estimator.fit_predict(uv_series, labels) + + +def check_anomaly_detector_multivariate(estimator): + """Test the anomaly detector on multivariate data.""" + estimator = _clone_estimator(estimator) + + if estimator.get_class_tag(tag_name="capability:multivariate"): + pred = estimator.fit_predict(mv_series, labels) + assert isinstance(pred, np.ndarray) + assert pred.shape == (15,) + assert issubclass(pred.dtype.type, (np.integer, np.floating, np.bool_)) + else: + with pytest.raises(ValueError, match="Multivariate data not supported"): + estimator.fit_predict(mv_series, labels) diff --git a/aeon/testing/estimator_checking/_yield_early_classification_checks.py b/aeon/testing/estimator_checking/_yield_early_classification_checks.py new file mode 100644 index 0000000000..1b071ee648 --- /dev/null +++ b/aeon/testing/estimator_checking/_yield_early_classification_checks.py @@ -0,0 +1,126 @@ +"""Tests for all early classifiers.""" + +from functools import partial +from sys import platform + +import numpy as np +from sklearn.utils._testing import set_random_state + +from aeon.base._base import _clone_estimator +from aeon.datasets import load_basic_motions, load_unit_test +from aeon.testing.expected_results.expected_classifier_outputs import ( + basic_motions_proba, + unit_test_proba, +) +from aeon.testing.testing_data import FULL_TEST_DATA_DICT +from aeon.testing.utils.estimator_checks import _assert_array_almost_equal +from aeon.utils.validation import get_n_cases + + +def _yield_early_classification_checks(estimator_class, estimator_instances, datatypes): + """Yield all early classification checks for an aeon early classifier.""" + # only class required + yield partial( + check_early_classifier_against_expected_results, estimator_class=estimator_class + ) + + # test class instances + for i, estimator in enumerate(estimator_instances): + # test all data types + for datatype in datatypes[i]: + yield partial( + check_classifier_output, + estimator=estimator, + datatype=datatype, + ) + + +def 
check_early_classifier_against_expected_results(estimator_class): + """Test early classifier against stored results.""" + # we only use the first estimator instance for testing + classname = estimator_class.__name__ + + # We cannot guarantee same results on ARM macOS + if platform == "darwin": + return None + + for data_name, data_dict, data_loader, data_seed in [ + ["UnitTest", unit_test_proba, load_unit_test, 0], + ["BasicMotions", basic_motions_proba, load_basic_motions, 4], + ]: + # retrieve expected predict_proba output, and skip test if not available + if classname in data_dict.keys(): + expected_probas = data_dict[classname] + else: + # skip test if no expected probas are registered + continue + + # we only use the first estimator instance for testing + estimator_instance = estimator_class.create_test_instance( + parameter_set="results_comparison" + ) + # set random seed if possible + set_random_state(estimator_instance, 0) + + # load test data + X_train, y_train = data_loader(split="train") + X_test, _ = data_loader(split="test") + indices = np.random.RandomState(data_seed).choice( + len(y_train), 10, replace=False + ) + + # train classifier and predict probas + estimator_instance.fit(X_train[indices], y_train[indices]) + y_proba, _ = estimator_instance.predict_proba(X_test[indices]) + + # assert probabilities are the same + _assert_array_almost_equal( + y_proba, + expected_probas, + decimal=2, + err_msg=f"Failed to reproduce results for {classname} on {data_name}", + ) + + +def check_classifier_output(estimator, datatype): + """Test classifier outputs the correct data types and values. + + Test predict produces a np.array or pd.Series with only values seen in the train + data, and that predict_proba probability estimates add up to one. + """ + estimator = _clone_estimator(estimator) + + unique_labels = np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]) + + # run fit and predict + estimator.fit( + FULL_TEST_DATA_DICT[datatype]["train"][0], + FULL_TEST_DATA_DICT[datatype]["train"][1], + ) + y_pred, decisions = estimator.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + + # check predict + assert isinstance(y_pred, np.ndarray) + assert y_pred.shape == (get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]),) + assert np.all(np.isin(np.unique(y_pred), unique_labels)) + assert isinstance(decisions, np.ndarray) + assert decisions.shape == (get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]),) + assert decisions.dtype == bool + + # predict and update methods should update the state info as an array + assert isinstance(estimator.get_state_info(), np.ndarray) + + # check predict proba (all classifiers have predict_proba by default) + y_proba, decisions = estimator.predict_proba( + FULL_TEST_DATA_DICT[datatype]["test"][0] + ) + + assert isinstance(y_proba, np.ndarray) + assert y_proba.shape == ( + get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]), + len(unique_labels), + ) + np.testing.assert_allclose(y_proba.sum(axis=1), 1) + assert isinstance(decisions, np.ndarray) + assert decisions.shape == (get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]),) + assert decisions.dtype == bool diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py index ca5b36d797..5a8aed029b 100644 --- a/aeon/testing/estimator_checking/_yield_estimator_checks.py +++ b/aeon/testing/estimator_checking/_yield_estimator_checks.py @@ -11,24 +11,42 @@ from sklearn.exceptions import NotFittedError from sklearn.utils.estimator_checks 
import check_get_params_invariance +from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.base import BaseEstimator, BaseObject from aeon.base._base import _clone_estimator from aeon.classification import BaseClassifier from aeon.classification.deep_learning.base import BaseDeepClassifier +from aeon.classification.early_classification import BaseEarlyClassifier from aeon.clustering import BaseClusterer from aeon.clustering.deep_learning.base import BaseDeepClusterer from aeon.regression import BaseRegressor from aeon.regression.deep_learning.base import BaseDeepRegressor +from aeon.segmentation import BaseSegmenter +from aeon.testing.estimator_checking._yield_anomaly_detection_checks import ( + _yield_anomaly_detection_checks, +) from aeon.testing.estimator_checking._yield_classification_checks import ( _yield_classification_checks, ) from aeon.testing.estimator_checking._yield_clustering_checks import ( _yield_clustering_checks, ) +from aeon.testing.estimator_checking._yield_early_classification_checks import ( + _yield_early_classification_checks, +) from aeon.testing.estimator_checking._yield_regression_checks import ( _yield_regression_checks, ) -from aeon.testing.test_config import ( +from aeon.testing.estimator_checking._yield_segmentation_checks import ( + _yield_segmentation_checks, +) +from aeon.testing.estimator_checking._yield_soft_dependency_checks import ( + _yield_soft_dependency_checks, +) +from aeon.testing.estimator_checking._yield_transformation_checks import ( + _yield_transformation_checks, +) +from aeon.testing.testing_config import ( NON_STATE_CHANGING_METHODS, NON_STATE_CHANGING_METHODS_ARRAYLIKE, VALID_ESTIMATOR_BASE_TYPES, @@ -51,6 +69,8 @@ def _yield_all_aeon_checks( estimator, use_first_parameter_set=False, has_dependencies=None ): """Yield all checks for an aeon estimator.""" + # functions which use this will generally skip if dependencies are not met + # UNLESS the check name has "softdep" in it if has_dependencies is None: has_dependencies = _check_estimator_deps(estimator, severity="none") @@ -75,6 +95,8 @@ def _yield_all_aeon_checks( else: # if input does not have all dependencies installed, all tests are going to be # skipped as we cannot instantiate the class + # we still need inputs for the checks to return them and show that they + # have been skipped estimator_class = estimator if isclass(estimator) else type(estimator) estimator_instances = [None] datatypes = [[None]] @@ -82,11 +104,20 @@ def _yield_all_aeon_checks( # start yielding checks yield from _yield_estimator_checks(estimator_class, estimator_instances, datatypes) + yield from _yield_soft_dependency_checks( + estimator_class, estimator_instances, datatypes + ) + if issubclass(estimator_class, BaseClassifier): yield from _yield_classification_checks( estimator_class, estimator_instances, datatypes ) + if issubclass(estimator_class, BaseEarlyClassifier): + yield from _yield_early_classification_checks( + estimator_class, estimator_instances, datatypes + ) + if issubclass(estimator_class, BaseRegressor): yield from _yield_regression_checks( estimator_class, estimator_instances, datatypes @@ -97,6 +128,21 @@ def _yield_all_aeon_checks( estimator_class, estimator_instances, datatypes ) + if issubclass(estimator_class, BaseSegmenter): + yield from _yield_segmentation_checks( + estimator_class, estimator_instances, datatypes + ) + + if issubclass(estimator_class, BaseAnomalyDetector): + yield from _yield_anomaly_detection_checks( + estimator_class, estimator_instances, datatypes + ) + 
+ if issubclass(estimator_class, BaseTransformer): + yield from _yield_transformation_checks( + estimator_class, estimator_instances, datatypes + ) + def _yield_estimator_checks(estimator_class, estimator_instances, datatypes): """Yield all general checks for an aeon estimator.""" @@ -147,7 +193,7 @@ def _yield_estimator_checks(estimator_class, estimator_instances, datatypes): if not _get_tag(estimator, "cant-pickle", default=False): yield partial( - test_persistence_via_pickle, + check_persistence_via_pickle, estimator=estimator, datatype=datatypes[i][0], ) @@ -611,7 +657,7 @@ def check_raises_not_fitted_error(estimator, datatype): _run_estimator_method(estimator, method, datatype, "test") -def test_persistence_via_pickle(estimator, datatype): +def check_persistence_via_pickle(estimator, datatype): """Check that we can pickle all estimators.""" estimator = _clone_estimator(estimator, random_state=0) _run_estimator_method(estimator, "fit", datatype, "train") diff --git a/aeon/testing/estimator_checking/_yield_segmentation_checks.py b/aeon/testing/estimator_checking/_yield_segmentation_checks.py new file mode 100644 index 0000000000..7f10d86d0f --- /dev/null +++ b/aeon/testing/estimator_checking/_yield_segmentation_checks.py @@ -0,0 +1,79 @@ +"""Tests for all segmenters.""" + +from functools import partial + +import numpy as np +import pytest + +from aeon.base._base import _clone_estimator +from aeon.base._base_series import VALID_INNER_TYPES + + +def _yield_segmentation_checks(estimator_class, estimator_instances, datatypes): + """Yield all segmentation checks for an aeon segmenter.""" + # only class required + yield partial(check_segmenter_base_functionality, estimator_class=estimator_class) + + # test class instances + for _, estimator in enumerate(estimator_instances): + # no data needed + yield partial(check_segmenter_instance, estimator=estimator) + + +def check_segmenter_base_functionality(estimator_class): + """Test compliance with the base class contract.""" + # Test they don't override final methods, because Python does not enforce this + assert "fit" not in estimator_class.__dict__ + assert "predict" not in estimator_class.__dict__ + assert "fit_predict" not in estimator_class.__dict__ + # Test that all segmenters implement abstract predict.
+ assert "_predict" in estimator_class.__dict__ + # Test that fit_is_empty is correctly set + fit_is_empty = estimator_class.get_class_tag(tag_name="fit_is_empty") + assert not fit_is_empty == "_fit" not in estimator_class.__dict__ + # Test valid tag for X_inner_type + X_inner_type = estimator_class.get_class_tag(tag_name="X_inner_type") + assert X_inner_type in VALID_INNER_TYPES + # Must have at least one set to True + multi = estimator_class.get_class_tag(tag_name="capability:multivariate") + uni = estimator_class.get_class_tag(tag_name="capability:univariate") + assert multi or uni + + +def check_segmenter_instance(estimator): + """Test segmenters.""" + estimator = _clone_estimator(estimator) + + def _assert_output(output, dense, length): + """Assert the properties of the segmenter output.""" + assert isinstance(output, np.ndarray) + if dense: # Change points returned + assert len(output) < length + assert max(output) < length + assert min(output) >= 0 + # Test in ascending order + assert all(output[i] <= output[i + 1] for i in range(len(output) - 1)) + else: # Segment labels returned, must be same length as series + assert len(output) == length + + multivariate = estimator.get_class_tag(tag_name="capability:multivariate") + X = np.random.random(size=(5, 20)) + # Also tests does not fail if y is passed + y = np.array([0, 0, 0, 1, 1]) + # Test that capability:multivariate is correctly set + dense = estimator.get_class_tag(tag_name="returns_dense") + if multivariate: + output = estimator.fit_predict(X, y, axis=1) + _assert_output(output, dense, X.shape[1]) + else: + with pytest.raises(ValueError, match="Multivariate data not supported"): + estimator.fit_predict(X, y, axis=1) + # Test that output is correct type + X = np.random.random(size=(20)) + uni = estimator.get_class_tag(tag_name="capability:univariate") + if uni: + output = estimator.fit_predict(X, y=X) + _assert_output(output, dense, len(X)) + else: + with pytest.raises(ValueError, match="Univariate data not supported"): + estimator.fit_predict(X) diff --git a/aeon/testing/estimator_checking/_yield_soft_dependency_checks.py b/aeon/testing/estimator_checking/_yield_soft_dependency_checks.py new file mode 100644 index 0000000000..4539fafd33 --- /dev/null +++ b/aeon/testing/estimator_checking/_yield_soft_dependency_checks.py @@ -0,0 +1,71 @@ +"""Tests for estimator soft dependencies. + +Only tests with 'softdep' in the name will be run by `check_estimator` if any +required package or version is missing. Other tests will be automatically skipped.
+""" + +from functools import partial + +import pytest + +from aeon.utils.validation._dependencies import ( + _check_python_version, + _check_soft_dependencies, +) + + +def _yield_soft_dependency_checks(estimator_class, estimator_instances, datatypes): + """Yield all soft dependency checks for an aeon estimator.""" + # only class required + yield partial(check_python_version_softdep, estimator_class=estimator_class) + yield partial(check_python_dependency_softdep, estimator_class=estimator_class) + + +def check_python_version_softdep(estimator_class): + """Test that estimators raise error if python version is wrong.""" + # if dependencies are incompatible skip + softdeps = estimator_class.get_class_tag("python_dependencies", None) + if softdeps is not None and not _check_soft_dependencies(softdeps, severity="none"): + return + + # should be compatible with python version and able to construct + if _check_python_version(estimator_class, severity="none"): + estimator_class.create_test_instance() + # should raise a specific error if python version is incompatible + else: + pyspec = estimator_class.get_class_tag("python_version", None) + with pytest.raises(ModuleNotFoundError) as ex_info: + estimator_class.create_test_instance() + assert "requires python version to be" in str(ex_info.value), ( + f"Estimator {estimator_class.__name__} has python version bound " + f"{pyspec} according to tags, but does not raise an appropriate " + f"error message on __init__ for incompatible python environments. " + f"Likely reason is that __init__ does not call super(cls).__init__." + ) + + +def check_python_dependency_softdep(estimator_class): + """Test that estimators raise error if required soft dependencies are missing.""" + # if python version is incompatible skip + if not _check_python_version(estimator_class, severity="none"): + return + + softdeps = estimator_class.get_class_tag("python_dependencies", None) + + # should be compatible with installed dependencies and able to construct + if softdeps is None or _check_soft_dependencies(softdeps, severity="none"): + estimator_class.create_test_instance() + # should raise a specific error if any soft dependencies are missing + else: + with pytest.raises(ModuleNotFoundError) as ex_info: + estimator_class.create_test_instance() + assert ( + "is a soft dependency and not included in the base aeon installation" + in str(ex_info.value) + ), ( + f"Estimator {estimator_class.__name__} requires soft dependencies " + f"{softdeps} according to tags, but does not raise an appropriate " + f"error message on __init__, when a soft dependency is missing. " + f"Likely reason is that __init__ does not call super(cls).__init__, " + f"or imports super(cls).__init__ only after an attempted import." 
+ ) diff --git a/aeon/testing/estimator_checking/_yield_transformation_checks.py b/aeon/testing/estimator_checking/_yield_transformation_checks.py new file mode 100644 index 0000000000..c6eac2b127 --- /dev/null +++ b/aeon/testing/estimator_checking/_yield_transformation_checks.py @@ -0,0 +1,145 @@ +"""Tests for all transformers.""" + +from functools import partial +from sys import platform + +import numpy as np +import pandas as pd +from sklearn.utils._testing import set_random_state + +from aeon.base._base import _clone_estimator +from aeon.datasets import load_basic_motions, load_unit_test +from aeon.testing.expected_results.expected_transform_outputs import ( + basic_motions_result, + unit_test_result, +) +from aeon.testing.testing_data import FULL_TEST_DATA_DICT +from aeon.testing.utils.estimator_checks import ( + _assert_array_almost_equal, + _run_estimator_method, +) + + +def _yield_transformation_checks(estimator_class, estimator_instances, datatypes): + """Yield all transformation checks for an aeon transformer.""" + # only class required + yield partial( + check_transformer_against_expected_results, estimator_class=estimator_class + ) + + # test class instances + for i, estimator in enumerate(estimator_instances): + # no data needed + yield partial( + check_capability_inverse_tag_is_correct, + estimator=estimator, + ) + yield partial( + check_remember_data_tag_is_correct, + estimator=estimator, + ) + + # test all data types + for datatype in datatypes[i]: + yield partial( + check_transform_inverse_transform_equivalent, + estimator=estimator, + datatype=datatype, + ) + + +def check_transformer_against_expected_results(estimator_class): + """Test transformer against stored results.""" + # we only use the first estimator instance for testing + classname = estimator_class.__name__ + + # We cannot guarantee same results on ARM macOS + if platform == "darwin": + return None + + for data_name, data_dict, data_loader, data_seed in [ + ["UnitTest", unit_test_result, load_unit_test, 0], + ["BasicMotions", basic_motions_result, load_basic_motions, 4], + ]: + # retrieve expected transform output, and skip test if not available + if classname in data_dict.keys(): + expected_results = data_dict[classname] + else: + # skip test if no expected results are registered + continue + + # we only use the first estimator instance for testing + estimator_instance = estimator_class.create_test_instance( + parameter_set="results_comparison" + ) + # set random seed if possible + set_random_state(estimator_instance, 0) + + # load test data + X_train, y_train = data_loader(split="train") + indices = np.random.RandomState(data_seed).choice( + len(y_train), 5, replace=False + ) + + # fit transformer and transform + results = np.nan_to_num( + estimator_instance.fit_transform(X_train[indices], y_train[indices]), + False, + 0, + 0, + 0, + ) + + # assert results are the same + _assert_array_almost_equal( + results, + expected_results, + decimal=2, + err_msg=f"Failed to reproduce results for {classname} on {data_name}", + ) + + +def check_capability_inverse_tag_is_correct(estimator): + """Test that the capability:inverse_transform tag is set correctly.""" + capability_tag = estimator.get_tag("capability:inverse_transform") + skip_tag = estimator.get_tag("skip-inverse-transform") + if capability_tag and not skip_tag: + assert estimator._has_implementation_of("_inverse_transform") + + +def check_remember_data_tag_is_correct(estimator): + """Test that the remember_data tag is set correctly.""" + fit_empty_tag = 
estimator.get_tag("fit_is_empty", True) + remember_data_tag = estimator.get_tag("remember_data", False) + msg = ( + 'if the "remember_data" tag is set to True, then the "fit_is_empty" tag ' + "must be set to False, even if _fit is not implemented or empty. " + "This is due to boilerplate that write to self.X in fit. " + f"Please check these two tags in {type(estimator)}." + ) + if fit_empty_tag and remember_data_tag: + raise AssertionError(msg) + + +def check_transform_inverse_transform_equivalent(estimator, datatype): + """Test that inverse_transform is indeed inverse to transform.""" + # skip this test if the estimator does not have inverse_transform + if not estimator.get_class_tag("capability:inverse_transform", False): + return None + + # skip this test if the estimator skips inverse_transform + if estimator.get_tag("skip-inverse-transform", False): + return None + + estimator = _clone_estimator(estimator) + + X = FULL_TEST_DATA_DICT[datatype]["train"][0] + + _run_estimator_method(estimator, "fit", datatype, "train") + Xt = _run_estimator_method(estimator, "transform", datatype, "train") + + Xit = estimator.inverse_transform(Xt) + if estimator.get_tag("transform-returns-same-time-index"): + _assert_array_almost_equal(X, Xit) + elif isinstance(X, pd.DataFrame): + _assert_array_almost_equal(X.loc[Xit.index], Xit) diff --git a/aeon/testing/expected_results/results_reproduction/__init__.py b/aeon/testing/expected_results/results_reproduction/__init__.py new file mode 100644 index 0000000000..56f1cda470 --- /dev/null +++ b/aeon/testing/expected_results/results_reproduction/__init__.py @@ -0,0 +1 @@ +"""Utilities for reproducing expected results.""" diff --git a/aeon/testing/expected_results/classifier_results_reproduction.py b/aeon/testing/expected_results/results_reproduction/classifier_results_reproduction.py similarity index 100% rename from aeon/testing/expected_results/classifier_results_reproduction.py rename to aeon/testing/expected_results/results_reproduction/classifier_results_reproduction.py diff --git a/aeon/testing/expected_results/regressor_results_reproduction.py b/aeon/testing/expected_results/results_reproduction/regressor_results_reproduction.py similarity index 100% rename from aeon/testing/expected_results/regressor_results_reproduction.py rename to aeon/testing/expected_results/results_reproduction/regressor_results_reproduction.py diff --git a/aeon/testing/expected_results/transform_results_reproduction.py b/aeon/testing/expected_results/results_reproduction/transform_results_reproduction.py similarity index 100% rename from aeon/testing/expected_results/transform_results_reproduction.py rename to aeon/testing/expected_results/results_reproduction/transform_results_reproduction.py diff --git a/aeon/testing/test_all_estimators.py b/aeon/testing/test_all_estimators.py deleted file mode 100644 index dd3bf9f555..0000000000 --- a/aeon/testing/test_all_estimators.py +++ /dev/null @@ -1,1303 +0,0 @@ -"""Suite of tests for all estimators. 
- -adapted from scikit-learn's estimator_checks -""" - -__maintainer__ = [] - -import numbers -import pickle -import types -from copy import deepcopy -from inspect import getfullargspec, isclass, signature - -import joblib -import numpy as np -import pytest -from sklearn.exceptions import NotFittedError -from sklearn.utils._testing import set_random_state -from sklearn.utils.estimator_checks import ( - check_get_params_invariance as _check_get_params_invariance, -) - -from aeon.base import BaseEstimator, BaseObject -from aeon.classification.deep_learning.base import BaseDeepClassifier -from aeon.registry import all_estimators -from aeon.regression.deep_learning.base import BaseDeepRegressor -from aeon.testing.test_config import ( - EXCLUDE_ESTIMATORS, - EXCLUDED_TESTS, - NON_STATE_CHANGING_METHODS, - NON_STATE_CHANGING_METHODS_ARRAYLIKE, - PR_TESTING, - VALID_ESTIMATOR_BASE_TYPES, - VALID_ESTIMATOR_TAGS, -) -from aeon.testing.utils._conditional_fixtures import ( - create_conditional_fixtures_and_names, -) -from aeon.testing.utils.deep_equals import deep_equals -from aeon.testing.utils.estimator_checks import ( - _assert_array_almost_equal, - _assert_array_equal, - _get_args, - _has_capability, - _list_required_methods, -) -from aeon.testing.utils.scenarios_getter import retrieve_scenarios -from aeon.utils.sampling import random_partition -from aeon.utils.validation._dependencies import _check_estimator_deps - - -def subsample_by_version_os(x): - """Subsample objects by operating system and python version. - - Ensures each estimator is tested at least once on every OS and python version, - if combined with a matrix of OS/versions. - - Currently assumes that matrix includes py3.9-3.12, and win/ubuntu/mac. - """ - import platform - import sys - - # only use 3 Python versions in PR - ix = sys.version_info.minor - if ix == 9: - ix = 0 - elif ix == 11: - ix = 1 - elif ix == 12: - ix = 2 - - os_str = platform.system() - if os_str == "Windows": - ix = ix - elif os_str == "Linux": - ix = ix + 1 - elif os_str == "Darwin": - ix = ix + 2 - else: - raise ValueError(f"found unexpected OS string: {os_str}") - ix = ix % 3 - - part = random_partition(len(x), 3) - subset_idx = part[ix] - res = [x[i] for i in subset_idx] - - return res - - -def test_subsample_by_version_os(): - """Test subsample_by_version_os.""" - res = subsample_by_version_os(["Windows", "Linux", "Darwin"]) - assert len(res) == 1 - assert res in [["Windows"], ["Linux"], ["Darwin"]] - - -class BaseFixtureGenerator: - """Fixture generator for base testing functionality in aeon. - - Test classes inheriting from this and not overriding pytest_generate_tests - will have estimator and scenario fixtures parametrized out of the box. - - Descendants can override: - estimator_type_filter: str, class variable; None or string of estimator type - e.g., "classifier", "regressor", "segmenter". 
- see BASE_CLASS_IDENTIFIER_LIST - which estimators are being retrieved and tested - fixture_sequence: list of str - sequence of fixture variable names in conditional fixture generation - _generate_[variable]: object methods, all (test_name: str, **kwargs) -> list - generating list of fixtures for fixture variable with name [variable] - to be used in test with name test_name - can optionally use values for fixtures earlier in fixture_sequence, - these must be input as kwargs in a call - is_excluded: static method (test_name: str, est: class) -> bool - whether test with name test_name should be excluded for estimator est - should be used only for encoding general rules, not individual skips - individual skips should go on the EXCLUDED_TESTS list in _config - requires _generate_estimator_class and _generate_estimator_instance as is - _excluded_scenario: static method (test_name: str, scenario) -> bool - whether scenario should be skipped in test with test_name test_name - requires _generate_estimator_scenario as is - - Fixtures parametrized - --------------------- - estimator_class: estimator inheriting from BaseObject - ranges over estimator classes not excluded by EXCLUDE_ESTIMATORS, EXCLUDED_TESTS - estimator_instance: instance of estimator inheriting from BaseObject - ranges over estimator classes not excluded by EXCLUDE_ESTIMATORS, EXCLUDED_TESTS - instances are generated by create_test_instance class method of estimator_class - scenario: instance of TestScenario - ranges over all scenarios returned by retrieve_scenarios - applicable for estimator_class or estimator_instance - method_nsc: string, name of estimator method - ranges over all "predict"-like, non-state-changing methods - of estimator_instance or estimator_class that the class/object implements - method_nsc_arraylike: string, for non-state-changing estimator methods - ranges over all "predict"-like, non-state-changing estimator methods, - which return an array-like output - """ - - # class variables which can be overridden by descendants - - # which estimator types are generated; None=all - estimator_type_filter = None - - # which sequence the conditional fixtures are generated in - fixture_sequence = [ - "estimator_class", - "estimator_instance", - "scenario", - "method_nsc", - "method_nsc_arraylike", - ] - - # which fixtures are indirect, e.g., have an additional pytest.fixture block - # to generate an indirect fixture at runtime. Example: estimator_instance - # warning: direct fixtures retain state changes within the same test - indirect_fixtures = ["estimator_instance"] - - def pytest_generate_tests(self, metafunc): - """Test parameterization routine for pytest. - - This uses create_conditional_fixtures_and_names and generator_dict - to create the fixtures for a mark.parametrize decoration of all tests. 
- """ - # get name of the test - test_name = metafunc.function.__name__ - - fixture_sequence = self.fixture_sequence - - fixture_vars = getfullargspec(metafunc.function)[0] - - ( - fixture_param_str, - fixture_prod, - fixture_names, - ) = create_conditional_fixtures_and_names( - test_name=test_name, - fixture_vars=fixture_vars, - generator_dict=self.generator_dict(), - fixture_sequence=fixture_sequence, - raise_exceptions=True, - ) - - # determine indirect variables for the parametrization block - # this is intersection of self.indirect_vixtures with args in fixture_vars - indirect_vars = list(set(fixture_vars).intersection(self.indirect_fixtures)) - - metafunc.parametrize( - fixture_param_str, - fixture_prod, - ids=fixture_names, - indirect=indirect_vars, - ) - - def _all_estimators(self): - """Retrieve list of all estimator classes of type self.estimator_type_filter.""" - est_list = all_estimators( - estimator_types=getattr(self, "estimator_type_filter", None), - return_names=False, - exclude_estimators=EXCLUDE_ESTIMATORS, - exclude_estimator_types=["classifier", "regressor", "clusterer"], - ) - - # subsample estimators by OS & python version - # this ensures that only a 1/3 of estimators are tested for a given combination - # but all are tested on every OS at least once, and on every python version once - if PR_TESTING: - est_list = subsample_by_version_os(est_list) - return est_list - - def generator_dict(self): - """Return dict with methods _generate_[variable] collected in a dict. - - The returned dict is the one required by create_conditional_fixtures_and_names, - used in this _conditional_fixture plug-in to pytest_generate_tests, above. - - Returns - ------- - generator_dict : dict, with keys [variable], where - [variable] are all strings such that self has a static method - named _generate_[variable](test_name: str, **kwargs) - value at [variable] is a reference to _generate_[variable] - """ - gens = [attr for attr in dir(self) if attr.startswith("_generate_")] - vars = [gen.replace("_generate_", "") for gen in gens] - - generator_dict = dict() - for var, gen in zip(vars, gens): - generator_dict[var] = getattr(self, gen) - - return generator_dict - - @staticmethod - def is_excluded(test_name, est): - """Shorthand to check whether test test_name is excluded for estimator est.""" - return test_name in EXCLUDED_TESTS.get(est.__name__, []) - - # the following functions define fixture generation logic for pytest_generate_tests - # each function is of signature (test_name:str, **kwargs) -> List of fixtures - # function with name _generate_[fixture_var] returns list of values for fixture_var - # where fixture_var is a fixture variable used in tests - # the list is conditional on values of other fixtures which can be passed in kwargs - - def _generate_estimator_class(self, test_name, **kwargs): - """Return estimator class fixtures. 
- - Fixtures parametrized - --------------------- - estimator_class: estimator inheriting from BaseObject - ranges over all estimator classes not excluded by EXCLUDED_TESTS - """ - estimator_classes_to_test = [ - est - for est in self._all_estimators() - if not self.is_excluded(test_name, est) - ] - - # exclude classes based on python version compatibility - estimator_classes_to_test = [ - est - for est in estimator_classes_to_test - if _check_estimator_deps(est, severity="none") - ] - - estimator_names = [est.__name__ for est in estimator_classes_to_test] - - return estimator_classes_to_test, estimator_names - - def _generate_estimator_instance(self, test_name, **kwargs): - """Return estimator instance fixtures. - - Fixtures parametrized - --------------------- - estimator_instance: instance of estimator inheriting from BaseObject - ranges over all estimator classes not excluded by EXCLUDED_TESTS - instances are generated by create_test_instance class method - """ - # call _generate_estimator_class to get all the classes - estimator_classes_to_test, _ = self._generate_estimator_class( - test_name=test_name - ) - - # create instances from the classes - estimator_instances_to_test = [] - estimator_instance_names = [] - # retrieve all estimator parameters if multiple, construct instances - for est in estimator_classes_to_test: - all_instances_of_est, instance_names = est.create_test_instances_and_names() - estimator_instances_to_test += all_instances_of_est - estimator_instance_names += instance_names - - return estimator_instances_to_test, estimator_instance_names - - # this is executed before each test instance call - # if this were not executed, estimator_instance would keep state changes - # within executions of the same test with different parameters - @pytest.fixture(scope="function") - def estimator_instance(self, request): - """estimator_instance fixture definition for indirect use.""" - # esetimator_instance is cloned at the start of every test - return request.param.clone() - - def _generate_scenario(self, test_name, **kwargs): - """Return estimator test scenario. - - Fixtures parametrized - --------------------- - scenario: instance of TestScenario - ranges over all scenarios returned by retrieve_scenarios - """ - if "estimator_class" in kwargs.keys(): - obj = kwargs["estimator_class"] - elif "estimator_instance" in kwargs.keys(): - obj = kwargs["estimator_instance"] - else: - return [] - - scenarios = retrieve_scenarios(obj) - scenarios = [s for s in scenarios if not self._excluded_scenario(test_name, s)] - scenario_names = [type(scen).__name__ for scen in scenarios] - - return scenarios, scenario_names - - @staticmethod - def _excluded_scenario(test_name, scenario): - """Skip list generator for scenarios to skip in test_name. - - Parameters - ---------- - test_name : str, name of test - scenario : instance of TestScenario, to be used in test - - Returns - ------- - bool, whether scenario should be skipped in test_name - """ - if test_name == "test_non_state_changing_method_contract": - if not scenario.get_tag("fh_passed_in_fit", True, raise_error=False): - return True - - # this line excludes all scenarios that do not have "is_enabled" flag - # we should slowly enable more scenarios for better coverage - # comment out to run the full test suite with new scenarios - if not scenario.get_tag("is_enabled", False, raise_error=False): - return True - - return False - - def _generate_method_nsc(self, test_name, **kwargs): - """Return estimator test scenario. 
- - Fixtures parametrized - --------------------- - method_nsc: string, for non-state-changing estimator methods - ranges over all "predict"-like, non-state-changing estimator methods - """ - # ensure cls is a class - if "estimator_class" in kwargs.keys(): - obj = kwargs["estimator_class"] - elif "estimator_instance" in kwargs.keys(): - obj = kwargs["estimator_instance"] - else: - return [] - - # complete list of all non-state-changing methods - nsc_list = NON_STATE_CHANGING_METHODS - - # subset to the methods that x has implemented - nsc_list = [x for x in nsc_list if _has_capability(obj, x)] - - return nsc_list - - def _generate_method_nsc_arraylike(self, test_name, **kwargs): - """Return estimator test scenario. - - Fixtures parametrized - --------------------- - method_nsc_arraylike: string, for non-state-changing estimator methods - ranges over all "predict"-like, non-state-changing estimator methods, - which return an array-like output - """ - method_nsc_list = self._generate_method_nsc(test_name=test_name, **kwargs) - - # subset to the arraylike ones to avoid copy-paste - nsc_list_arraylike = set(method_nsc_list).intersection( - NON_STATE_CHANGING_METHODS_ARRAYLIKE - ) - return list(nsc_list_arraylike) - - -class QuickTester: - """Mixin class which adds the run_tests method to run tests on one estimator.""" - - def run_tests( - self, - estimator, - raise_exceptions=False, - tests_to_run=None, - fixtures_to_run=None, - tests_to_exclude=None, - fixtures_to_exclude=None, - ): - """Run all tests on one single estimator. - - All tests in self are run on the following estimator type fixtures: - if est is a class, then estimator_class = est, and - estimator_instance loops over est.create_test_instance() - if est is an object, then estimator_class = est.__class__, and - estimator_instance = est - - This is compatible with pytest.mark.parametrize decoration, - but currently only with multiple *single variable* annotations. - - Parameters - ---------- - estimator : estimator class or estimator instance - raise_exceptions : bool, optional, default=False - whether to return exceptions/failures in the results dict, or raise them - if False: returns exceptions in returned `results` dict - if True: raises exceptions as they occur - tests_to_run : str or list of str, names of tests to run. default = all tests - sub-sets tests that are run to the tests given here. - fixtures_to_run : str or list of str, pytest test-fixture combination codes. - which test-fixture combinations to run. Default = run all of them. - sub-sets tests and fixtures to run to the list given here. - If both tests_to_run and fixtures_to_run are provided, runs the *union*, - i.e., all test-fixture combinations for tests in tests_to_run, - plus all test-fixture combinations in fixtures_to_run. - tests_to_exclude : str or list of str, names of tests to exclude. default = None - removes tests that should not be run, after subsetting via tests_to_run. - fixtures_to_exclude : str or list of str, fixtures to exclude. default = None - removes test-fixture combinations that should not be run. - This is done after subsetting via fixtures_to_run. - Overrides `return_exceptions` if used as a keyword argument. - both `raise_exceptions=True` and `return_exceptions=True`. - Will move to replace `return_exceptions` as 2nd arg in 0.17.0. 
- - Returns - ------- - results : dict of results of the tests in self - keys are test/fixture strings, identical as in pytest, e.g., test[fixture] - entries are the string "PASSED" if the test passed, - or the exception raised if the test did not pass - returned only if all tests pass, - - Raises - ------ - if raise_exceptions=True, - raises any exception produced by the tests directly - """ - tests_to_run = self._check_None_str_or_list_of_str( - tests_to_run, var_name="tests_to_run" - ) - fixtures_to_run = self._check_None_str_or_list_of_str( - fixtures_to_run, var_name="fixtures_to_run" - ) - tests_to_exclude = self._check_None_str_or_list_of_str( - tests_to_exclude, var_name="tests_to_exclude" - ) - fixtures_to_exclude = self._check_None_str_or_list_of_str( - fixtures_to_exclude, var_name="fixtures_to_exclude" - ) - - # retrieve tests from self - test_names = [attr for attr in dir(self) if attr.startswith("test")] - - # we override the generator_dict, by replacing it with temp_generator_dict: - # the only estimator (class or instance) is est, this is overridden - # the remaining fixtures are generated conditionally, without change - temp_generator_dict = deepcopy(self.generator_dict()) - - if isclass(estimator): - estimator_class = estimator - else: - estimator_class = type(estimator) - - def _generate_estimator_class(test_name, **kwargs): - return [estimator_class], [estimator_class.__name__] - - def _generate_estimator_instance(test_name, **kwargs): - return [estimator.clone()], [estimator_class.__name__] - - def _generate_estimator_instance_cls(test_name, **kwargs): - return estimator_class.create_test_instances_and_names() - - temp_generator_dict["estimator_class"] = _generate_estimator_class - - if not isclass(estimator): - temp_generator_dict["estimator_instance"] = _generate_estimator_instance - else: - temp_generator_dict["estimator_instance"] = _generate_estimator_instance_cls - # override of generator_dict end, temp_generator_dict is now prepared - - # sub-setting to specific tests to run, if tests or fixtures were speified - if tests_to_run is None and fixtures_to_run is None: - test_names_subset = test_names - else: - test_names_subset = [] - if tests_to_run is not None: - test_names_subset += list(set(test_names).intersection(tests_to_run)) - if fixtures_to_run is not None: - # fixture codes contain the test as substring until the first "[" - tests_from_fixt = [fixt.split("[")[0] for fixt in fixtures_to_run] - test_names_subset += list(set(test_names).intersection(tests_from_fixt)) - test_names_subset = list(set(test_names_subset)) - - # sub-setting by removing all tests from tests_to_exclude - if tests_to_exclude is not None: - test_names_subset = list( - set(test_names_subset).difference(tests_to_exclude) - ) - - # the below loops run all the tests and collect the results here: - results = dict() - # loop A: we loop over all the tests - for test_name in test_names_subset: - test_fun = getattr(self, test_name) - fixture_sequence = self.fixture_sequence - - # all arguments except the first one (self) - fixture_vars = getfullargspec(test_fun)[0][1:] - fixture_vars = [var for var in fixture_sequence if var in fixture_vars] - - # this call retrieves the conditional fixtures - # for the test test_name, and the estimator - _, fixture_prod, fixture_names = create_conditional_fixtures_and_names( - test_name=test_name, - fixture_vars=fixture_vars, - generator_dict=temp_generator_dict, - fixture_sequence=fixture_sequence, - raise_exceptions=raise_exceptions, - ) - - # if function 
is decorated with mark.parametrize, add variable settings - # NOTE: currently this works only with single-variable mark.parametrize - if hasattr(test_fun, "pytestmark"): - if len([x for x in test_fun.pytestmark if x.name == "parametrize"]) > 0: - # get the three lists from pytest - ( - pytest_fixture_vars, - pytest_fixture_prod, - pytest_fixture_names, - ) = self._get_pytest_mark_args(test_fun) - # add them to the three lists from conditional fixtures - fixture_vars, fixture_prod, fixture_names = self._product_fixtures( - fixture_vars, - fixture_prod, - fixture_names, - pytest_fixture_vars, - pytest_fixture_prod, - pytest_fixture_names, - ) - - # loop B: for each test, we loop over all fixtures - for params, fixt_name in zip(fixture_prod, fixture_names): - # this is needed because pytest unwraps 1-tuples automatically - # but subsequent code assumes params is k-tuple, no matter what k is - if len(fixture_vars) == 1: - params = (params,) - key = f"{test_name}[{fixt_name}]" - args = dict(zip(fixture_vars, params)) - - # we subset to test-fixtures to run by this, if given - # key is identical to the pytest test-fixture string identifier - if fixtures_to_run is not None and key not in fixtures_to_run: - continue - if fixtures_to_exclude is not None and key in fixtures_to_exclude: - continue - - if not raise_exceptions: - try: - test_fun(**deepcopy(args)) - results[key] = "PASSED" - except Exception as err: - results[key] = err - else: - test_fun(**deepcopy(args)) - results[key] = "PASSED" - - return results - - @staticmethod - def _check_None_str_or_list_of_str(obj, var_name="obj"): - """Check that obj is None, str, or list of str, and coerce to list of str.""" - if obj is not None: - msg = f"{var_name} must be None, str, or list of str" - if isinstance(obj, str): - obj = [obj] - if not isinstance(obj, list): - raise ValueError(msg) - if not np.all([isinstance(x, str) for x in obj]): - raise ValueError(msg) - return obj - - @staticmethod - def _get_pytest_mark_args(fun): - """Get args from pytest mark annotation of function. 
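Taken together, `run_tests` let a contributor replay the suite, or a named subset of it, against a single estimator without invoking pytest. A usage sketch against the class as it stood here, with DummyClassifier standing in for any estimator:

    # Hedged usage sketch of QuickTester.run_tests on the removed TestAllEstimators.
    from aeon.classification import DummyClassifier

    results = TestAllEstimators().run_tests(
        DummyClassifier(),
        tests_to_run=["test_fit_updates_state", "test_persistence_via_pickle"],
        raise_exceptions=False,
    )
    # keys look roughly like "test_fit_updates_state[DummyClassifier-...]";
    # values are "PASSED" or the exception that was raised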
- - Parameters - ---------- - fun: callable, any function - - Returns - ------- - pytest_fixture_vars: list of str - names of args participating in mark.parametrize marks, in pytest order - pytest_fixt_list: list of tuple - list of value tuples from the mark parameterization - i-th value in each tuple corresponds to i-th arg name in pytest_fixture_vars - pytest_fixt_names: list of str - i-th element is display name for i-th fixture setting in pytest_fixt_list - """ - from itertools import product - - marks = [x for x in fun.pytestmark if x.name == "parametrize"] - - def to_str(obj): - return [str(x) for x in obj] - - def get_id(mark): - if "ids" in mark.kwargs.keys(): - return mark.kwargs["ids"] - else: - return to_str(range(len(mark.args[1]))) - - pytest_fixture_vars = [x.args[0] for x in marks] - pytest_fixt_raw = [x.args[1] for x in marks] - pytest_fixt_list = product(*pytest_fixt_raw) - pytest_fixt_names_raw = [get_id(x) for x in marks] - pytest_fixt_names = product(*pytest_fixt_names_raw) - pytest_fixt_names = ["-".join(x) for x in pytest_fixt_names] - - return pytest_fixture_vars, pytest_fixt_list, pytest_fixt_names - - @staticmethod - def _product_fixtures( - fixture_vars, - fixture_prod, - fixture_names, - pytest_fixture_vars, - pytest_fixture_prod, - pytest_fixture_names, - ): - """Compute products of two sets of fixture vars, values, names.""" - from itertools import product - - # product of fixture variable names = concatenation - fixture_vars_return = fixture_vars + pytest_fixture_vars - - # this is needed because pytest unwraps 1-tuples automatically - # but subsequent code assumes params is k-tuple, no matter what k is - if len(fixture_vars) == 1: - fixture_prod = [(x,) for x in fixture_prod] - - # product of fixture products = Cartesian product plus append tuples - fixture_prod_return = product(fixture_prod, pytest_fixture_prod) - fixture_prod_return = [sum(x, ()) for x in fixture_prod_return] - - # product of fixture names = Cartesian product plus concat - fixture_names_return = product(fixture_names, pytest_fixture_names) - fixture_names_return = ["-".join(x) for x in fixture_names_return] - - return fixture_vars_return, fixture_prod_return, fixture_names_return - - -class TestAllObjects(BaseFixtureGenerator, QuickTester): - """Package level tests for all aeon objects.""" - - estimator_type_filter = "object" - - def test_create_test_instance(self, estimator_class): - """Check create_test_instance logic and basic constructor functionality. - - create_test_instance and create_test_instances_and_names are the - key methods used to create test instances in testing. - If this test does not pass, validity of the other tests cannot be guaranteed. - - Also tests inheritance and super call logic in the constructor. - - Tests that: - * create_test_instance results in an instance of estimator_class - * __init__ calls super.__init__ - * _tags_dynamic attribute for tag inspection is present after construction - """ - estimator = estimator_class.create_test_instance() - - # Check that init does not construct object of other class than itself - assert isinstance(estimator, estimator_class), ( - "object returned by create_test_instance must be an instance of the class, " - f"found {type(estimator)}" - ) - - msg = ( - f"{estimator_class.__name__}.__init__ should call " - f"super({estimator_class.__name__}, self).__init__, " - "but that does not seem to be the case. 
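The merge of conditional fixtures with pytest.mark.parametrize settings in `_product_fixtures` is a plain Cartesian product; a toy, self-contained illustration:

    # Concatenate variable names, then take the Cartesian product of the value
    # tuples and of the display names: the same shape of result as above.
    from itertools import product

    vars_a, prod_a, names_a = ["estimator_instance"], [("est0",), ("est1",)], ["est0", "est1"]
    vars_b, prod_b, names_b = ["a"], [(True,), (42,)], ["True", "42"]

    combined_vars = vars_a + vars_b
    combined_prod = [x + y for x, y in product(prod_a, prod_b)]
    combined_names = ["-".join(n) for n in product(names_a, names_b)]
    # combined_prod  == [("est0", True), ("est0", 42), ("est1", True), ("est1", 42)]
    # combined_names == ["est0-True", "est0-42", "est1-True", "est1-42"]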
Please ensure to call the " - f"parent class's constructor in {estimator_class.__name__}.__init__" - ) - assert hasattr(estimator, "_tags_dynamic"), msg - - def test_create_test_instances_and_names(self, estimator_class): - """Check that create_test_instances_and_names works. - - create_test_instance and create_test_instances_and_names are the - key methods used to create test instances in testing. - If this test does not pass, validity of the other tests cannot be guaranteed. - - Tests expected function signature of create_test_instances_and_names. - """ - estimators, names = estimator_class.create_test_instances_and_names() - - assert isinstance(estimators, list), ( - "first return of create_test_instances_and_names must be a list, " - f"found {type(estimators)}" - ) - assert isinstance(names, list), ( - "second return of create_test_instances_and_names must be a list, " - f"found {type(names)}" - ) - - assert np.all([isinstance(est, estimator_class) for est in estimators]), ( - "list elements of first return returned by create_test_instances_and_names " - "all must be an instance of the class" - ) - - assert np.all([isinstance(name, str) for name in names]), ( - "list elements of second return returned by create_test_instances_and_names" - " all must be strings" - ) - - assert len(estimators) == len(names), ( - "the two lists returned by create_test_instances_and_names must have " - "equal length" - ) - - def test_estimator_tags(self, estimator_class): - """Check conventions on estimator tags.""" - Estimator = estimator_class - - assert hasattr(Estimator, "get_class_tags") - all_tags = Estimator.get_class_tags() - assert isinstance(all_tags, dict) - assert all(isinstance(key, str) for key in all_tags.keys()) - if hasattr(Estimator, "_tags"): - tags = Estimator._tags - msg = ( - f"_tags attribute of {estimator_class} must be dict, " - f"but found {type(tags)}" - ) - assert isinstance(tags, dict), msg - assert len(tags) > 0, f"_tags dict of class {estimator_class} is empty" - invalid_tags = [ - tag for tag in tags.keys() if tag not in VALID_ESTIMATOR_TAGS - ] - assert len(invalid_tags) == 0, ( - f"_tags of {estimator_class} contains invalid tags: {invalid_tags}. " - "For a list of valid tags, see registry.all_tags, or registry._tags. " - ) - - # Avoid ambiguous class attributes - ambiguous_attrs = ("tags", "tags_") - for attr in ambiguous_attrs: - assert not hasattr(Estimator, attr), ( - f"Please avoid using the {attr} attribute to disambiguate it from " - f"estimator tags." - ) - - def test_inheritance(self, estimator_class): - """Check that estimator inherits from BaseObject and/or BaseEstimator.""" - assert issubclass( - estimator_class, BaseObject - ), f"object {estimator_class} is not a sub-class of BaseObject." - - if hasattr(estimator_class, "fit"): - assert issubclass(estimator_class, BaseEstimator), ( - f"estimator: {estimator_class} has fit method, but" - f"is not a sub-class of BaseEstimator." - ) - - # Usually estimators inherit only from one BaseEstimator type, but in some cases - # they may be predictor and transformer at the same time (e.g. 
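The return contract checked above can be exercised by hand on any estimator class; DummyClassifier is used purely as an example:

    from aeon.classification import DummyClassifier

    estimators, names = DummyClassifier.create_test_instances_and_names()
    assert isinstance(estimators, list) and isinstance(names, list)
    assert len(estimators) == len(names)
    assert all(isinstance(est, DummyClassifier) for est in estimators)
    assert all(isinstance(name, str) for name in names)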
pipelines) - n_base_types = sum( - issubclass(estimator_class, cls) for cls in VALID_ESTIMATOR_BASE_TYPES - ) - - assert 2 >= n_base_types >= 1 - - def test_has_common_interface(self, estimator_class): - """Check estimator implements the common interface.""" - estimator = estimator_class - - # Check class for type of attribute - if isinstance(estimator_class, BaseEstimator): - assert isinstance(estimator.is_fitted, property) - - required_methods = _list_required_methods(estimator_class) - - for attr in required_methods: - assert hasattr( - estimator, attr - ), f"Estimator: {estimator.__name__} does not implement attribute: {attr}" - - if hasattr(estimator, "inverse_transform"): - assert hasattr(estimator, "transform") - if hasattr(estimator, "predict_proba"): - assert hasattr(estimator, "predict") - - def test_no_cross_test_side_effects_part1(self, estimator_instance): - """Test that there are no side effects across tests, through estimator state.""" - estimator_instance.test__attr = 42 - - def test_no_cross_test_side_effects_part2(self, estimator_instance): - """Test that there are no side effects across tests, through estimator state.""" - assert not hasattr(estimator_instance, "test__attr") - - @pytest.mark.parametrize("a", [True, 42]) - def test_no_between_test_case_side_effects(self, estimator_instance, scenario, a): - """Test that there are no side effects across instances of the same test.""" - assert not hasattr(estimator_instance, "test__attr") - estimator_instance.test__attr = 42 - - def test_get_params(self, estimator_instance): - """Check that get_params works correctly.""" - estimator = estimator_instance - params = estimator.get_params() - assert isinstance(params, dict) - _check_get_params_invariance(estimator.__class__.__name__, estimator) - - def test_set_params(self, estimator_instance): - """Check that set_params works correctly.""" - estimator = estimator_instance - params = estimator.get_params() - - msg = f"set_params of {type(estimator).__name__} does not return self" - assert estimator.set_params(**params) is estimator, msg - - is_equal, equals_msg = deep_equals( - estimator.get_params(), params, return_msg=True - ) - msg = ( - f"get_params result of {type(estimator).__name__} (x) does not match " - f"what was passed to set_params (y). Reason for discrepancy: {equals_msg}" - ) - assert is_equal, msg - - def test_set_params_sklearn(self, estimator_class): - """Check that set_params works correctly, mirrors sklearn check_set_params. - - Instead of the "fuzz values" in sklearn's check_set_params, - we use the other test parameter settings (which are assumed valid). - This guarantees settings which play along with the __init__ content. 
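In short, `set_params` must return self and must round-trip with `get_params`; checked by hand on an example estimator:

    from aeon.classification import DummyClassifier

    est = DummyClassifier()
    params = est.get_params()
    assert est.set_params(**params) is est   # set_params returns self
    assert est.get_params() == params        # and is lossless w.r.t. get_params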
- """ - estimator = estimator_class.create_test_instance() - test_params = estimator_class.get_test_params() - if not isinstance(test_params, list): - test_params = [test_params] - - for params in test_params: - # we construct the full parameter set for params - # params may only have parameters that are deviating from defaults - # in order to set non-default parameters back to defaults - params_full = estimator_class.get_param_defaults() - params_full.update(params) - - msg = f"set_params of {estimator_class.__name__} does not return self" - est_after_set = estimator.set_params(**params_full) - assert est_after_set is estimator, msg - - is_equal, equals_msg = deep_equals( - estimator.get_params(deep=False), params_full, return_msg=True - ) - msg = ( - f"get_params result of {estimator_class.__name__} (x) does not match " - f"what was passed to set_params (y). " - f"Reason for discrepancy: {equals_msg}" - ) - assert is_equal, msg - - def test_clone(self, estimator_instance): - """Check that clone method does not raise exceptions and results in a clone. - - A clone of an object x is an object that: - * has same class and parameters as x - * is not identical with x - * is unfitted (even if x was fitted) - """ - est_clone = estimator_instance.clone() - assert isinstance(est_clone, type(estimator_instance)) - assert est_clone is not estimator_instance - if hasattr(est_clone, "is_fitted"): - assert not est_clone.is_fitted - - def test_repr(self, estimator_instance): - """Check that __repr__ call to instance does not raise exceptions.""" - estimator = estimator_instance - repr(estimator) - - def test_constructor(self, estimator_class): - """Check that the constructor has sklearn compatible signature and behaviour. - - Based on sklearn check_estimator testing of __init__ logic. - Uses create_test_instance to create an instance. - Assumes test_create_test_instance has passed and certified create_test_instance. - - Tests that: - * constructor has no varargs - * tests that constructor constructs an instance of the class - * tests that all parameters are set in init to an attribute of the same name - * tests that parameter values are always copied to the attribute and not changed - * tests that default parameters are one of the following: - None, str, int, float, bool, tuple, function, joblib memory, numpy primitive - (other type parameters should be None, default handling should be by writing - the default to attribute of a different name, e.g., my_param_ not my_param) - """ - msg = "constructor __init__ should have no varargs" - assert getfullargspec(estimator_class.__init__).varkw is None, msg - - estimator = estimator_class.create_test_instance() - assert isinstance(estimator, estimator_class) - - # Ensure that each parameter is set in init - init_params = _get_args(type(estimator).__init__) - invalid_attr = set(init_params) - set(vars(estimator)) - {"self"} - assert not invalid_attr, ( - "Estimator %s should store all parameters" - " as an attribute during init. Did not find " - "attributes `%s`." 
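The clone contract spelled out above, on a concrete example estimator: same class and parameters, a different object, and unfitted:

    from aeon.classification import DummyClassifier

    est = DummyClassifier()
    est_clone = est.clone()
    assert type(est_clone) is type(est)
    assert est_clone is not est
    assert est_clone.get_params() == est.get_params()
    assert not est_clone.is_fitted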
% (estimator.__class__.__name__, sorted(invalid_attr)) - ) - - # Ensure that init does nothing but set parameters - # No logic/interaction with other parameters - def param_filter(p): - """Identify hyper parameters of an estimator.""" - return p.name != "self" and p.kind not in [p.VAR_KEYWORD, p.VAR_POSITIONAL] - - init_params = [ - p - for p in signature(estimator.__init__).parameters.values() - if param_filter(p) - ] - - params = estimator.get_params() - - test_params = estimator_class.get_test_params() - if isinstance(test_params, list): - test_params = test_params[0] - test_params = test_params.keys() - - init_params = [param for param in init_params if param.name not in test_params] - - for param in init_params: - assert param.default != param.empty, ( - "parameter `%s` for %s has no default value and is not " - "set in `get_test_params`" % (param.name, estimator.__class__.__name__) - ) - if type(param.default) is type: - assert param.default in [np.float64, np.int64] - else: - assert type(param.default) in [ - str, - int, - float, - bool, - tuple, - type(None), - np.float64, - types.FunctionType, - joblib.Memory, - ] - - param_value = params[param.name] - if isinstance(param_value, np.ndarray): - np.testing.assert_array_equal(param_value, param.default) - else: - if bool( - isinstance(param_value, numbers.Real) and np.isnan(param_value) - ): - # Allows to set default parameters to np.nan - assert param_value is param.default, param.name - else: - assert param_value == param.default, param.name - - def test_valid_estimator_class_tags(self, estimator_class): - """Check that Estimator class tags are in VALID_ESTIMATOR_TAGS.""" - for tag in estimator_class.get_class_tags().keys(): - assert tag in VALID_ESTIMATOR_TAGS - - def test_valid_estimator_tags(self, estimator_instance): - """Check that Estimator tags are in VALID_ESTIMATOR_TAGS.""" - for tag in estimator_instance.get_tags().keys(): - assert tag in VALID_ESTIMATOR_TAGS - - -class TestAllEstimators(BaseFixtureGenerator, QuickTester): - """Package level tests for all aeon estimators, i.e., objects with fit.""" - - def test_raises_not_fitted_error(self, estimator_instance, scenario, method_nsc): - """Check exception raised for non-fit method calls to unfitted estimators. - - Tries to run all methods in NON_STATE_CHANGING_METHODS with valid scenario, - but before fit has been called on the estimator. - - This should raise a NotFittedError if correctly caught, - normally by a self.check_is_fitted() call in the method's boilerplate. - - Raises - ------ - Exception if NotFittedError is not raised by non-state changing method - """ - # call methods without prior fitting and check that they raise NotFittedError - with pytest.raises(NotFittedError, match=r"has not been fitted"): - scenario.run(estimator_instance, method_sequence=[method_nsc]) - - def test_non_state_changing_method_contract( - self, estimator_instance, scenario, method_nsc - ): - """Check that non-state-changing methods behave as per interface contract. - - Check the following contract on non-state-changing methods: - 1. do not change state of the estimator, i.e., any attributes - (including hyper-parameters and fitted parameters) - 2. expected output type of the method matches actual output type - - only for abstract BaseEstimator methods, common to all estimators. - List of BaseEstimator methods tested: get_fitted_params - Subclass specific method outputs are tested in TestAll[estimatortype] class - 3. 
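The constructor convention being enforced is the sklearn one: `__init__` stores each argument unchanged under an attribute of the same name, and every argument has a simple default. A minimal sketch with a made-up class (not part of aeon):

    from inspect import signature

    class _Example:
        def __init__(self, window=10, normalise=True):
            self.window = window          # stored unchanged ...
            self.normalise = normalise    # ... under the same attribute names

    init_args = [p for p in signature(_Example.__init__).parameters if p != "self"]
    inst = _Example()
    assert all(hasattr(inst, arg) for arg in init_args)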
the state of method arguments does not change - """ - estimator = estimator_instance - set_random_state(estimator) - - _, args_after = scenario.run( - estimator, method_sequence=["fit"], return_args=True - ) - fit_args_after = args_after[0] - fit_args_before = scenario.args["fit"] - assert deep_equals( - fit_args_before, fit_args_after - ), f"Estimator: {type(estimator)} has side effects on arguments of fit" - - # dict_before = copy of dictionary of estimator before predict, post fit - dict_before = estimator.__dict__.copy() - - # skip test if predict_proba is not implemented - if method_nsc == "predict_proba": - try: - output, args_after = scenario.run( - estimator, method_sequence=[method_nsc], return_args=True - ) - except NotImplementedError: - return None - else: - output, args_after = scenario.run( - estimator, method_sequence=[method_nsc], return_args=True - ) - - method_args_after = args_after[0] - method_args_before = scenario.get_args(method_nsc, estimator) - - assert deep_equals(method_args_after, method_args_before), ( - f"Estimator: {type(estimator)} has side effects on arguments of " - f"{method_nsc}" - ) - - # dict_after = dictionary of estimator after predict and fit - dict_after = estimator.__dict__ - is_equal, msg = deep_equals(dict_after, dict_before, return_msg=True) - assert is_equal, ( - f"Estimator: {type(estimator).__name__} changes __dict__ " - f"during {method_nsc}, " - f"reason/location of discrepancy (x=after, y=before): {msg}" - ) - - # test get_fitted_params here to avoid extra fit calls - if method_nsc == "get_fitted_params": - msg = ( - f"get_fitted_params of {type(estimator)} should return dict, " - f"but returns object of type {type(output)}" - ) - assert isinstance(output, dict), msg - - nonstr = [x for x in output.keys() if not isinstance(x, str)] - if not len(nonstr) == 0: - msg = ( - f"get_fitted_params of {type(estimator)} should return dict with " - f"with str keys, but some keys are not str." - f"found {nonstr}" - ) - raise AssertionError(msg) - - def test_fit_updates_state(self, estimator_instance, scenario): - """Check fit/update state change. - - 1. Check estimator_instance calls base class constructor - 2. Check is_fitted attribute is set correctly to False before fit, at init - This is testing base class functionality, but its fast - 3. Check fit returns self - 4. Check is_fitted attribute is updated correctly to True after calling fit - 5. Check estimator hyper parameters are not changed in fit - """ - # Check that fit updates the is-fitted states - attrs = ["_is_fitted", "is_fitted"] - - estimator = estimator_instance - estimator_class = type(estimator_instance) - - msg = ( - f"{estimator_class.__name__}.__init__ should call " - f"super({estimator_class.__name__}, self).__init__, " - "but that does not seem to be the case. Please ensure to call the " - f"parent class's constructor in {estimator_class.__name__}.__init__" - ) - assert hasattr(estimator, "_is_fitted"), msg - - # Check is_fitted attribute is set correctly to False before fit, at init - for attr in attrs: - assert not getattr( - estimator, attr - ), f"Estimator: {estimator} does not initiate attribute: {attr} to False" - # Make a physical copy of the original estimator parameters before fitting. 
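A much-simplified version of the state-preservation check above, with DummyClassifier and random data as stand-ins; the real test compares the full `__dict__` with a deep-equality utility rather than the shallow identity check used here:

    import numpy as np
    from aeon.classification import DummyClassifier

    X = np.random.default_rng(0).normal(size=(10, 1, 20))
    y = np.array([0, 1] * 5)
    clf = DummyClassifier().fit(X, y)

    before = dict(clf.__dict__)            # shallow snapshot of fitted state
    clf.predict(X)
    assert clf.__dict__.keys() == before.keys()
    assert all(clf.__dict__[k] is before[k] for k in before)   # nothing rebound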
- set_random_state(estimator) - params = estimator.get_params() - original_params = deepcopy(params) - - fitted_estimator = scenario.run(estimator_instance, method_sequence=["fit"]) - # Check fit returns self - assert ( - fitted_estimator is estimator_instance - ), f"Estimator: {estimator_instance} does not return self when calling fit" - - # Check is_fitted attribute is updated correctly to True after calling fit - for attr in attrs: - assert getattr( - fitted_estimator, attr - ), f"Estimator: {estimator} does not update attribute: {attr} during fit" - - # Compare the state of the model parameters with the original parameters - new_params = fitted_estimator.get_params() - for param_name, original_value in original_params.items(): - new_value = new_params[param_name] - - # We should never change or mutate the internal state of input - # parameters by default. To check this we use the joblib.hash function - # that introspects recursively any subobjects to compute a checksum. - # The only exception to this rule of immutable constructor parameters - # is possible RandomState instance but in this check we explicitly - # fixed the random_state params recursively to be integer seeds. - assert joblib.hash(new_value) == joblib.hash(original_value), ( - "Estimator %s should not change or mutate " - " the parameter %s from %s to %s during fit." - % (estimator.__class__.__name__, param_name, original_value, new_value) - ) - - def test_fit_deterministic( - self, estimator_instance, scenario, method_nsc_arraylike - ): - """Test that fit is deterministic. - - Check that calling fit twice is equivalent to calling it once, and also - tests pickling (done here to save time). - """ - # escape known non-deterministic estimators - if estimator_instance.get_tag( - "non-deterministic", tag_value_default=False, raise_error=False - ): - return None - - # run fit plus method_nsc once, save results - set_random_state(estimator_instance) - results = scenario.run( - estimator_instance, - method_sequence=["fit", method_nsc_arraylike], - return_all=True, - deepcopy_return=True, - ) - - estimator = results[0] - set_random_state(estimator) - - # run fit plus method_nsc a second time - results_2nd = scenario.run( - estimator, - method_sequence=["fit", method_nsc_arraylike], - return_all=True, - deepcopy_return=True, - ) - - # check results are equal - _assert_array_almost_equal( - results[1], - results_2nd[1], - err_msg=f"Running {method_nsc_arraylike} after fit twice with test " - f"parameters gives different results.", - ) - - def test_persistence_via_pickle( - self, estimator_instance, scenario, method_nsc_arraylike - ): - """Check that we can pickle all estimators.""" - method_nsc = method_nsc_arraylike - - # escape estimators we know cannot pickle. 
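The parameter-immutability part of the check above in isolation, using `joblib.hash` as the test itself does; the estimator and data are examples only:

    import joblib
    import numpy as np
    from aeon.classification import DummyClassifier

    X = np.random.default_rng(0).normal(size=(10, 1, 20))
    y = np.array([0, 1] * 5)

    clf = DummyClassifier()
    hashes_before = {k: joblib.hash(v) for k, v in clf.get_params().items()}
    clf.fit(X, y)
    hashes_after = {k: joblib.hash(v) for k, v in clf.get_params().items()}
    assert hashes_before == hashes_after   # fit must not mutate constructor params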
For saving there is an argument to - # be made that alternate methods of saving should be available, but currently - # this is not the case - if estimator_instance.get_tag( - "cant-pickle", tag_value_default=False, raise_error=False - ): - return None - - estimator = estimator_instance - set_random_state(estimator) - # Fit the model, get args before and after - scenario.run(estimator, method_sequence=["fit"], return_args=True) - - # Generate results before pickling - vanilla_result = scenario.run(estimator, method_sequence=[method_nsc]) - - # Serialize and deserialize - serialized_estimator = pickle.dumps(estimator) - deserialized_estimator = pickle.loads(serialized_estimator) - deserialized_result = scenario.run( - deserialized_estimator, method_sequence=[method_nsc] - ) - - _assert_array_almost_equal( - vanilla_result, - deserialized_result, - decimal=6, - err_msg=( - f"Results of {method_nsc} difference between when pickling and not " - f"pickling, estimator {type(estimator_instance).__name__}" - ), - ) - - def test_dl_constructor_initializes_deeply(self, estimator_class): - """Test DL estimators that they pass custom parameters to underlying Network.""" - estimator = estimator_class - - if not issubclass(estimator, (BaseDeepClassifier, BaseDeepRegressor)): - return None - - if not hasattr(estimator, "get_test_params"): - return None - - params = estimator.get_test_params() - - if isinstance(params, list): - params = params[0] - if isinstance(params, dict): - pass - else: - raise TypeError( - f"`get_test_params()` of estimator: {estimator} returns " - f"an expected type: {type(params)}, acceptable formats: [list, dict]" - ) - - estimator = estimator(**params) - - for key, value in params.items(): - assert vars(estimator)[key] == value - # some keys are only relevant to the final model (eg: n_epochs) - # skip them for the underlying network - if vars(estimator._network).get(key) is not None: - assert vars(estimator._network)[key] == value - - @pytest.mark.skip(reason="hangs on mac and unix remote tests") - def test_multiprocessing_idempotent( - self, estimator_instance, scenario, method_nsc_arraylike - ): - """Test that single and multi-process run results are identical. - - Check that running an estimator on a single process is no different to running - it on multiple processes. We also check that we can set n_jobs=-1 to make use - of all CPUs. The test is not really necessary though, as we rely on joblib for - parallelization and can trust that it works as expected. 
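The pickle round trip in miniature, with an example estimator and data: predictions before and after serialisation must agree to six decimals, mirroring the tolerance above:

    import pickle
    import numpy as np
    from aeon.classification import DummyClassifier

    X = np.random.default_rng(0).normal(size=(10, 1, 20))
    y = np.array([0, 1] * 5)
    clf = DummyClassifier().fit(X, y)

    restored = pickle.loads(pickle.dumps(clf))
    np.testing.assert_array_almost_equal(
        clf.predict_proba(X), restored.predict_proba(X), decimal=6
    )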
- """ - method_nsc = method_nsc_arraylike - params = estimator_instance.get_params() - - if "n_jobs" in params: - # run on a single process - # ----------------------- - estimator = deepcopy(estimator_instance) - estimator.set_params(n_jobs=1) - set_random_state(estimator) - result_single_process = scenario.run( - estimator, method_sequence=["fit", method_nsc] - ) - - # run on multiple processes - # ------------------------- - estimator = deepcopy(estimator_instance) - estimator.set_params(n_jobs=-1) - set_random_state(estimator) - result_multiple_process = scenario.run( - estimator, method_sequence=["fit", method_nsc] - ) - _assert_array_equal( - result_single_process, - result_multiple_process, - err_msg="Results are not equal for n_jobs=1 and n_jobs=-1", - ) diff --git a/aeon/testing/test_config.py b/aeon/testing/testing_config.py similarity index 64% rename from aeon/testing/test_config.py rename to aeon/testing/testing_config.py index fca6716e6d..7ed1fab9c1 100644 --- a/aeon/testing/test_config.py +++ b/aeon/testing/testing_config.py @@ -1,6 +1,6 @@ """Test configuration.""" -__maintainer__ = [] +__maintainer__ = ["MatthewMiddlehurst"] __all__ = ["EXCLUDE_ESTIMATORS", "EXCLUDED_TESTS"] import aeon.testing.utils._cicd_numba_caching # noqa: F401 @@ -16,9 +16,12 @@ # per os/version default is False, can be set to True by pytest --prtesting True flag PR_TESTING = False -# Exclude estimators here for short term fixes -EXCLUDE_ESTIMATORS = [] - +# Exclude estimators here for short-term fixes +EXCLUDE_ESTIMATORS = [ + "SeriesSearch", + "QuerySearch", + "ClearSkyTransformer", +] EXCLUDED_TESTS = { # Early classifiers (EC) intentionally retain information from previous predict @@ -27,16 +30,16 @@ # (test_save_estimators_to_file) are due to predict/predict_proba returning two # items and that breaking assert_array_equal. 
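For context, the exclusion lists above are intended to be consumed by the new check framework roughly as follows; `_should_skip` is a hypothetical helper written only to illustrate the two levels of exclusion, not the actual lookup code:

    from aeon.testing.testing_config import EXCLUDE_ESTIMATORS, EXCLUDED_TESTS

    def _should_skip(check_name, estimator_class):
        # whole estimator excluded, or a single named check excluded for it
        if estimator_class.__name__ in EXCLUDE_ESTIMATORS:
            return True
        return check_name in EXCLUDED_TESTS.get(estimator_class.__name__, [])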
"TEASER": [ # EC - "test_non_state_changing_method_contract", - "test_fit_deterministic", - "test_persistence_via_pickle", - "test_save_estimators_to_file", + "check_non_state_changing_method", + "check_fit_deterministic", + "check_persistence_via_pickle", + "check_save_estimators_to_file", ], "ProbabilityThresholdEarlyClassifier": [ # EC - "test_non_state_changing_method_contract", - "test_fit_deterministic", - "test_persistence_via_pickle", - "test_save_estimators_to_file", + "check_non_state_changing_method", + "check_fit_deterministic", + "check_persistence_via_pickle", + "check_save_estimators_to_file", ], # has a keras fail, unknown reason, see #1387 "LearningShapeletClassifier": ["check_fit_deterministic"], @@ -46,8 +49,25 @@ # needs investigation "SASTClassifier": ["check_fit_deterministic"], "RSASTClassifier": ["check_fit_deterministic"], + "SAST": ["check_fit_deterministic"], + "RSAST": ["check_fit_deterministic"], "AEFCNClusterer": ["check_fit_updates_state"], "AEResNetClusterer": ["check_fit_updates_state"], + "PyODAdapter": ["check_fit_updates_state"], + "SFA": ["check_persistence_via_pickle", "check_fit_deterministic"], + # missed in legacy testing, changes state in predict/transform + "DWT_MLEAD": ["check_non_state_changing_method"], + "STOMP": ["check_non_state_changing_method"], + "FLUSSSegmenter": ["check_non_state_changing_method"], + "InformationGainSegmenter": ["check_non_state_changing_method"], + "GreedyGaussianSegmenter": ["check_non_state_changing_method"], + "ClaSPSegmenter": ["check_non_state_changing_method"], + "HMMSegmenter": ["check_non_state_changing_method"], + "BinSegSegmenter": ["check_non_state_changing_method"], + "QUANTTransformer": ["check_non_state_changing_method"], + "MatrixProfileSeriesTransformer": ["check_non_state_changing_method"], + "PLASeriesTransformer": ["check_non_state_changing_method"], + "AutoCorrelationSeriesTransformer": ["check_non_state_changing_method"], } # We use estimator tags in addition to class hierarchies to further distinguish diff --git a/aeon/testing/testing_data.py b/aeon/testing/testing_data.py index 733736567d..76cef87df3 100644 --- a/aeon/testing/testing_data.py +++ b/aeon/testing/testing_data.py @@ -5,9 +5,11 @@ from aeon.anomaly_detection.base import BaseAnomalyDetector from aeon.base import BaseCollectionEstimator, BaseSeriesEstimator from aeon.classification import BaseClassifier +from aeon.classification.early_classification import BaseEarlyClassifier from aeon.clustering import BaseClusterer from aeon.regression import BaseRegressor from aeon.segmentation import BaseSegmenter +from aeon.similarity_search import BaseSimilaritySearch from aeon.testing.data_generation import ( make_example_1d_numpy, make_example_2d_dataframe_collection, @@ -752,19 +754,19 @@ } X_series = make_example_1d_numpy( - n_timepoints=30, random_state=data_rng.randint(np.iinfo(np.int32).max) + n_timepoints=40, random_state=data_rng.randint(np.iinfo(np.int32).max) ) -X_series2 = X_series[20:30] +X_series2 = X_series[20:40] X_series = X_series[:20] UNIVARIATE_SERIES_NOLABEL = {"train": (X_series, None), "test": (X_series2, None)} X_series_mv = make_example_2d_numpy_series( - n_timepoints=30, + n_timepoints=40, n_channels=2, axis=1, random_state=data_rng.randint(np.iinfo(np.int32).max), ) -X_series_mv2 = X_series_mv[:, 20:30] +X_series_mv2 = X_series_mv[:, 20:40] X_series_mv = X_series_mv[:, :20] MULTIVARIATE_SERIES_NOLABEL = { "train": (X_series_mv, None), @@ -772,10 +774,10 @@ } X_series_mi = make_example_1d_numpy( - n_timepoints=30, 
random_state=data_rng.randint(np.iinfo(np.int32).max) + n_timepoints=40, random_state=data_rng.randint(np.iinfo(np.int32).max) ) -X_series_mi2 = X_series_mi[20:30] -X_series_mi2[data_rng.choice(10, 1)] = np.nan +X_series_mi2 = X_series_mi[20:40] +X_series_mi2[data_rng.choice(20, 1)] = np.nan X_series_mi = X_series_mi[:20] X_series_mi[data_rng.choice(20, 2)] = np.nan MISSING_VALUES_NOLABEL = {"train": (X_series_mi, None), "test": (X_series_mi2, None)} @@ -952,8 +954,10 @@ def _get_label_type_for_estimator(estimator): """ if ( isinstance(estimator, BaseClassifier) + or isinstance(estimator, BaseEarlyClassifier) or isinstance(estimator, BaseClusterer) or isinstance(estimator, BaseCollectionTransformer) + or isinstance(estimator, BaseSimilaritySearch) ): label_type = "Classification" elif isinstance(estimator, BaseRegressor): diff --git a/aeon/testing/tests/test_all_estimators.py b/aeon/testing/tests/test_all_estimators.py index 372aae833e..81f8bd0266 100644 --- a/aeon/testing/tests/test_all_estimators.py +++ b/aeon/testing/tests/test_all_estimators.py @@ -5,13 +5,10 @@ from aeon.registry import all_estimators from aeon.testing.estimator_checking import parametrize_with_checks -from aeon.testing.test_config import PR_TESTING +from aeon.testing.testing_config import PR_TESTING from aeon.utils.sampling import random_partition -ALL_ESTIMATORS = all_estimators( - estimator_types=["classifier", "regressor", "clusterer"], - return_names=False, -) +ALL_TEST_ESTIMATORS = all_estimators(return_names=False) # subsample estimators by OS & python version # this ensures that only a 1/3 of estimators are tested for a given combination @@ -37,11 +34,12 @@ ix = ix % 3 ALL_ESTIMATORS = [ - ALL_ESTIMATORS[i] for i in random_partition(len(ALL_ESTIMATORS), 3)[ix] + ALL_TEST_ESTIMATORS[i] + for i in random_partition(len(ALL_TEST_ESTIMATORS), 3)[ix] ] -@parametrize_with_checks(ALL_ESTIMATORS) +@parametrize_with_checks(ALL_TEST_ESTIMATORS) def test_all_estimators(check): """Run general estimator checks on all aeon estimators.""" check() diff --git a/aeon/testing/tests/test_softdeps.py b/aeon/testing/tests/test_softdeps.py index c6dbc84f83..830d0e80f0 100644 --- a/aeon/testing/tests/test_softdeps.py +++ b/aeon/testing/tests/test_softdeps.py @@ -1,8 +1,4 @@ -"""Tests that soft dependencies are handled correctly. - -aeon supports a number of soft dependencies which are necessary for using -a certain module or estimator but otherwise not necessary. -""" +"""Tests that soft dependencies are handled correctly in modules.""" __maintainer__ = [] @@ -13,13 +9,7 @@ import pytest import aeon -from aeon.registry import all_estimators -from aeon.testing.test_config import EXCLUDE_ESTIMATORS, PR_TESTING -from aeon.testing.utils.scenarios_getter import retrieve_scenarios -from aeon.utils.validation._dependencies import ( - _check_python_version, - _check_soft_dependencies, -) +from aeon.testing.testing_config import PR_TESTING # collect all modules modules = pkgutil.walk_packages(aeon.__path__, aeon.__name__ + ".") @@ -57,183 +47,3 @@ def test_module_soft_deps(module): f"but tried importing: '{dependency}'. Make sure soft dependencies are " f"properly isolated." 
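The 1/3 subsampling above, in isolation: `random_partition` splits the index range into three groups and the OS/Python combination index picks one of them (the twelve-element list is a stand-in for the estimator list):

    from aeon.utils.sampling import random_partition

    all_test_estimators = list(range(12))   # stand-in for ALL_TEST_ESTIMATORS
    ix = 1                                  # derived from OS and Python version
    subset = [
        all_test_estimators[i]
        for i in random_partition(len(all_test_estimators), 3)[ix]
    ]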
) from e - - -# TODO test revamp: this can be part a greater check of all estimators probably, dont -# need to discover all estimators again here - - -def _has_soft_dep(est): - """Return whether an estimator has soft dependencies.""" - softdep = est.get_class_tag("python_dependencies", None) - return softdep is not None - - -def _coerce_list_of_str(obj): - """Coerce obj to list of str.""" - if obj is None: - return [] - elif isinstance(obj, str): - return [obj] - elif isinstance(obj, list): - return obj - - -def _get_soft_deps(est): - """Return soft dependencies of an estimator, as list of str.""" - softdeps = est.get_class_tag("python_dependencies", None) - softdeps = _coerce_list_of_str(softdeps) - if softdeps is None: - raise RuntimeError( - 'error, "python_dependencies" tag must be None, str or list of str,' - f" but {est.__name__} has {softdeps}" - ) - else: - return softdeps - - -def _is_in_env(modules): - """Return whether all modules in list of str modules are installed in env.""" - modules = _coerce_list_of_str(modules) - try: - for module in modules: - _check_soft_dependencies(module, severity="error") - return True - except ModuleNotFoundError: - return False - - -# all estimators - exclude estimators on the global exclusion list -all_ests = all_estimators(return_names=False, exclude_estimators=EXCLUDE_ESTIMATORS) - - -# estimators that should fail to construct because of python version -est_python_incompatible = [ - est for est in all_ests if not _check_python_version(est, severity="none") -] - -# estimators that have soft dependencies -est_with_soft_dep = [est for est in all_ests if _has_soft_dep(est)] -# estimators that have soft dependencies and are python compatible -est_pyok_with_soft_dep = [ - est for est in est_with_soft_dep if _check_python_version(est, severity="none") -] - -# estimators that have no soft dependenies -est_without_soft_dep = [est for est in all_ests if not _has_soft_dep(est)] -# estimators that have soft dependencies and are python compatible -est_pyok_without_soft_dep = [ - est for est in est_without_soft_dep if _check_python_version(est, severity="none") -] - -# all estimators are now a disjoint union of the three sets: -# est_python_incompatible - python incompatible, should raise python error -# est_pyok_without_soft_dep - python compatible, has no soft dependency -# est_pyok_with_soft_dep - python compatible, has soft dependency - - -@pytest.mark.parametrize("estimator", est_python_incompatible) -def test_python_error(estimator): - """Test that estimators raise error if python version is wrong.""" - try: - estimator.create_test_instance() - except ModuleNotFoundError as e: - error_msg = str(e) - - # Check if appropriate exception with useful error message is raised as - # defined in the `_check_python` function - expected_error_msg = "requires python version to be" - if expected_error_msg not in error_msg: - pyspec = estimator.get_class_tag("python_version", None) - raise RuntimeError( - f"Estimator {estimator.__name__} has python version bound " - f"{pyspec} according to tags, but does not raise an appropriate " - f"error message on __init__ for incompatible python environments. " - f"Likely reason is that __init__ does not call super(cls).__init__." 
- ) from e - - -@pytest.mark.parametrize("estimator", est_pyok_with_soft_dep) -def test_softdep_error(estimator): - """Test that estimators raise error if required soft dependencies are missing.""" - softdeps = _get_soft_deps(estimator) - if not _is_in_env(softdeps): - try: - estimator.create_test_instance() - except ModuleNotFoundError as e: - error_msg = str(e) - - # Check if appropriate exception with useful error message is raised as - # defined in the `_check_soft_dependencies` function - expected_error_msg = ( - "is a soft dependency and not included in the base aeon installation" - ) - # message is different for deep learning deps tensorflow, tensorflow-proba - error_msg_alt = "required for deep learning" - if "incompatible version" in error_msg: - pass - elif expected_error_msg not in error_msg and error_msg_alt not in error_msg: - raise RuntimeError( - f"Estimator {estimator.__name__} requires soft dependencies " - f"{softdeps} according to tags, but does not raise an appropriate " - f"error message on __init__, when the soft dependency is missing. " - f"Likely reason is that __init__ does not call super(cls).__init__," - f" or imports super(cls).__init__ only after an attempted import." - ) from e - - -@pytest.mark.parametrize("estimator", est_pyok_with_soft_dep) -def test_est_construct_if_softdep_available(estimator): - """Test that estimators construct if required soft dependencies are there.""" - softdeps = _get_soft_deps(estimator) - if _is_in_env(softdeps): - try: - estimator.create_test_instance() - except ModuleNotFoundError as e: - error_msg = str(e) - raise RuntimeError( - f"Estimator {estimator.__name__} requires soft dependencies " - f"{softdeps} according to tags, but raises ModuleNotFoundError " - f"on __init__ when those dependencies are in the environment. " - f" Likely cause is additionally needed soft dependencies, " - f"these should be added " - f'to the "python_dependencies" tag. Exception text: {error_msg}' - ) from e - - -@pytest.mark.parametrize("estimator", est_pyok_without_soft_dep) -def test_est_construct_without_modulenotfound(estimator): - """Test that estimators that do not require soft dependencies construct properly.""" - try: - estimator.create_test_instance() - except ModuleNotFoundError as e: - error_msg = str(e) - raise RuntimeError( - f"Estimator {estimator.__name__} does not require soft dependencies " - f"according to tags, but raises ModuleNotFoundError " - f"on __init__ with test parameters. Any required soft dependencies should " - f'be added to the "python_dependencies" tag, and python version bounds ' - f'should be added to the "python_version" tag. Exception text: {error_msg}' - ) from e - - -@pytest.mark.parametrize("estimator", est_pyok_without_soft_dep) -def test_est_fit_without_modulenotfound(estimator): - """Test that estimators that do not require soft dependencies fit properly.""" - try: - scenarios = retrieve_scenarios(estimator) - if len(scenarios) == 0: - return None - else: - scenario = scenarios[0] - estimator_instance = estimator.create_test_instance() - scenario.run(estimator_instance, method_sequence=["fit"]) - except ModuleNotFoundError as e: - error_msg = str(e) - raise RuntimeError( - f"Estimator {estimator.__name__} does not require soft dependencies " - f"according to tags, but raises ModuleNotFoundError " - f"on fit. Any required soft dependencies should be added " - f'to the "python_dependencies" tag, and python version bounds should be' - f' added to the "python_version" tag. 
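The convention these removed tests enforced survives in the tags: estimators declare their soft dependencies, and construction raises a clear error when they are missing. Checking a dependency up front without raising can be done with the same helper the removed `_is_in_env` used:

    from aeon.utils.validation._dependencies import _check_soft_dependencies

    def _dependency_available(module):
        """Return True if the soft dependency can be imported, else False."""
        try:
            _check_soft_dependencies(module, severity="error")
            return True
        except ModuleNotFoundError:
            return False

    if _dependency_available("tensorflow"):
        pass  # safe to construct estimators tagged with this dependency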
Exception text: {error_msg}' - ) from e diff --git a/aeon/testing/utils/_conditional_fixtures.py b/aeon/testing/utils/_conditional_fixtures.py deleted file mode 100644 index dd4a42c8ec..0000000000 --- a/aeon/testing/utils/_conditional_fixtures.py +++ /dev/null @@ -1,247 +0,0 @@ -"""Testing utility for easy generation of conditional fixtures in pytest_generate_tests. - -Exports create_conditional_fixtures_and_names utility -""" - -__maintainer__ = [] - -__all__ = ["create_conditional_fixtures_and_names"] - -from copy import deepcopy -from typing import Callable - -import numpy as np - - -class FixtureGenerationError(Exception): - """Raised when a fixture fails to generate.""" - - def __init__(self, fixture_name="", err=None): - self.fixture_name = fixture_name - super().__init__(f"fixture {fixture_name} failed to generate. {err}") - - -def create_conditional_fixtures_and_names( - test_name: str, - fixture_vars: list[str], - generator_dict: dict[str, Callable], - fixture_sequence: list[str] = None, - raise_exceptions: bool = False, - deepcopy_fixtures: bool = False, -): - """Create conditional fixtures for pytest_generate_tests. - - Creates arguments for pytest.fixture.parameterize, - using conditional fixture generation functions in generator_dict. - - Example: we want to loop over two fixture variables, "number" and "multiples" - "number" are integers from 1 to 10, - "multiples" are multiples of "number" up to "number"-squared - we then write a generator_dict with two entries - generator_dict["number"] is a function (test_name, **kwargs) -> list - that returns [1, 2, ..., 10] - generator_dict["multiples"] is a function (test_name, number, **kwargs) -> list - that returns [number, 2* number, ..., number*number] - - This function automatically creates the inputs for pytest.mark.parameterize - fixture_param_str = "number,multiples" - fixture_prod = [(1, 1), (2, 2), (2, 4), (3, 3), (3, 6), ...] - fixture_names = ["1-1", "2-2", "2-4", "3-3", "3-6", ...] - - Parameters - ---------- - test_name : str, name of the test, from pytest_generate_tests - fixture_vars : list of str - fixture variable names used in parameterization of tests - generator_dict : dict of generator functions - keys are possible str in fixture_vars, expected signature is - (test_name: str, **kwargs) -> fixtures: Listof[object], or - (returning only fixtures) - (test_name: str, **kwargs) -> fixtures, fixture_names: Listof[object] - (returning fixture names as well as fixtures) - generator_dict[my_variable] can take arguments with names - in fixture_sequence to the left of my_variable - it should return a list of fixtures for my_variable - under the assumption that arguments have given values - fixture_sequence : list of str, optional, default = None - used in prioritizing conditional generators, sequentially (see above) - raise_exceptions : bool, optional, default = False - whether fixture generation errors or other Exceptions are raised - if False, exceptions are returned instead of fixtures - deepcopy_fixtures : bool. 
optional, default = False - whether returned fixture list in fixture_prod are deecopy-independent - if False, identical list/tuple elements will be identical by reference - if True, identical elements will be identical by value but no by reference - "elements" refer to fixture[i] as described below, in fixture_prod - - Returns - ------- - fixture_param_str : str, string to use in pytest.fixture.parameterize - this is strings in "fixture_vars" concatenated, separated by "," - fixture_prod : list of tuples, fixtures to use in pytest.fixture.parameterize - fixture tuples, generated according to the following conditional rule: - let fixture_vars = [fixture_var1, fixture_var2, ..., fixture_varN] - all fixtures are obtained as following: - for i in 1 to N - pick fixture[i] any element of generator_dict[fixture_vari]( - test_name, - fixture_var1 = fixture[1], ..., - fixture_var(i-1) = fixture[i-1], - ) - return (fixture[1], fixture[2], ..., fixture[N]) - if deepcopy_fixtures = False, identical fixture[i] are identical by reference - if deepcopy_fixtures = True, identical fixture[i] are not identical references - fixture_names : list of str, fixture ids to use in pytest.fixture.parameterize - fixture names, generated according to the following conditional rule: - let fixture_vars = [fixture_var1, fixture_var2, ..., fixture_varN] - all fixtures names are obtained as following: - for i in 1 to N - pick fixture_str_pt[i] any element of generator_dict[fixture_vari]( - test_name, - fixture_var1 = fixture[1], ..., - fixture_var(i-1) = fixture[i-1], - ), second return is exists; otherwise str(first return) - return "fixture_str_pt[1]-fixture_str_pt[2]-...-fixture_str_pt[N]" - fixture names correspond to fixtures with the same indices at picks (from lists) - """ - fixture_vars = _check_list_of_str(fixture_vars, name="fixture_vars") - fixture_vars = [var for var in fixture_vars if var in generator_dict.keys()] - - # order fixture_vars according to fixture_sequence if provided - if fixture_sequence is not None: - fixture_sequence = _check_list_of_str(fixture_sequence, name="fixture_sequence") - ordered_fixture_vars = [] - for fixture_var_name in fixture_sequence: - if fixture_var_name in fixture_vars: - ordered_fixture_vars += [fixture_var_name] - fixture_vars = ordered_fixture_vars - - def get_fixtures(fixture_var, **kwargs): - """Call fixture generator from generator_dict, return fixture list. - - Light wrapper around calls to generator_dict[key] functions that generate - conditional fixtures. get_fixtures adds default string names to the return - if generator_dict[key] does not return them. 
- - Parameters - ---------- - fixture_var : str, name of fixture variable - kwargs : key-value pairs, keys = names of previous fixture variables - test_name : str, from local scope - name of test for which fixtures are generated - - Returns - ------- - fixture_prod : list of objects or one-element list with FixtureGenerationError - fixtures for fixture_var for test_name, conditional on fixtures in kwargs - if call to generator_dict[fixture_var] fails, returns list with error - fixture_names : list of string, same length as fixture_prod - i-th element is a string name for i-th element of fixture_prod - if 2nd arg is returned by generator_dict, then 1:1 copy of that argument - if no 2nd arg is returned by generator_dict, then str(fixture_prod[i]) - if fixture_prod is list with error, then string is Error:fixture_var - """ - try: - res = generator_dict[fixture_var](test_name, **kwargs) - if isinstance(res, tuple) and len(res) == 2: - fixture_prod = res[0] - fixture_names = res[1] - else: - fixture_prod = res - fixture_names = [str(x) for x in res] - except Exception as err: - error = FixtureGenerationError(fixture_name=fixture_var, err=err) - if raise_exceptions: - raise error - fixture_prod = [error] - fixture_names = [f"Error:{fixture_var}"] - - return fixture_prod, fixture_names - - fixture_prod = [()] - fixture_names = [""] - - # we loop over fixture_vars, incrementally going through conditionals - for i, fixture_var in enumerate(fixture_vars): - old_fixture_vars = fixture_vars[0:i] - - # then take successive left products - new_fixture_prod = [] - new_fixture_names = [] - - for j, fixture in enumerate(fixture_prod): - # retrieve kwargs corresponding to old fixture values - fixture_name = fixture_names[j] - if i == 0: - kwargs = dict() - else: - kwargs = dict(zip(old_fixture_vars, fixture)) - # retrieve conditional fixtures, conditional on fixture values in kwargs - new_fixtures, new_fixture_names_r = get_fixtures(fixture_var, **kwargs) - # new fixture values are concatenation/product of old values plus new - new_fixture_prod += [ - fixture + (new_fixture,) for new_fixture in new_fixtures - ] - # new fixture name is concatenation of name so far and "dash-new name" - # if the new name is empty string, don't add a dash - if len(new_fixture_names_r) > 0 and new_fixture_names_r[0] != "": - new_fixture_names_r = [f"-{x}" for x in new_fixture_names_r] - new_fixture_names += [f"{fixture_name}{x}" for x in new_fixture_names_r] - - fixture_prod = new_fixture_prod - fixture_names = new_fixture_names - - # due to the concatenation, fixture names all start leading "-" which is removed - fixture_names = [x[1:] for x in fixture_names] - - # in pytest convention, variable strings are separated by comma - fixture_param_str = ",".join(fixture_vars) - - # we need to remove the tuple bracket from singleton - # in pytest convention, only multiple variables (2 or more) are tuples - fixture_prod = [_remove_single(x) for x in fixture_prod] - - # if deepcopy_fixtures = True: - # we run deepcopy on every element of fixture_prod to make them independent - if deepcopy_fixtures: - fixture_prod = [deepcopy(x) for x in fixture_prod] - - return fixture_param_str, fixture_prod, fixture_names - - -def _check_list_of_str(obj, name="obj"): - """Check whether obj is a list of str. 
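For reference, the "number"/"multiples" example from the docstring above, written out against the function as it stood (numbers kept to 1 through 3 so the full output fits in a comment):

    generator_dict = {
        "number": lambda test_name, **kwargs: [1, 2, 3],
        "multiples": lambda test_name, number, **kwargs: [
            number * i for i in range(1, number + 1)
        ],
    }
    param_str, prod, names = create_conditional_fixtures_and_names(
        test_name="test_example",
        fixture_vars=["number", "multiples"],
        generator_dict=generator_dict,
        fixture_sequence=["number", "multiples"],
    )
    # param_str == "number,multiples"
    # prod  == [(1, 1), (2, 2), (2, 4), (3, 3), (3, 6), (3, 9)]
    # names == ["1-1", "2-2", "2-4", "3-3", "3-6", "3-9"]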
- - Parameters - ---------- - obj : any object, check whether is list of str - name : str, default="obj", name of obj to display in error message - - Returns - ------- - obj, unaltered - - Raises - ------ - TypeError if obj is not list of str - """ - if not isinstance(obj, list) or not np.all([isinstance(x, str) for x in obj]): - raise TypeError(f"{name} must be a list of str") - return obj - - -def _remove_single(x): - """Remove tuple wrapping from singleton. - - Parameters - ---------- - x : tuple - - Returns - ------- - x[0] if x is a singleton, otherwise x - """ - if len(x) == 1: - return x[0] - else: - return x diff --git a/aeon/testing/utils/estimator_checks.py b/aeon/testing/utils/estimator_checks.py index db7b288e8d..01f2358ab7 100644 --- a/aeon/testing/utils/estimator_checks.py +++ b/aeon/testing/utils/estimator_checks.py @@ -10,11 +10,8 @@ from pandas.testing import assert_frame_equal from aeon.base import BaseEstimator, BaseObject -from aeon.classification.base import BaseClassifier -from aeon.classification.early_classification import BaseEarlyClassifier from aeon.clustering.base import BaseClusterer from aeon.regression.base import BaseRegressor -from aeon.testing.test_config import VALID_ESTIMATOR_TYPES from aeon.testing.testing_data import FULL_TEST_DATA_DICT from aeon.transformations.base import BaseTransformer from aeon.utils.validation import is_nested_univ_dataframe @@ -23,15 +20,25 @@ def _run_estimator_method(estimator, method_name, datatype, split): method = getattr(estimator, method_name) args = inspect.getfullargspec(method)[0] - if "X" in args and "y" in args: - return method( - X=FULL_TEST_DATA_DICT[datatype][split][0], - y=FULL_TEST_DATA_DICT[datatype][split][1], - ) - elif "X" in args: - return method(X=FULL_TEST_DATA_DICT[datatype][split][0]) - else: - return method() + try: + if "X" in args and "y" in args: + return method( + X=FULL_TEST_DATA_DICT[datatype][split][0], + y=FULL_TEST_DATA_DICT[datatype][split][1], + ) + elif "X" in args: + return method(X=FULL_TEST_DATA_DICT[datatype][split][0]) + else: + return method() + # generic message for ModuleNotFoundError which are assumed to be related to + # soft dependencies + except ModuleNotFoundError as e: + raise RuntimeError( + f"Estimator {estimator.__name__} raises a ModuleNotFoundError " + f"on {method.__name__}. Any required soft dependencies should " + f'be added to the "python_dependencies" tag, and python version bounds ' + f'should be added to the "python_version" tag.' + ) from e def _get_tag(estimator, tag_name, default=None, raise_error=False): @@ -47,13 +54,6 @@ def _get_tag(estimator, tag_name, default=None, raise_error=False): ) -def _get_err_msg(estimator): - return ( - f"Invalid estimator type: {type(estimator)}. 
Valid estimator types are: " - f"{VALID_ESTIMATOR_TYPES}" - ) - - def _list_required_methods(estimator): """Return list of required method names (beyond BaseEstimator ones).""" # all BaseObject children must implement these @@ -142,14 +142,6 @@ def _assert_array_almost_equal(x, y, decimal=6, err_msg=""): func(x, y, decimal=decimal, err_msg=err_msg) -def _assert_array_equal(x, y, err_msg=""): - func = np.testing.assert_array_equal - if isinstance(x, pd.DataFrame): - _compare_nested_frame(func, x, y, err_msg=err_msg) - else: - func(x, y, err_msg=err_msg) - - def _get_args(function, varargs=False): """Get function arguments.""" try: @@ -173,32 +165,3 @@ def _get_args(function, varargs=False): return args, varargs else: return args - - -def _has_capability(est, method: str) -> bool: - """Check whether estimator has capability of method.""" - - def get_tag(est, tag_name, tag_value_default=None): - if isclass(est): - return est.get_class_tag( - tag_name=tag_name, tag_value_default=tag_value_default - ) - else: - return est.get_tag(tag_name=tag_name, tag_value_default=tag_value_default) - - if not hasattr(est, method): - return False - if method == "inverse_transform": - return get_tag(est, "capability:inverse_transform", False) - if method in [ - "predict_proba", - "predict_interval", - "predict_quantiles", - "predict_var", - ]: - ALWAYS_HAVE_PREDICT_PROBA = (BaseClassifier, BaseEarlyClassifier, BaseClusterer) - # all classifiers and clusterers implement predict_proba - if method == "predict_proba" and isinstance(est, ALWAYS_HAVE_PREDICT_PROBA): - return True - return get_tag(est, "capability:pred_int", False) - return True diff --git a/aeon/testing/utils/scenarios.py b/aeon/testing/utils/scenarios.py deleted file mode 100644 index c3f7a0615b..0000000000 --- a/aeon/testing/utils/scenarios.py +++ /dev/null @@ -1,274 +0,0 @@ -"""Testing utility to play back usage scenarios for estimators. - -Contains TestScenario class which applies method/args subsequently -""" - -__maintainer__ = [] - -__all__ = ["TestScenario"] - - -from copy import deepcopy -from inspect import isclass - -import numpy as np - - -class TestScenario: - """Class to run pre-defined method execution scenarios for objects. - - Parameters - ---------- - args : dict of dict, default = None - dict of argument dicts to be used in methods - names for keys need not equal names of methods these are used in - but scripted method will look at key with same name as default - must be passed to constructor, set in a child class - or dynamically created in get_args - default_method_sequence : list of str, default = None - default sequence for methods to be called - optional, if given, default method sequence to use in `run` - if not provided, at least one of the sequence arguments must be passed in `run` - or default_arg_sequence must be provided - default_arg_sequence : list of str, default = None - default sequence of keys for keyword argument dicts to be used - names for keys need not equal names of methods - if not provided, at least one of the sequence arguments must be passed in `run` - or default_method_sequence must be provided - - Methods - ------- - run(obj, args=None, default_method_sequence=None) - Run a call(args) scenario on obj, and retrieve method outputs. - is_applicable(obj) - Check whether scenario is applicable to obj. - get_args(key, obj) - Dynamically create args for call defined by key and obj. - Defaults to self.args[key] if not overridden. 
- """ - - def __init__( - self, args=None, default_method_sequence=None, default_arg_sequence=None - ): - if default_method_sequence is not None: - self.default_method_sequence = _check_list_of_str(default_method_sequence) - elif not hasattr(self, "default_method_sequence"): - self.default_method_sequence = None - if default_arg_sequence is not None: - self.default_arg_sequence = _check_list_of_str(default_arg_sequence) - elif not hasattr(self, "default_arg_sequence"): - self.default_arg_sequence = None - if args is not None: - self.args = _check_dict_of_dict(args) - else: - if not hasattr(self, "args"): - raise RuntimeError( - "args must either be given to __init__ or set in a child class" - ) - _check_dict_of_dict(self.args) - - def get_args(self, key, obj=None, deepcopy_args=True): - """Return args for key. Can be overridden for dynamic arg generation. - - If overridden, must not have any side effects on self.args - e.g., avoid assignments args[key] = x without deepcopying self.args first - - Parameters - ---------- - key : str, argument key to construct/retrieve args for - obj : obj, optional, default=None. Object to construct args for. - deepcopy_args : bool, optional, default=True. Whether to deepcopy return. - - Returns - ------- - args : argument dict to be used for a method, keyed by `key` - names for keys need not equal names of methods these are used in - but scripted method will look at key with same name as default - """ - args = self.args.get(key, {}) - if deepcopy_args: - args = deepcopy(args) - return args - - def run( - self, - obj, - method_sequence=None, - arg_sequence=None, - return_all=False, - return_args=False, - deepcopy_return=False, - ): - """Run a call(args) scenario on obj, and retrieve method outputs. - - Runs a sequence of commands - res_1 = obj.method_1(**args_1) - res_2 = obj.method_2(**args_2) - etc, where method_i is method_sequence[i], - and args_i is self.args[arg_sequence[i]] - and returns results. Args are passed as deepcopy to avoid side effects. 
- - if method_i is __init__ (a constructor), - obj is changed to obj.__init__(**args_i) from the next line on - - Parameters - ---------- - obj : class or object with methods in method_sequence - method_sequence : list of str, default = arg_sequence if passed - if arg_sequence is also None, then default = self.default_method_sequence - sequence of method names to be run - arg_sequence : list of str, default = method_sequence if passed - if method_sequence is also None, then default = self.default_arg_sequence - sequence of keys for keyword argument dicts to be used - names for keys need not equal names of methods - return_all : bool, default = False - whether all or only the last result should be returned - if False, only the last result is returned - if True, list of deepcopies of intermediate results is returned - return_args : bool, default = False - whether arguments should also be returned - if False, there is no second return argument - if True, "args_after_call" return argument is returned - deepcopy_return : bool, default = False - whether returns are deepcopied before returned - if True, returns are deepcopies of return - if False, returns are references/assignments, not deepcopies - NOTE: if self is returned (e.g., in fit), and deepcopy_return=False - method calls may continue to have side effects on that return - - Returns - ------- - results : output of the last method call, if return_all = False - list of deepcopies of all outputs, if return_all = True - args_after_call : list of args after method call, only if return_args = True - i-th element is deepcopy of args of i-th method call, after method call - this is possibly subject to side effects by the method - """ - # if both None, fill with defaults if exist - if method_sequence is None and arg_sequence is None: - method_sequence = getattr(self, "default_method_sequence", None) - arg_sequence = getattr(self, "default_arg_sequence", None) - - # if both are still None, raise an error - if method_sequence is None and arg_sequence is None: - raise ValueError( - "at least one of method_sequence, arg_sequence must be not None " - "if no defaults are set in the class" - ) - - # if only one is None, fill one with the other - if method_sequence is None: - method_sequence = _check_list_of_str(arg_sequence) - else: - method_sequence = _check_list_of_str(method_sequence) - if arg_sequence is None: - arg_sequence = _check_list_of_str(method_sequence) - else: - arg_sequence = _check_list_of_str(arg_sequence) - - # check that length of sequences is the same - num_calls = len(arg_sequence) - if not num_calls == len(method_sequence): - raise ValueError("arg_sequence and method_sequence must have same length") - - # execute the commands in sequence, report result(s) - results = [] - args_after_call = [] - for i in range(num_calls): - methodname = method_sequence[i] - args = deepcopy(self.get_args(key=arg_sequence[i], obj=obj)) - - if methodname != "__init__": - res = getattr(obj, methodname)(**args) - # if constructor is called, run directly and replace obj - else: - if isclass(obj): - res = obj(**args) - else: - res = type(obj)(**args) - obj = res - - args_after_call += [args] - - if deepcopy_return: - res = deepcopy(res) - - if return_all: - results += [res] - else: - results = res - - if return_args: - return results, args_after_call - else: - return results - - def is_applicable(self, obj): - """Check whether scenario is applicable to obj. - - Abstract method, children should implement. This just returns "true". 
- - Example for child class: scenario is univariate time series forecasting. - Then, this returns False on multivariate, True on univariate forecasters. - - Parameters - ---------- - obj : class or object to check against scenario - - Returns - ------- - applicable: bool - True if self is applicable to obj, False if not - "applicable" is defined as the implementer chooses, as output of this method - False is typically used as a "skip" flag in unit or integration testing - """ - return True - - -def _check_list_of_str(obj, name="obj"): - """Check whether obj is a list of str. - - Parameters - ---------- - obj : any object, check whether is list of str - name : str, default="obj", name of obj to display in error message - - Returns - ------- - obj, unaltered - - Raises - ------ - TypeError if obj is not list of str - """ - if not isinstance(obj, list) or not np.all([isinstance(x, str) for x in obj]): - raise TypeError(f"{obj} must be a list of str") - return obj - - -def _check_dict_of_dict(obj, name="obj"): - """Check whether obj is a dict of dict, with str keys. - - Parameters - ---------- - obj : any object, check whether is dict of dict, with str keys - name : str, default="obj", name of obj to display in error message - - Returns - ------- - obj, unaltered - - Raises - ------ - TypeError if obj is not dict of dict, with str keys - """ - if not ( - isinstance(obj, dict) - and np.all([isinstance(x, dict) for x in obj.values()]) - and np.all([isinstance(x, str) for x in obj.keys()]) - ): - raise TypeError( - f"Argument {name} with must be a dict of dict, with str keys, " - f"found {type(obj)}" - ) - return obj diff --git a/aeon/testing/utils/scenarios_classification.py b/aeon/testing/utils/scenarios_classification.py deleted file mode 100644 index ecb4bc632d..0000000000 --- a/aeon/testing/utils/scenarios_classification.py +++ /dev/null @@ -1,187 +0,0 @@ -"""Test scenarios for classification and regression. - -Contains TestScenario concrete children to run in tests for classifiers/regressors. -""" - -__maintainer__ = [] - -__all__ = [ - "scenarios_classification", - "scenarios_early_classification", - "scenarios_regression", -] - -from inspect import isclass - -from aeon.base import BaseObject -from aeon.classification.base import BaseClassifier -from aeon.classification.early_classification import BaseEarlyClassifier -from aeon.regression.base import BaseRegressor -from aeon.testing.data_generation import ( - make_example_3d_numpy, - make_example_3d_numpy_list, -) -from aeon.testing.utils.scenarios import TestScenario - -# random seed for generating data to keep scenarios exactly reproducible -RAND_SEED = 42 - - -class ClassifierTestScenario(TestScenario, BaseObject): - """Generic test scenario for classifiers.""" - - def get_args(self, key, obj=None, deepcopy_args=True): - """Return args for key. Can be overridden for dynamic arg generation. - - If overridden, must not have any side effects on self.args - e.g., avoid assignments args[key] = x without deepcopying self.args first - - Parameters - ---------- - key : str, argument key to construct/retrieve args for - obj : obj, optional, default=None. Object to construct args for. - deepcopy_args : bool, optional, default=True. Whether to deepcopy return. 
- - Returns - ------- - args : argument dict to be used for a method, keyed by `key` - names for keys need not equal names of methods these are used in - but scripted method will look at key with same name as default - """ - # use same args for predict-like functions as for predict - if key in ["predict_proba", "decision_function"]: - key = "predict" - - return super().get_args(key=key, obj=obj, deepcopy_args=deepcopy_args) - - def is_applicable(self, obj): - """Check whether scenario is applicable to obj. - - Parameters - ---------- - obj : class or object to check against scenario - - Returns - ------- - applicable: bool - True if self is applicable to obj, False if not - """ - - def get_tag(obj, tag_name): - if isclass(obj): - return obj.get_class_tag(tag_name) - else: - return obj.get_tag(tag_name) - - regr_or_classf = (BaseClassifier, BaseEarlyClassifier, BaseRegressor) - - # applicable only if obj inherits from BaseClassifier, BaseEarlyClassifier or - # BaseRegressor. currently we test both classifiers and regressors using these - # scenarios - if not isinstance(obj, regr_or_classf) and not issubclass(obj, regr_or_classf): - return False - - # if X is multivariate, applicable only if can handle multivariate - is_multivariate = not self.get_tag("X_univariate") - if is_multivariate and not get_tag(obj, "capability:multivariate"): - return False - - # if X is unequal length, applicable only if can handle unequal length - is_unequal_length = self.get_tag("X_unequal_length") - if is_unequal_length and not get_tag(obj, "capability:unequal_length"): - return False - - return True - - -X, y = make_example_3d_numpy(n_cases=10, n_timepoints=20, random_state=RAND_SEED) -X_test, _ = make_example_3d_numpy(n_cases=5, n_timepoints=20, random_state=RAND_SEED) - -X_mv, _ = make_example_3d_numpy( - n_cases=10, n_channels=2, n_timepoints=20, random_state=RAND_SEED -) -X_test_mv, _ = make_example_3d_numpy( - n_cases=5, n_channels=2, n_timepoints=20, random_state=RAND_SEED -) - -X_ul, _ = make_example_3d_numpy_list( - n_cases=10, max_n_timepoints=15, min_n_timepoints=10, random_state=RAND_SEED -) -X_test_ul, _ = make_example_3d_numpy_list( - n_cases=5, max_n_timepoints=15, min_n_timepoints=10, random_state=RAND_SEED -) - - -class ClassifierFitPredict(ClassifierTestScenario): - """Fit/predict with univariate X, labels y.""" - - _tags = { - "X_univariate": True, - "X_unequal_length": False, - "is_enabled": True, - "n_classes": 2, - } - - args = { - "fit": {"y": y, "X": X}, - "predict": {"X": X_test}, - } - default_method_sequence = ["fit", "predict", "predict_proba", "decision_function"] - default_arg_sequence = ["fit", "predict", "predict", "predict"] - - -class ClassifierFitPredictMultivariate(ClassifierTestScenario): - """Fit/predict with multivariate panel X and labels y.""" - - _tags = { - "X_univariate": False, - "X_unequal_length": False, - "is_enabled": True, - "n_classes": 2, - } - - args = { - "fit": {"y": y, "X": X_mv}, - "predict": {"X": X_test_mv}, - } - default_method_sequence = ["fit", "predict", "predict_proba", "decision_function"] - default_arg_sequence = ["fit", "predict", "predict", "predict"] - - -class ClassifierFitPredictUnequalLength(ClassifierTestScenario): - """Fit/predict with univariate X and labels y, unequal length series.""" - - _tags = { - "X_univariate": True, - "X_unequal_length": True, - "is_enabled": True, - "n_classes": 2, - } - - args = { - "fit": {"y": y, "X": X_ul}, - "predict": {"X": X_ul}, - } - default_method_sequence = ["fit", "predict", "predict_proba", 
"decision_function"] - default_arg_sequence = ["fit", "predict", "predict", "predict"] - - -scenarios_classification = [ - ClassifierFitPredict, - ClassifierFitPredictMultivariate, - ClassifierFitPredictUnequalLength, -] - -# same scenarios used for early classification -scenarios_early_classification = [ - ClassifierFitPredict, - ClassifierFitPredictMultivariate, - ClassifierFitPredictUnequalLength, -] - -# we use the same scenarios for regression, as in the old test suite -scenarios_regression = [ - ClassifierFitPredict, - ClassifierFitPredictMultivariate, - ClassifierFitPredictUnequalLength, -] diff --git a/aeon/testing/utils/scenarios_getter.py b/aeon/testing/utils/scenarios_getter.py deleted file mode 100644 index dafa981ea6..0000000000 --- a/aeon/testing/utils/scenarios_getter.py +++ /dev/null @@ -1,114 +0,0 @@ -"""Retrieval utility for test scenarios.""" - -__maintainer__ = [] - -__all__ = ["retrieve_scenarios"] - - -from aeon.base import BaseObject -from aeon.registry import BASE_CLASS_IDENTIFIER_LIST, get_identifiers -from aeon.testing.utils.scenarios_classification import ( - scenarios_classification, - scenarios_early_classification, - scenarios_regression, -) -from aeon.testing.utils.scenarios_transformers import scenarios_transformers - -scenarios = dict() -scenarios["classifier"] = scenarios_classification -scenarios["early_classifier"] = scenarios_early_classification -scenarios["regressor"] = scenarios_regression -scenarios["transformer"] = scenarios_transformers - - -def retrieve_scenarios(obj, filter_tags=None): - """Retrieve test scenarios for obj, or by estimator type string. - - Exactly one of the arguments obj, estimator_type must be provided. - - Parameters - ---------- - obj : class or object, or string, or list of str. - Which kind of estimator/object to retrieve scenarios for. - If object, must be a class or object inheriting from BaseObject. 
- If string(s), must be in registry.BASE_CLASS_REGISTER (first col) - for instance 'classifier', 'regressor', 'transformer', 'forecaster' - filter_tags: dict of (str or list of str), default=None - subsets the returned objectss as follows: - each key/value pair is statement in "and"/conjunction - key is tag name to sub-set on - value str or list of string are tag values - condition is "key must be equal to value, or in set(value)" - - Returns - ------- - scenarios : list of objects, instances of BaseScenario - """ - if not isinstance(obj, (str, BaseObject)) and not issubclass(obj, BaseObject): - raise TypeError("obj must be a str or inherit from BaseObject") - if isinstance(obj, str) and obj not in BASE_CLASS_IDENTIFIER_LIST: - raise ValueError( - "if obj is a str, then obj must be a valid identifier, " - "see registry.BASE_CLASS_IDENTIFIER_LIST for valid identifier strings" - ) - - # if class, get identifier from inference; otherwise, str or list of str - if not isinstance(obj, str): - estimator_type = get_identifiers(obj) - else: - estimator_type = obj - - # coerce to list, ensure estimator_type is list of str - if not isinstance(estimator_type, list): - estimator_type = [estimator_type] - - # now loop through types and retrieve scenarios - scenarios_for_type = [] - for est_type in estimator_type: - scens = scenarios.get(est_type) - if scens is not None: - scenarios_for_type += scenarios.get(est_type) - - # instantiate all scenarios by calling constructor - scenarios_for_type = [x() for x in scenarios_for_type] - - # if obj was an object, filter to applicable scenarios - if not isinstance(obj, str) and not isinstance(obj, list): - scenarios_for_type = [x for x in scenarios_for_type if x.is_applicable(obj)] - - if filter_tags is not None: - scenarios_for_type = [ - scen for scen in scenarios_for_type if _check_tag_cond(scen, filter_tags) - ] - - return scenarios_for_type - - -def _check_tag_cond(obj, filter_tags=None): - """Check whether object satisfies filter_tags condition. - - Parameters - ---------- - obj: object inheriting from aeon BaseObject - filter_tags: dict of (str or list of str), default=None - subsets the returned objectss as follows: - each key/value pair is statement in "and"/conjunction - key is tag name to sub-set on - value str or list of string are tag values - condition is "key must be equal to value, or in set(value)" - - Returns - ------- - cond_sat: bool, whether estimator satisfies condition in filter_tags - """ - if not isinstance(filter_tags, dict): - raise TypeError("filter_tags must be a dict") - - cond_sat = True - - for key, value in filter_tags.items(): - if not isinstance(value, list): - value = [value] - cond_sat = cond_sat and obj.get_class_tag(key) in set(value) - - return cond_sat diff --git a/aeon/testing/utils/scenarios_transformers.py b/aeon/testing/utils/scenarios_transformers.py deleted file mode 100644 index 7c0c752080..0000000000 --- a/aeon/testing/utils/scenarios_transformers.py +++ /dev/null @@ -1,467 +0,0 @@ -"""Test scenarios for transformers. - -Contains TestScenario concrete children to run in tests for transformers. 
-""" - -__maintainer__ = [] - -__all__ = ["scenarios_transformers"] - -import inspect -from copy import deepcopy -from inspect import isclass - -import numpy as np -import pandas as pd -from sklearn.utils import check_random_state - -from aeon.base import BaseObject -from aeon.testing.data_generation import _make_hierarchical -from aeon.testing.data_generation._legacy import ( - _make_classification_y, - _make_collection_X, - make_series, -) -from aeon.testing.utils.scenarios import TestScenario -from aeon.transformations.collection.base import BaseCollectionTransformer -from aeon.utils.validation import abstract_types - -# random seed for generating data to keep scenarios exactly reproducible -RAND_SEED = 42 -RAND_SEED2 = 84 - -# typical length of time series -N_T = 10 - - -def _make_primitives(n_columns=1, random_state=None): - """Generate one or more primitives, for checking inverse-transform.""" - rng = check_random_state(random_state) - if n_columns == 1: - return rng.rand() - return rng.rand(size=(n_columns,)) - - -def _make_tabular_X(n_cases=20, n_columns=1, return_numpy=True, random_state=None): - """Generate tabular X, for checking inverse-transform.""" - rng = check_random_state(random_state) - X = rng.rand(n_cases, n_columns) - if return_numpy: - return X - else: - return pd.DataFrame(X) - - -def _is_child_of(obj, class_or_tuple): - """Shorthand for 'inherits from', obj can be class or object.""" - if isclass(obj): - return issubclass(obj, class_or_tuple) - else: - return isinstance(obj, class_or_tuple) - - -def get_tag(obj, tag_name): - """Shorthand for get_tag vs get_class_tag, obj can be class or object.""" - if isclass(obj): - return obj.get_class_tag(tag_name) - else: - return obj.get_tag(tag_name) - - -def _internal_abstract_type(obj, inner_tag, series_types): - inner_types = get_tag(obj, inner_tag) - if isinstance(inner_types, str): - inner_types = {inner_types} - else: - inner_types = set(inner_types) - return not inner_types.issubset(series_types) - - -class TransformerTestScenario(TestScenario, BaseObject): - """Generic test scenario for transformers.""" - - def is_applicable(self, obj): - """Check whether scenario is applicable to obj. - - Parameters - ---------- - obj : class or object to check against scenario - - Returns - ------- - applicable: bool - True if self is applicable to obj, False if not - """ - # pre-refactor classes can't deal with Series *and* Panel both - X_type = self.get_tag("X_type") - y_type = self.get_tag("y_type", None, raise_error=False) - - if ( - isinstance(obj, BaseCollectionTransformer) - or (inspect.isclass(obj) and issubclass(obj, BaseCollectionTransformer)) - ) and X_type != "Panel": - return False - - # if transformer requires y, the scenario also must pass y - has_y = self.get_tag("has_y") - if not has_y and get_tag(obj, "requires_y"): - return False - - # the case that we would need to vectorize with y, skip - X_inner_type = get_tag(obj, "X_inner_type") - X_inner_abstract_types = abstract_types(X_inner_type) - # we require vectorization from of a Series trafo to Panel data ... - if X_type == "Panel" and "Panel" not in X_inner_abstract_types: - # ... but y is passed and y is not ignored internally ... - if self.get_tag("has_y") and get_tag(obj, "y_inner_type") != "None": - # ... 
this would raise an error since vectorization is not defined - return False - - # ensure scenario y matches type of inner y - y_inner_type = get_tag(obj, "y_inner_type") - if y_inner_type not in [None, "None"]: - y_inner_abstract_types = abstract_types(y_inner_type) - if y_type not in y_inner_abstract_types: - return False - - # only applicable if X of supported index type - X = self.args["fit"]["X"] - supported_idx_types = get_tag(obj, "enforce_index_type") - if isinstance(X, (pd.Series, pd.DataFrame)) and supported_idx_types is not None: - if type(X.index) not in supported_idx_types: - return False - if isinstance(X, np.ndarray) and supported_idx_types is not None: - if pd.RangeIndex not in supported_idx_types: - return False - - return True - - def get_args(self, key, obj=None, deepcopy_args=False): - """Return args for key. Can be overridden for dynamic arg generation. - - If overridden, must not have any side effects on self.args - e.g., avoid assignments args[key] = x without deepcopying self.args first - - Parameters - ---------- - key : str, argument key to construct/retrieve args for - obj : obj, optional, default=None. Object to construct args for. - deepcopy_args : bool, optional, default=True. Whether to deepcopy return. - - Returns - ------- - args : argument dict to be used for a method, keyed by `key` - names for keys need not equal names of methods these are used in - but scripted method will look at key with same name as default - """ - if key == "inverse_transform": - if obj is None: - raise ValueError('if key="inverse_transform", obj must be provided') - - X_type = self.get_tag("X_type") - - X_out_type = get_tag(obj, "output_data_type") - X_panel = get_tag(obj, "instancewise") - - X_out_series = X_out_type == "Series" - X_out_prim = X_out_type == "Primitives" - - # determine output by X_type - s2s = X_type == "Series" and X_out_series - s2p = X_type == "Series" and X_out_prim - p2t = X_type == "Panel" and X_out_prim - p2p = X_type == "Panel" and X_out_series - - # expected input type of inverse_transform is expected output of transform - if s2p: - args = {"X": _make_primitives(random_state=RAND_SEED)} - elif s2s: - args = {"X": make_series(n_timepoints=N_T, random_state=RAND_SEED)} - elif p2t: - args = {"X": _make_tabular_X(n_cases=7, nrandom_state=RAND_SEED)} - elif p2p: - args = { - "X": _make_collection_X( - n_cases=7, n_timepoints=N_T, random_state=RAND_SEED - ) - } - else: - raise RuntimeError( - "transformer with unexpected combination of tags: " - f"X_out_type = {X_out_type}, instancewise = {X_panel}" - ) - - else: - # default behaviour, happens except when key = "inverse_transform" - args = self.args.get(key, {}) - - if deepcopy_args: - args = deepcopy(args) - - return args - - -X_series = make_series(n_timepoints=N_T, random_state=RAND_SEED) -X_panel = _make_collection_X( - n_cases=7, n_channels=1, n_timepoints=N_T, random_state=RAND_SEED -) - - -class TransformerFitTransformSeriesUnivariate(TransformerTestScenario): - """Fit/transform, univariate Series X.""" - - _tags = { - # These tags are only used in testing and are not defined in the registry - "X_type": "Series", - "X_univariate": True, - "has_y": False, - "is_enabled": True, - } - - args = { - "fit": {"X": make_series(n_timepoints=N_T + 1, random_state=RAND_SEED)}, - "transform": {"X": make_series(n_timepoints=N_T + 1, random_state=RAND_SEED2)}, - # "inverse_transform": {"X": make_series(n_timepoints=N_T)}, - } - default_method_sequence = ["fit", "transform"] - - -class 
TransformerFitTransformSeriesMultivariate(TransformerTestScenario): - """Fit/transform, multivariate Series X.""" - - _tags = { - "X_type": "Series", - "X_univariate": False, - "has_y": False, - "is_enabled": True, - } - - args = { - "fit": { - "X": make_series(n_columns=2, n_timepoints=N_T, random_state=RAND_SEED), - }, - "transform": { - "X": make_series(n_columns=2, n_timepoints=N_T, random_state=RAND_SEED) - }, - } - default_method_sequence = ["fit", "transform"] - - -class TransformerFitTransformSeriesUnivariateWithY(TransformerTestScenario): - """Fit/transform, univariate Series X and univariate Series y.""" - - _tags = { - "X_type": "Series", - "X_univariate": True, - "has_y": True, - "is_enabled": True, - "y_type": "Series", - } - - args = { - "fit": { - "X": make_series(n_columns=1, n_timepoints=N_T, random_state=RAND_SEED), - "y": make_series(n_columns=1, n_timepoints=N_T, random_state=RAND_SEED), - }, - "transform": { - "X": make_series(n_columns=1, n_timepoints=N_T, random_state=RAND_SEED), - "y": make_series(n_columns=1, n_timepoints=N_T, random_state=RAND_SEED), - }, - } - default_method_sequence = ["fit", "transform"] - - -y3 = _make_classification_y(n_cases=9, n_classes=3) -X_np = _make_collection_X( - n_cases=9, - n_channels=1, - n_timepoints=N_T, - all_positive=True, - return_numpy=True, - random_state=RAND_SEED, -) -X_test_np = _make_collection_X( - n_cases=9, - n_channels=1, - n_timepoints=N_T, - all_positive=True, - return_numpy=True, - random_state=RAND_SEED2, -) - - -class TransformerFitTransformPanelUnivariateNumpyWithClassYOnlyFit( - TransformerTestScenario -): - """Fit/predict with univariate panel X, numpy3D input type, and labels y.""" - - _tags = { - "X_type": "Panel", - "X_univariate": True, - "has_y": True, - "is_enabled": True, - "y_type": "Table", - } - - args = { - "fit": {"y": y3, "X": X_np}, - "transform": {"X": X_test_np}, - } - default_method_sequence = ["fit", "transform"] - - -class TransformerFitTransformPanelUnivariate(TransformerTestScenario): - """Fit/transform, univariate Panel X.""" - - _tags = { - "X_type": "Panel", - "X_univariate": True, - "has_y": False, - "is_enabled": False, - } - - args = { - "fit": { - "X": _make_collection_X( - n_cases=7, n_channels=1, n_timepoints=N_T, random_state=RAND_SEED - ) - }, - "transform": { - "X": _make_collection_X( - n_cases=7, n_channels=1, n_timepoints=N_T, random_state=RAND_SEED - ) - }, - } - default_method_sequence = ["fit", "transform"] - - -class TransformerFitTransformPanelMultivariate(TransformerTestScenario): - """Fit/transform, multivariate Panel X.""" - - _tags = { - "X_type": "Panel", - "X_univariate": False, - "has_y": False, - "is_enabled": False, - } - - args = { - "fit": { - "X": _make_collection_X( - n_cases=7, n_channels=2, n_timepoints=N_T, random_state=RAND_SEED - ) - }, - "transform": { - "X": _make_collection_X( - n_cases=7, n_channels=2, n_timepoints=N_T, random_state=RAND_SEED - ) - }, - } - default_method_sequence = ["fit", "transform"] - - -class TransformerFitTransformPanelUnivariateWithClassY(TransformerTestScenario): - """Fit/transform, multivariate Panel X, with y in fit and transform.""" - - _tags = { - "X_type": "Panel", - "X_univariate": True, - "is_enabled": True, - "has_y": True, - "y_type": "Table", - } - - args = { - "fit": { - "X": _make_collection_X( - n_cases=7, - n_channels=1, - n_timepoints=N_T + 1, - all_positive=True, - random_state=RAND_SEED, - ), - "y": _make_classification_y(n_cases=7, n_classes=2), - }, - "transform": { - "X": _make_collection_X( - 
n_cases=7, - n_channels=1, - n_timepoints=N_T + 1, - all_positive=True, - random_state=RAND_SEED, - ), - "y": _make_classification_y(n_cases=7, n_classes=2), - }, - } - default_method_sequence = ["fit", "transform"] - - -class TransformerFitTransformPanelUnivariateWithClassYOnlyFit(TransformerTestScenario): - """Fit/transform, multivariate Panel X, with y in fit but not in transform.""" - - _tags = { - "X_type": "Panel", - "X_univariate": True, - "is_enabled": False, - "has_y": True, - "y_type": "Table", - } - - args = { - "fit": { - "X": _make_collection_X(n_cases=7, n_channels=1, n_timepoints=N_T), - "y": _make_classification_y(n_cases=7, n_classes=2), - }, - "transform": { - "X": _make_collection_X(n_cases=7, n_channels=1, n_timepoints=N_T) - }, - } - default_method_sequence = ["fit", "transform"] - - -class TransformerFitTransformHierarchicalUnivariate(TransformerTestScenario): - """Fit/transform, univariate Hierarchical X.""" - - _tags = { - "X_type": "Hierarchical", - "X_univariate": True, - "is_enabled": False, - "has_y": False, - } - - args = { - "fit": {"X": _make_hierarchical(random_state=RAND_SEED)}, - "transform": {"X": _make_hierarchical(random_state=RAND_SEED + 1)}, - } - default_method_sequence = ["fit", "transform"] - - -class TransformerFitTransformHierarchicalMultivariate(TransformerTestScenario): - """Fit/transform, multivariate Hierarchical X.""" - - _tags = { - "X_type": "Hierarchical", - "X_univariate": False, - "is_enabled": False, - "has_y": False, - } - - args = { - "fit": {"X": _make_hierarchical(random_state=RAND_SEED, n_columns=2)}, - "transform": {"X": _make_hierarchical(random_state=RAND_SEED + 1, n_columns=2)}, - } - default_method_sequence = ["fit", "transform"] - - -scenarios_transformers = [ - TransformerFitTransformSeriesUnivariate, - TransformerFitTransformSeriesMultivariate, - TransformerFitTransformSeriesUnivariateWithY, - TransformerFitTransformPanelUnivariate, - TransformerFitTransformPanelMultivariate, - TransformerFitTransformPanelUnivariateWithClassY, - TransformerFitTransformPanelUnivariateWithClassYOnlyFit, - TransformerFitTransformPanelUnivariateNumpyWithClassYOnlyFit, - TransformerFitTransformHierarchicalMultivariate, - TransformerFitTransformHierarchicalUnivariate, -] diff --git a/aeon/testing/utils/tests/test_forecasting.py b/aeon/testing/utils/tests/test_forecasting.py deleted file mode 100644 index 8a1989122a..0000000000 --- a/aeon/testing/utils/tests/test_forecasting.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Test for making forecasting problems.""" - -__maintainer__ = [] -__all__ = [] - -import pandas as pd -import pytest - -from aeon.testing.data_generation._legacy import make_forecasting_problem - - -@pytest.mark.parametrize("n_timepoints", [3, 5]) -def test_make_forecasting_problem(n_timepoints): - y = make_forecasting_problem(n_timepoints) - - assert isinstance(y, pd.Series) - assert y.shape[0] == n_timepoints diff --git a/aeon/testing/utils/tests/test_testscenario_getter.py b/aeon/testing/utils/tests/test_testscenario_getter.py deleted file mode 100644 index 7e7c444941..0000000000 --- a/aeon/testing/utils/tests/test_testscenario_getter.py +++ /dev/null @@ -1,42 +0,0 @@ -"""Test suite for retrieving scenarios.""" - -__maintainer__ = [] -__all__ = [] - -import numpy as np -import pytest - -from aeon.registry import BASE_CLASS_IDENTIFIER_LIST, BASE_CLASS_LIST -from aeon.testing.utils.scenarios import TestScenario -from aeon.testing.utils.scenarios_getter import retrieve_scenarios - - -@pytest.mark.parametrize("estimator_class", 
BASE_CLASS_LIST) -def test_get_scenarios_for_class(estimator_class): - """Test retrieval of scenarios by class.""" - scenarios = retrieve_scenarios(obj=estimator_class) - - assert isinstance(scenarios, list), "return of retrieve_scenarios is not a list" - assert np.all( - isinstance(x, TestScenario) for x in scenarios - ), "return of retrieve_scenarios is not a list of scenarios" - - -@pytest.mark.parametrize("type_string", BASE_CLASS_IDENTIFIER_LIST) -def test_get_scenarios_for_string(type_string): - """Test retrieval of scenarios by string.""" - scenarios = retrieve_scenarios(obj=type_string) - - assert isinstance(scenarios, list), "return of retrieve_scenarios is not a list" - assert np.all( - isinstance(x, TestScenario) for x in scenarios - ), "return of retrieve_scenarios is not a list of scenarios" - - -def test_get_scenarios_errors(): - """Test that errors are raised for bad input args.""" - with pytest.raises(TypeError): - retrieve_scenarios() - - with pytest.raises(TypeError): - retrieve_scenarios(obj=1) diff --git a/aeon/testing/utils/tests/test_testscenarios.py b/aeon/testing/utils/tests/test_testscenarios.py deleted file mode 100644 index 56efea30e7..0000000000 --- a/aeon/testing/utils/tests/test_testscenarios.py +++ /dev/null @@ -1,161 +0,0 @@ -"""Making test scenarios.""" - -__maintainer__ = [] -__all__ = [] - - -from aeon.testing.utils.scenarios import TestScenario - - -class TestedMockClass: - """Mock class to test TestScenario.""" - - def __init__(self, a): - self.a = a - - def foo(self, b): - """Test method for mock class to test TestScenario.""" - self.a += b - return self.a - - def bar(self, c, d="0"): - """Test method for mock class to test TestScenario.""" - self.a += c - self.a += d - return self.a - - @classmethod - def baz(cls): - return "foo" - - -def test_testscenario_object_args_only(): - """Test basic workflow: construct only with args, call run with minimal args.""" - obj = TestedMockClass(a="super") - scenario = TestScenario( - args={"foo": {"b": "cali"}, "bar": {"c": "fragi", "d": "listic"}} - ) - - result = scenario.run(obj, method_sequence=["foo", "bar"]) - - assert result == "supercalifragilistic" - - -def test_testscenario_object_default_method_sequence(): - """Test basic workflow: construct with args and default method sequence.""" - obj = TestedMockClass(a="super") - scenario = TestScenario( - args={"foo": {"b": "cali"}, "bar": {"c": "fragi", "d": "listic"}}, - default_method_sequence=["foo", "bar"], - ) - - result = scenario.run(obj) - - assert result == "supercalifragilistic" - - -def test_testscenario_object_default_arg_sequence(): - """Test basic workflow: construct with args and default arg sequence.""" - obj = TestedMockClass(a="super") - scenario = TestScenario( - args={"foo": {"b": "cali"}, "bar": {"c": "fragi", "d": "listic"}}, - default_arg_sequence=["foo", "bar"], - ) - - result = scenario.run(obj) - - assert result == "supercalifragilistic" - - -def test_testscenario_object_return_all(): - """Test basic workflow: construct with args and default arg sequence.""" - obj = TestedMockClass(a="super") - scenario = TestScenario( - args={"foo": {"b": "cali"}, "bar": {"c": "fragi", "d": "listic"}}, - default_arg_sequence=["foo", "bar"], - ) - - result = scenario.run(obj, return_all=True) - - assert result == ["supercali", "supercalifragilistic"] - - -def test_testscenario_object_multi_call_defaults(): - """Test basic workflow: default args where methods are called multiple times.""" - obj = TestedMockClass(a="super") - scenario = TestScenario( - 
args={ - "foo": {"b": "cali"}, - "bar": {"c": "fragi", "d": "listic"}, - "foo-2nd": {"b": "expi"}, - "bar-2nd": {"c": "ali", "d": "docious"}, - }, - default_arg_sequence=["foo", "bar", "foo-2nd", "bar-2nd"], - default_method_sequence=["foo", "bar", "foo", "bar"], - ) - - result = scenario.run(obj) - - assert result == "supercalifragilisticexpialidocious" - - -def test_testscenario_object_multi_call_in_run(): - """Test advanced workflow: run args where methods are called multiple times.""" - obj = TestedMockClass(a="super") - scenario = TestScenario( - args={ - "foo": {"b": "cali"}, - "bar": {"c": "fragi", "d": "listic"}, - "foo-2nd": {"b": "expi"}, - "bar-2nd": {"c": "ali", "d": "docious"}, - }, - ) - - result = scenario.run( - obj, - arg_sequence=["foo", "bar", "foo-2nd", "bar-2nd"], - method_sequence=["foo", "bar", "foo", "bar"], - ) - - assert result == "supercalifragilisticexpialidocious" - - -def test_testscenario_class_full_options(): - """Test advanced workflow: constructor and methods called multiple times.""" - obj = TestedMockClass - scenario = TestScenario( - args={ - "__init__": {"a": "super"}, - "foo": {"b": "cali"}, - "bar": {"c": "fragi", "d": "listic"}, - "foo-2nd": {"b": "expi"}, - "bar-2nd": {"c": "ali", "d": "docious"}, - }, - ) - - result = scenario.run( - obj, - arg_sequence=["__init__", "foo", "bar", "foo-2nd", "bar-2nd"], - method_sequence=["__init__", "foo", "bar", "foo", "bar"], - ) - - assert result == "supercalifragilisticexpialidocious" - - -def test_testscenario_class_simple(): - """Test advanced workflow: constructor, but only simple function calls.""" - obj = TestedMockClass - scenario = TestScenario( - args={ - "__init__": {"a": "super"}, - "foo": {"b": "cali"}, - "bar": {"c": "fragi", "d": "listic"}, - }, - ) - - result = scenario.run( - obj, - method_sequence=["__init__", "foo", "bar"], - ) - - assert result == "supercalifragilistic" diff --git a/aeon/transformations/collection/_normalise.py b/aeon/transformations/collection/_normalise.py index 27dd0ba766..2e9c203cd2 100644 --- a/aeon/transformations/collection/_normalise.py +++ b/aeon/transformations/collection/_normalise.py @@ -44,7 +44,7 @@ class Normalise(BaseCollectionTransformer): "capability:multivariate": True, } - def __init__(self, method: str = "z-norm"): + def __init__(self, method: str = "z_norm"): self.method = method super().__init__() diff --git a/aeon/transformations/tests/__init__.py b/aeon/transformations/tests/__init__.py deleted file mode 100644 index fd185a7336..0000000000 --- a/aeon/transformations/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for transformers.""" diff --git a/aeon/transformations/tests/test_all_transformers.py b/aeon/transformations/tests/test_all_transformers.py deleted file mode 100644 index b143681777..0000000000 --- a/aeon/transformations/tests/test_all_transformers.py +++ /dev/null @@ -1,125 +0,0 @@ -"""Unit tests common to all transformers.""" - -__maintainer__ = [] -__all__ = [] - -import numpy as np -import pandas as pd -from sklearn.utils._testing import set_random_state - -from aeon.datasets import load_basic_motions, load_unit_test -from aeon.testing.expected_results.expected_transform_outputs import ( - basic_motions_result, - unit_test_result, -) -from aeon.testing.test_all_estimators import BaseFixtureGenerator, QuickTester -from aeon.testing.utils.estimator_checks import _assert_array_almost_equal - - -class TransformerFixtureGenerator(BaseFixtureGenerator): - """Fixture generator for transformer tests. 
- - Fixtures parameterized - ---------------------- - estimator_class: estimator inheriting from BaseObject - ranges over all estimator classes not excluded by EXCLUDED_TESTS - estimator_instance: instance of estimator inheriting from BaseObject - ranges over all estimator classes not excluded by EXCLUDED_TESTS - instances are generated by create_test_instance class method - scenario: instance of TestScenario - ranges over all scenarios returned by retrieve_scenarios - """ - - # note: this should be separate from TestAllTransformers - # additional fixtures, parameters, etc should be added here - # TestAllTransformers should contain the tests only - - estimator_type_filter = "transformer" - - -class TestAllTransformers(TransformerFixtureGenerator, QuickTester): - """Module level tests for all aeon transformers.""" - - def test_capability_inverse_tag_is_correct(self, estimator_instance): - """Test that the capability:inverse_transform tag is set correctly.""" - capability_tag = estimator_instance.get_tag("capability:inverse_transform") - skip_tag = estimator_instance.get_tag("skip-inverse-transform") - if capability_tag and not skip_tag: - assert estimator_instance._has_implementation_of("_inverse_transform") - - def test_remember_data_tag_is_correct(self, estimator_instance): - """Test that the remember_data tag is set correctly.""" - fit_empty_tag = estimator_instance.get_tag("fit_is_empty", True) - remember_data_tag = estimator_instance.get_tag("remember_data", False) - msg = ( - 'if the "remember_data" tag is set to True, then the "fit_is_empty" tag ' - "must be set to False, even if _fit is not implemented or empty. " - "This is due to boilerplate that write to self.X in fit. " - f"Please check these two tags in {type(estimator_instance)}." - ) - if fit_empty_tag and remember_data_tag: - raise AssertionError(msg) - - def test_transform_inverse_transform_equivalent(self, estimator_instance, scenario): - """Test that inverse_transform is indeed inverse to transform.""" - # skip this test if the estimator does not have inverse_transform - if not estimator_instance.get_class_tag("capability:inverse_transform", False): - return None - - # skip this test if the estimator skips inverse_transform - if estimator_instance.get_tag("skip-inverse-transform", False): - return None - - X = scenario.args["transform"]["X"] - Xt = scenario.run(estimator_instance, method_sequence=["fit", "transform"]) - Xit = estimator_instance.inverse_transform(Xt) - if estimator_instance.get_tag("transform-returns-same-time-index"): - _assert_array_almost_equal(X, Xit) - elif isinstance(X, pd.DataFrame): - _assert_array_almost_equal(X.loc[Xit.index], Xit) - - def test_transformer_against_expected_results(self, estimator_class): - """Test transformer against stored results.""" - # we only use the first estimator instance for testing - classname = estimator_class.__name__ - - for data_name, data_dict, data_loader, data_seed in [ - ["UnitTest", unit_test_result, load_unit_test, 0], - ["BasicMotions", basic_motions_result, load_basic_motions, 4], - ]: - # retrieve expected transform output, and skip test if not available - if classname in data_dict.keys(): - expected_results = data_dict[classname] - else: - # skip test if no expected results are registered - continue - - # we only use the first estimator instance for testing - estimator_instance = estimator_class.create_test_instance( - parameter_set="results_comparison" - ) - # set random seed if possible - set_random_state(estimator_instance, 0) - - # load test data - 
X_train, y_train = data_loader(split="train") - indices = np.random.RandomState(data_seed).choice( - len(y_train), 5, replace=False - ) - - # fir transformer and transform - results = np.nan_to_num( - estimator_instance.fit_transform(X_train[indices], y_train[indices]), - False, - 0, - 0, - 0, - ) - - # assert results are the same - _assert_array_almost_equal( - results, - expected_results, - decimal=2, - err_msg=f"Failed to reproduce results for {classname} on {data_name}", - ) diff --git a/aeon/testing/utils/tests/test_check_imports.py b/aeon/utils/validation/tests/test_check_imports.py similarity index 100% rename from aeon/testing/utils/tests/test_check_imports.py rename to aeon/utils/validation/tests/test_check_imports.py diff --git a/conftest.py b/conftest.py index 029a99bd49..ace2d0b708 100644 --- a/conftest.py +++ b/conftest.py @@ -37,7 +37,7 @@ def pytest_configure(config): import numba - from aeon.testing import test_config + from aeon.testing import testing_config from aeon.utils.validation._dependencies import _check_soft_dependencies numba.set_num_threads(1) @@ -57,4 +57,4 @@ def pytest_configure(config): torch.set_num_threads(1) if config.getoption("--prtesting") in [True, "True", "true"]: - test_config.PR_TESTING = True + testing_config.PR_TESTING = True
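The try/except added to `_run_estimator_method` in this patch converts a ModuleNotFoundError raised inside an estimator method into a RuntimeError that points developers at the "python_dependencies" and "python_version" tags. Below is a minimal, self-contained sketch of that pattern; `DummyEstimator` and `call_estimator_method` are hypothetical stand-ins for illustration, not aeon internals.

import inspect


class DummyEstimator:
    """Hypothetical estimator whose fit pulls in a missing soft dependency."""

    def fit(self, X):
        raise ModuleNotFoundError("No module named 'some_soft_dep'")


def call_estimator_method(estimator, method_name, X=None, y=None):
    """Call a method with whichever of X/y it accepts, wrapping soft-dependency errors."""
    method = getattr(estimator, method_name)
    args = inspect.getfullargspec(method)[0]
    try:
        if "X" in args and "y" in args:
            return method(X=X, y=y)
        elif "X" in args:
            return method(X=X)
        return method()
    except ModuleNotFoundError as e:
        # assumed to stem from a missing soft dependency: surface the tag guidance
        raise RuntimeError(
            f"{type(estimator).__name__}.{method_name} raised ModuleNotFoundError; "
            'declare required soft dependencies in the "python_dependencies" tag '
            'and python version bounds in the "python_version" tag.'
        ) from e


if __name__ == "__main__":
    try:
        call_estimator_method(DummyEstimator(), "fit", X=[[1.0, 2.0, 3.0]])
    except RuntimeError as err:
        print(err)  # tag guidance is shown instead of a bare import error

Chaining with `from e` keeps the original import failure visible in the traceback while the wrapper message explains how to register the dependency.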
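The conftest.py hunk at the end of the patch flips the module-level PR_TESTING switch in the renamed aeon.testing.testing_config when pytest is invoked with --prtesting. The sketch below mirrors that pattern under assumed names: `my_pkg.testing_config`, the option registration, and the commented sample test are illustrative stand-ins, not aeon code.

# my_pkg/testing_config.py (hypothetical stand-in for aeon.testing.testing_config)
# PR_TESTING = False  # module-level switch, False by default

# conftest.py (sketch)
def pytest_addoption(parser):
    # assumed registration of the flag; the real project may register --prtesting elsewhere
    parser.addoption("--prtesting", action="store", default=False)


def pytest_configure(config):
    # hypothetical stand-in module for aeon.testing.testing_config
    from my_pkg import testing_config

    # accept the loose truthy spellings checked in the conftest hunk above
    if config.getoption("--prtesting") in [True, "True", "true"]:
        testing_config.PR_TESTING = True


# a test module can then gate slow cases on the flag, for example:
#
#   import pytest
#   from my_pkg.testing_config import PR_TESTING
#
#   @pytest.mark.skipif(PR_TESTING, reason="excluded from the reduced PR test run")
#   def test_expensive_case():
#       ...

Because the flag is set in pytest_configure, it is already in place before test modules import it, which is what lets simple module-level skipif markers pick up the reduced-testing mode.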