diff --git a/aeon/clustering/deep_learning/tests/test_random_state_deep_learning_cluster.py b/aeon/clustering/deep_learning/tests/test_random_state_deep_learning_cluster.py deleted file mode 100644 index 580d5eccf0..0000000000 --- a/aeon/clustering/deep_learning/tests/test_random_state_deep_learning_cluster.py +++ /dev/null @@ -1,60 +0,0 @@ -"""Unit tests for clusterer deep learning random_state functionality.""" - -import inspect - -import numpy as np -import pytest - -from aeon.clustering import deep_learning -from aeon.testing.data_generation import make_example_3d_numpy - -__maintainer__ = ["hadifawaz1999"] - - -@pytest.mark.skipif( - # not _check_soft_dependencies("tensorflow", severity="none"), - # See Issue #1761 - True, - reason="skip test if required soft dependency not available", -) -def test_random_state_deep_learning_clr(): - """Test Deep Clusterer seeding.""" - random_state = 42 - - X, _ = make_example_3d_numpy(random_state=random_state) - - deep_clr_classes = [ - member[1] for member in inspect.getmembers(deep_learning, inspect.isclass) - ] - - for i in range(len(deep_clr_classes)): - if "BaseDeepClusterer" in str(deep_clr_classes[i]): - continue - - deep_clr1 = deep_clr_classes[i]( - n_clusters=2, random_state=random_state, n_epochs=4 - ) - deep_clr1.fit(X) - - layers1 = deep_clr1.training_model_.layers[1:] - - deep_clr2 = deep_clr_classes[i]( - n_clusters=2, random_state=random_state, n_epochs=4 - ) - deep_clr2.fit(X) - - layers2 = deep_clr2.training_model_.layers[1:] - - assert len(layers1) == len(layers2) - - for i in range(len(layers1)): - weights1 = layers1[i].get_weights() - weights2 = layers2[i].get_weights() - - assert len(weights1) == len(weights2) - - for j in range(len(weights1)): - _weight1 = np.asarray(weights1[j]) - _weight2 = np.asarray(weights2[j]) - - np.testing.assert_almost_equal(_weight1, _weight2, 4) diff --git a/aeon/clustering/dummy.py b/aeon/clustering/dummy.py index e46206cd93..523f0748bb 100644 --- 
a/aeon/clustering/dummy.py +++ b/aeon/clustering/dummy.py @@ -1,6 +1,7 @@ """Implements DummyClusterer to be used as Baseline.""" import numpy as np +from sklearn.utils import check_random_state from aeon.clustering.base import BaseClusterer @@ -22,10 +23,17 @@ class DummyClusterer(BaseClusterer): - "random": Assign clusters randomly. - "uniform": Distribute clusters uniformly among samples. - "single_cluster": Assign all samples to a single cluster. - n_clusters : int, default=3 The number of clusters to generate. This is relevant for "random" and "uniform" strategies. + random_state : int, np.random.RandomState instance or None, default=None + Determines random number generation for the random assignment of cluster labels. + Only used when `strategy` is "random". + If `int`, random_state is the seed used by the random number generator; + If `np.random.RandomState` instance, + random_state is the random number generator; + If `None`, the random number generator is the `RandomState` instance used + by `np.random`. 
Attributes ---------- @@ -38,19 +46,19 @@ class DummyClusterer(BaseClusterer): >>> import numpy as np >>> X = np.array([[1, 2], [3, 4], [5, 6]]) >>> clusterer = DummyClusterer(strategy="uniform", n_clusters=2) - >>> clusterer._fit(X) + >>> clusterer.fit(X) DummyClusterer(n_clusters=2, strategy='uniform') >>> clusterer.labels_ array([0, 1, 0]) - >>> clusterer._predict(X) + >>> clusterer.predict(X) array([0, 1, 0]) """ - def __init__(self, strategy="random", n_clusters=3): - super().__init__() + def __init__(self, strategy="random", n_clusters=3, random_state=None): self.strategy = strategy - self.n_clusters = n_clusters - self.labels_ = None + self.random_state = random_state + + super().__init__(n_clusters=n_clusters) def _fit(self, X, y=None): """ @@ -72,7 +80,8 @@ def _fit(self, X, y=None): n_samples = X.shape[0] if self.strategy == "random": - self.labels_ = np.random.randint(0, self.n_clusters, n_samples) + rng = check_random_state(self.random_state) + self.labels_ = rng.randint(self.n_clusters, size=n_samples) elif self.strategy == "uniform": self.labels_ = np.tile( np.arange(self.n_clusters), n_samples // self.n_clusters + 1 @@ -103,7 +112,8 @@ def _predict(self, X, y=None) -> np.ndarray: """ n_samples = X.shape[0] if self.strategy == "random": - return np.random.randint(0, self.n_clusters, n_samples) + rng = check_random_state(self.random_state) + return rng.randint(self.n_clusters, size=n_samples) elif self.strategy == "uniform": return np.tile( np.arange(self.n_clusters), n_samples // self.n_clusters + 1 diff --git a/aeon/clustering/feature_based/_catch22.py b/aeon/clustering/feature_based/_catch22.py index eac0af3730..6c716e249d 100644 --- a/aeon/clustering/feature_based/_catch22.py +++ b/aeon/clustering/feature_based/_catch22.py @@ -209,7 +209,7 @@ def _predict_proba(self, X) -> np.ndarray: n_clusters = self.n_clusters if n_clusters is None: n_clusters = int(max(preds)) + 1 - dists = np.zeros((X.shape[0], n_clusters)) + dists = np.zeros((len(X), 
n_clusters)) for i in range(n_cases): dists[i, preds[i]] = 1 return dists diff --git a/aeon/clustering/tests/test_all_clusterers.py b/aeon/clustering/tests/test_all_clusterers.py deleted file mode 100644 index 98da489e29..0000000000 --- a/aeon/clustering/tests/test_all_clusterers.py +++ /dev/null @@ -1,43 +0,0 @@ -"""Test all clusterers comply to interface.""" - -import numpy as np -import pytest - -from aeon.registry import all_estimators -from aeon.utils.validation._dependencies import _check_soft_dependencies - -ALL_CLUSTERERS = all_estimators("clusterer", return_names=False) - - -@pytest.mark.parametrize("clst", ALL_CLUSTERERS) -def test_clusterer_tags_consistent(clst): - """Test all estimators capability tags reflect their capabilities.""" - if not _check_soft_dependencies( - clst.get_class_tag("python_dependencies", []), severity="none" - ): - return - - # Test the tag X_inner_type is consistent with capability:unequal_length - unequal_length = clst.get_class_tag("capability:unequal_length") - valid_types = {"np-list", "df-list", "pd-multivariate", "nested_univ"} - if unequal_length: # one of X_inner_types must be capable of storing unequal length - internal_types = clst.get_class_tag("X_inner_type") - if isinstance(internal_types, str): - assert internal_types in valid_types - else: # must be a list - assert bool(set(internal_types) & valid_types) - # Test can actually fit/predict with multivariate if tag is set - multivariate = clst.get_class_tag("capability:multivariate") - if multivariate: - X = np.random.random((10, 2, 10)) - inst = clst.create_test_instance(parameter_set="default") - inst.fit(X) - inst.predict(X) - inst.predict_proba(X) - - -@pytest.mark.parametrize("clst", ALL_CLUSTERERS) -def test_does_not_override_final_methods(clst): - """Test does not override final methods.""" - assert "fit" not in clst.__dict__ - assert "predict" not in clst.__dict__ diff --git a/aeon/testing/estimator_checking/_yield_clustering_checks.py 
b/aeon/testing/estimator_checking/_yield_clustering_checks.py new file mode 100644 index 0000000000..cf68855bb4 --- /dev/null +++ b/aeon/testing/estimator_checking/_yield_clustering_checks.py @@ -0,0 +1,84 @@ +"""Tests for all clusterers.""" + +from functools import partial + +import numpy as np + +from aeon.base._base import _clone_estimator +from aeon.clustering.deep_learning import BaseDeepClusterer +from aeon.testing.testing_data import FULL_TEST_DATA_DICT + + +def _yield_clustering_checks(estimator_class, estimator_instances, datatypes): + """Yield all clustering checks for an aeon clusterer.""" + # only class required + yield partial(check_clusterer_tags_consistent, estimator_class=estimator_class) + yield partial( + check_clusterer_does_not_override_final_methods, estimator_class=estimator_class + ) + + # test class instances + for i, estimator in enumerate(estimator_instances): + # data type irrelevant + if isinstance(estimator, BaseDeepClusterer): + yield partial( + check_clustering_random_state_deep_learning, + estimator=estimator, + datatype=datatypes[i][0], + ) + + +def check_clusterer_tags_consistent(estimator_class): + """Test the clusterer's capability tags reflect its capabilities.""" + # Test the tag X_inner_type is consistent with capability:unequal_length + unequal_length = estimator_class.get_class_tag("capability:unequal_length") + valid_types = {"np-list", "df-list", "pd-multivariate", "nested_univ"} + if unequal_length: # one of X_inner_types must be capable of storing unequal length + internal_types = estimator_class.get_class_tag("X_inner_type") + if isinstance(internal_types, str): + assert internal_types in valid_types + else: # must be a list + assert bool(set(internal_types) & valid_types) + # Test can actually fit/predict with multivariate if tag is set + multivariate = estimator_class.get_class_tag("capability:multivariate") + if multivariate: + X = np.random.random((10, 2, 10)) + inst = 
estimator_class.create_test_instance(parameter_set="default") + inst.fit(X) + inst.predict(X) + inst.predict_proba(X) + + +def check_clusterer_does_not_override_final_methods(estimator_class): + """Test does not override final methods.""" + assert "fit" not in estimator_class.__dict__ + assert "predict" not in estimator_class.__dict__ + + +def check_clustering_random_state_deep_learning(estimator, datatype): + """Test Deep Clusterer seeding.""" + random_state = 42 + + deep_clr1 = _clone_estimator(estimator, random_state=random_state) + deep_clr1.fit(FULL_TEST_DATA_DICT[datatype]["train"][0]) + + layers1 = deep_clr1.training_model_.layers[1:] + + deep_clr2 = _clone_estimator(estimator, random_state=random_state) + deep_clr2.fit(FULL_TEST_DATA_DICT[datatype]["train"][0]) + + layers2 = deep_clr2.training_model_.layers[1:] + + assert len(layers1) == len(layers2) + + for i in range(len(layers1)): + weights1 = layers1[i].get_weights() + weights2 = layers2[i].get_weights() + + assert len(weights1) == len(weights2) + + for j in range(len(weights1)): + _weight1 = np.asarray(weights1[j]) + _weight2 = np.asarray(weights2[j]) + + np.testing.assert_almost_equal(_weight1, _weight2, 4) diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py index 3cc212af8b..ca5b36d797 100644 --- a/aeon/testing/estimator_checking/_yield_estimator_checks.py +++ b/aeon/testing/estimator_checking/_yield_estimator_checks.py @@ -15,12 +15,16 @@ from aeon.base._base import _clone_estimator from aeon.classification import BaseClassifier from aeon.classification.deep_learning.base import BaseDeepClassifier +from aeon.clustering import BaseClusterer from aeon.clustering.deep_learning.base import BaseDeepClusterer from aeon.regression import BaseRegressor from aeon.regression.deep_learning.base import BaseDeepRegressor from aeon.testing.estimator_checking._yield_classification_checks import ( _yield_classification_checks, ) +from 
aeon.testing.estimator_checking._yield_clustering_checks import ( + _yield_clustering_checks, +) from aeon.testing.estimator_checking._yield_regression_checks import ( _yield_regression_checks, ) @@ -88,6 +92,11 @@ def _yield_all_aeon_checks( estimator_class, estimator_instances, datatypes ) + if issubclass(estimator_class, BaseClusterer): + yield from _yield_clustering_checks( + estimator_class, estimator_instances, datatypes + ) + def _yield_estimator_checks(estimator_class, estimator_instances, datatypes): """Yield all general checks for an aeon estimator.""" diff --git a/aeon/testing/test_all_estimators.py b/aeon/testing/test_all_estimators.py index b65276b9dc..eb18e94051 100644 --- a/aeon/testing/test_all_estimators.py +++ b/aeon/testing/test_all_estimators.py @@ -205,7 +205,7 @@ def _all_estimators(self): estimator_types=getattr(self, "estimator_type_filter", None), return_names=False, exclude_estimators=EXCLUDE_ESTIMATORS, - exclude_estimator_types=["classifier", "regressor"], + exclude_estimator_types=["classifier", "regressor", "clusterer"], ) # subsample estimators by OS & python version diff --git a/aeon/testing/test_config.py b/aeon/testing/test_config.py index 07b1918c90..25decbc902 100644 --- a/aeon/testing/test_config.py +++ b/aeon/testing/test_config.py @@ -72,6 +72,8 @@ # needs investigation "SASTClassifier": ["check_fit_deterministic"], "RSASTClassifier": ["check_fit_deterministic"], + "AEFCNClusterer": ["check_fit_updates_state"], + "AEResNetClusterer": ["check_fit_updates_state"], } # We use estimator tags in addition to class hierarchies to further distinguish diff --git a/aeon/testing/tests/test_all_estimators.py b/aeon/testing/tests/test_all_estimators.py index a356c7eb0c..372aae833e 100644 --- a/aeon/testing/tests/test_all_estimators.py +++ b/aeon/testing/tests/test_all_estimators.py @@ -9,7 +9,7 @@ from aeon.utils.sampling import random_partition ALL_ESTIMATORS = all_estimators( - estimator_types=["classifier", "regressor"], + 
estimator_types=["classifier", "regressor", "clusterer"], return_names=False, )