From 2306ebdec5a5f7e95d340b16db8e8fbcec479485 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Fri, 25 Oct 2024 13:34:35 +0100 Subject: [PATCH 01/25] remove y from predict --- aeon/clustering/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 17231fdf1f..371f3ad74c 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -67,7 +67,7 @@ def fit(self, X, y=None) -> BaseCollectionEstimator: return self @final - def predict(self, X, y=None) -> np.ndarray: + def predict(self, X) -> np.ndarray: """Predict the closest cluster each sample in X belongs to. Parameters @@ -81,7 +81,6 @@ def predict(self, X, y=None) -> np.ndarray: of shape ``[n_cases]``, 2D np.array ``(n_channels, n_timepoints_i)``, where ``n_timepoints_i`` is length of series ``i``. Other types are allowed and converted into one of the above. - y: ignored, exists for API consistency reasons. Returns ------- From 04c385b461f70503fc7a90675e3cfcab596638c1 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 31 Oct 2024 17:02:57 +0000 Subject: [PATCH 02/25] remove score --- aeon/clustering/base.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 371f3ad74c..e3151db9b3 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -146,27 +146,6 @@ def fit_predict(self, X, y=None) -> np.ndarray: self.fit(X) return self.predict(X) - def score(self, X, y=None) -> float: - """Score the quality of the clusterer. - - Parameters - ---------- - X : np.ndarray (2d or 3d array of shape (n_cases, n_timepoints) or shape - (n_cases, n_channels, n_timepoints)). - Time series instances to train clusterer and then have indexes each belong - to return. - y: ignored, exists for API consistency reasons. - - Returns - ------- - score : float - Score of the clusterer. - """ - self._check_is_fitted() - X = self._preprocess_collection(X, store_metadata=False) - self._check_shape(X) - return self._score(X, y) - def _predict_proba(self, X) -> np.ndarray: """Predicts labels probabilities for sequences in X. From a346080107d7b047156aecaeef63d8dd42b59f94 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 31 Oct 2024 17:07:00 +0000 Subject: [PATCH 03/25] remove score, add fit_predict --- aeon/clustering/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index e3151db9b3..2913212f6d 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -143,6 +143,10 @@ def fit_predict(self, X, y=None) -> np.ndarray: np.ndarray (1d array of shape (n_cases,)) Index of the cluster each time series in X belongs to. """ + return self._fit_predict(X, y) + + def _fit_predict(self, X, y=None) -> np.ndarray: + """Fit predict using base methods.""" self.fit(X) return self.predict(X) @@ -185,9 +189,6 @@ def _predict_proba(self, X) -> np.ndarray: dists[i, preds[i]] = 1 return dists - @abstractmethod - def _score(self, X, y=None): ... - @abstractmethod def _predict(self, X, y=None) -> np.ndarray: """Predict the closest cluster each sample in X belongs to. From 60615dd109d59311d7688e5dd215238946fe2a78 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 31 Oct 2024 17:10:57 +0000 Subject: [PATCH 04/25] remove score, add fit_predict --- aeon/clustering/base.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 2913212f6d..78f804a935 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -146,7 +146,21 @@ def fit_predict(self, X, y=None) -> np.ndarray: return self._fit_predict(X, y) def _fit_predict(self, X, y=None) -> np.ndarray: - """Fit predict using base methods.""" + """Fit predict using base methods. + + Parameters + ---------- + X : np.ndarray (2d or 3d array of shape (n_cases, n_timepoints) or shape + (n_cases, n_channels, n_timepoints)). + Time series instances to train clusterer and then have indexes each belong + to return. + y: ignored, exists for API consistency reasons. + + Returns + ------- + np.ndarray (1d array of shape (n_cases,)) + Index of the cluster each time series in X belongs to. + """ self.fit(X) return self.predict(X) From aad00ef226d74ee77713192555c1d909c4f16cfd Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 1 Nov 2024 19:19:06 +0000 Subject: [PATCH 05/25] removed score and n_clusters from base class --- aeon/clustering/_clara.py | 6 ++---- aeon/clustering/_elastic_som.py | 6 ++---- aeon/clustering/_k_means.py | 6 ++---- aeon/clustering/_k_medoids.py | 6 ++---- aeon/clustering/_k_shape.py | 6 ++---- aeon/clustering/_k_shapes.py | 3 ++- aeon/clustering/_kernel_k_means.py | 3 ++- aeon/clustering/base.py | 10 +++++----- aeon/clustering/compose/_pipeline.py | 3 --- aeon/clustering/deep_learning/_ae_fcn.py | 8 +------- aeon/clustering/deep_learning/_ae_resnet.py | 8 +------- aeon/clustering/deep_learning/base.py | 4 +--- .../tests/test_deep_clusterer_base.py | 2 -- aeon/clustering/dummy.py | 19 ++----------------- aeon/clustering/feature_based/_catch22.py | 3 --- aeon/clustering/feature_based/_summary.py | 3 --- aeon/clustering/feature_based/_tsfresh.py | 3 --- aeon/clustering/tests/test_base.py | 3 --- aeon/clustering/tests/test_dummy.py | 15 --------------- aeon/clustering/tests/test_k_shape.py | 3 --- aeon/clustering/tests/test_kernel_k_means.py | 4 ---- .../mock_estimators/_mock_clusterers.py | 9 ++------- 22 files changed, 26 insertions(+), 107 deletions(-) diff --git a/aeon/clustering/_clara.py b/aeon/clustering/_clara.py index 4f44f5adab..dcc0e7a912 100644 --- a/aeon/clustering/_clara.py +++ b/aeon/clustering/_clara.py @@ -139,6 +139,7 @@ def __init__( self.distance_params = distance_params self.n_samples = n_samples self.n_sampling_iters = n_sampling_iters + self.n_clusters = n_clusters self.cluster_centers_ = None self.labels_ = None @@ -148,7 +149,7 @@ def __init__( self._random_state = None self._kmedoids_instance = None - super().__init__(n_clusters) + super().__init__() def _predict(self, X: np.ndarray, y=None) -> np.ndarray: return self._kmedoids_instance.predict(X) @@ -207,9 +208,6 @@ def _fit(self, X: np.ndarray, y=None): self.n_iter_ = best_pam.n_iter_ self._kmedoids_instance = best_pam - def _score(self, X, y=None): - return -self.inertia_ - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/_elastic_som.py b/aeon/clustering/_elastic_som.py index e7d7d34682..36a8769b13 100644 --- a/aeon/clustering/_elastic_som.py +++ b/aeon/clustering/_elastic_som.py @@ -179,6 +179,7 @@ def __init__( self.init = init self.sigma_decay_function = sigma_decay_function self.custom_alignment_path = custom_alignment_path + self.n_clusters = n_clusters self._random_state = None self._alignment_path_callable = None @@ -191,7 +192,7 @@ def __init__( self.labels_ = None self.cluster_centers_ = None - super().__init__(n_clusters=n_clusters) + super().__init__() def _fit(self, X, y=None): self._check_params(X) @@ -219,9 +220,6 @@ def _fit(self, X, y=None): def _predict(self, X, y=None): return self._find_bmu(X, self.cluster_centers_) - def _score(self, X, y=None): - raise NotImplementedError("TimeSeriesSOM does not support scoring") - def _find_bmu(self, x, weights): pairwise_matrix = pairwise_distance( x, diff --git a/aeon/clustering/_k_means.py b/aeon/clustering/_k_means.py index 550d38944e..2581b2d5f4 100644 --- a/aeon/clustering/_k_means.py +++ b/aeon/clustering/_k_means.py @@ -192,6 +192,7 @@ def __init__( self.distance_params = distance_params self.average_params = average_params self.averaging_method = averaging_method + self.n_clusters = n_clusters self.cluster_centers_ = None self.labels_ = None @@ -203,7 +204,7 @@ def __init__( self._averaging_method = None self._average_params = None - super().__init__(n_clusters) + super().__init__() def _fit(self, X: np.ndarray, y=None): self._check_params(X) @@ -281,9 +282,6 @@ def _fit_one_init(self, X: np.ndarray) -> tuple: return prev_labels, cluster_centres, prev_inertia, i + 1 - def _score(self, X, y=None): - return -self.inertia_ - def _predict(self, X: np.ndarray, y=None) -> np.ndarray: if isinstance(self.distance, str): pairwise_matrix = pairwise_distance( diff --git a/aeon/clustering/_k_medoids.py b/aeon/clustering/_k_medoids.py index 1f36f75ebe..ea8e860afc 100644 --- a/aeon/clustering/_k_medoids.py +++ b/aeon/clustering/_k_medoids.py @@ -171,6 +171,7 @@ def __init__( self.random_state = random_state self.distance_params = distance_params self.method = method + self.n_clusters = n_clusters self.cluster_centers_ = None self.labels_ = None @@ -184,7 +185,7 @@ def __init__( self._fit_method = None self._distance_params = {} - super().__init__(n_clusters) + super().__init__() def _fit(self, X: np.ndarray, y=None): self._check_params(X) @@ -207,9 +208,6 @@ def _fit(self, X: np.ndarray, y=None): self.cluster_centers_ = best_centers self.n_iter_ = best_iters - def _score(self, X, y=None): - return -self.inertia_ - def _predict(self, X: np.ndarray, y=None) -> np.ndarray: if isinstance(self.distance, str): pairwise_matrix = pairwise_distance( diff --git a/aeon/clustering/_k_shape.py b/aeon/clustering/_k_shape.py index 3da2aca0cf..ccb49bec80 100644 --- a/aeon/clustering/_k_shape.py +++ b/aeon/clustering/_k_shape.py @@ -89,6 +89,7 @@ def __init__( self.tol = tol self.verbose = verbose self.random_state = random_state + self.n_clusters = n_clusters self.cluster_centers_ = None self.labels_ = None @@ -97,7 +98,7 @@ def __init__( self._tslearn_k_shapes = None - super().__init__(n_clusters=n_clusters) + super().__init__() def _fit(self, X, y=None): """Fit time series clusterer to training data. @@ -179,6 +180,3 @@ def _get_test_params(cls, parameter_set="default"): "verbose": False, "random_state": 1, } - - def _score(self, X, y=None): - return np.abs(self.inertia_) diff --git a/aeon/clustering/_k_shapes.py b/aeon/clustering/_k_shapes.py index cdad58032a..4cb50fdec5 100644 --- a/aeon/clustering/_k_shapes.py +++ b/aeon/clustering/_k_shapes.py @@ -90,6 +90,7 @@ def __init__( self.tol = tol self.verbose = verbose self.random_state = random_state + self.n_clusters = n_clusters self.cluster_centers_ = None self.labels_ = None @@ -98,7 +99,7 @@ def __init__( self._tslearn_k_shapes = None - super().__init__(n_clusters=n_clusters) + super().__init__() def _fit(self, X, y=None): """Fit time series clusterer to training data. diff --git a/aeon/clustering/_kernel_k_means.py b/aeon/clustering/_kernel_k_means.py index 6511c6a393..2a7b255757 100644 --- a/aeon/clustering/_kernel_k_means.py +++ b/aeon/clustering/_kernel_k_means.py @@ -108,6 +108,7 @@ def __init__( self.verbose = verbose self.n_jobs = n_jobs self.random_state = random_state + self.n_clusters = n_clusters self.cluster_centers_ = None self.labels_ = None @@ -116,7 +117,7 @@ def __init__( self._tslearn_kernel_k_means = None - super().__init__(n_clusters=n_clusters) + super().__init__() def _fit(self, X, y=None): """Fit time series clusterer to training data. diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 78f804a935..2aca96545e 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -1,7 +1,5 @@ """Base class for clustering.""" -from typing import Optional - __maintainer__ = [] __all__ = ["BaseClusterer"] @@ -28,8 +26,7 @@ class BaseClusterer(BaseCollectionEstimator): "fit_is_empty": False, } - def __init__(self, n_clusters: Optional[int] = None): - self.n_clusters = n_clusters + def __init__(self): # required for compatibility with some sklearn interfaces e.g. # CalibratedClassifierCV self._estimator_type = "clusterer" @@ -195,7 +192,10 @@ def _predict_proba(self, X) -> np.ndarray: for i, u in enumerate(unique): preds[preds == u] = i n_cases = len(preds) - n_clusters = self.n_clusters + if hasattr(self, "n_clusters"): + n_clusters = self.n_clusters + else: + n_clusters = len(np.unique(preds)) if n_clusters is None: n_clusters = int(max(preds)) + 1 dists = np.zeros((X.shape[0], n_clusters)) diff --git a/aeon/clustering/compose/_pipeline.py b/aeon/clustering/compose/_pipeline.py index 763f872e49..a946ebf2dc 100644 --- a/aeon/clustering/compose/_pipeline.py +++ b/aeon/clustering/compose/_pipeline.py @@ -88,9 +88,6 @@ def __init__(self, transformers, clusterer, random_state=None): def _fit(self, X, y=None): return super()._fit(X, y) - def _score(self, X, y=None): - raise NotImplementedError("Pipeline does not support scoring.") - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/_ae_fcn.py b/aeon/clustering/deep_learning/_ae_fcn.py index 0075817823..6d362c0c32 100644 --- a/aeon/clustering/deep_learning/_ae_fcn.py +++ b/aeon/clustering/deep_learning/_ae_fcn.py @@ -168,10 +168,10 @@ def __init__( self.save_last_model = save_last_model self.best_file_name = best_file_name self.random_state = random_state + self.n_clusters = n_clusters super().__init__( estimator=estimator, - n_clusters=n_clusters, clustering_algorithm=clustering_algorithm, clustering_params=clustering_params, batch_size=batch_size, @@ -320,12 +320,6 @@ def _fit(self, X): return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/_ae_resnet.py b/aeon/clustering/deep_learning/_ae_resnet.py index 56d12cb487..a6b8f98f1f 100644 --- a/aeon/clustering/deep_learning/_ae_resnet.py +++ b/aeon/clustering/deep_learning/_ae_resnet.py @@ -178,12 +178,12 @@ def __init__( self.best_file_name = best_file_name self.last_file_name = last_file_name self.optimizer = optimizer + self.n_clusters = n_clusters self.history = None super().__init__( estimator=estimator, - n_clusters=n_clusters, clustering_algorithm=clustering_algorithm, clustering_params=clustering_params, batch_size=batch_size, @@ -336,12 +336,6 @@ def _fit(self, X): gc.collect() return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/base.py b/aeon/clustering/deep_learning/base.py index dc84d7c187..ced00367a6 100644 --- a/aeon/clustering/deep_learning/base.py +++ b/aeon/clustering/deep_learning/base.py @@ -42,7 +42,6 @@ class BaseDeepClusterer(BaseClusterer): def __init__( self, - n_clusters=None, estimator=None, clustering_algorithm="deprecated", clustering_params=None, @@ -50,7 +49,6 @@ def __init__( last_file_name="last_file", ): self.estimator = estimator - self.n_clusters = n_clusters self.clustering_algorithm = clustering_algorithm self.clustering_params = clustering_params self.batch_size = batch_size @@ -58,7 +56,7 @@ def __init__( self.model_ = None - super().__init__(n_clusters=n_clusters) + super().__init__() @abstractmethod def build_model(self, input_shape): diff --git a/aeon/clustering/deep_learning/tests/test_deep_clusterer_base.py b/aeon/clustering/deep_learning/tests/test_deep_clusterer_base.py index 64099bc87c..9e65866532 100644 --- a/aeon/clustering/deep_learning/tests/test_deep_clusterer_base.py +++ b/aeon/clustering/deep_learning/tests/test_deep_clusterer_base.py @@ -41,5 +41,3 @@ def test_base_deep_clusterer(estimator): ypred_proba = dummy_deep_clr.predict_proba(X) assert ypred_proba is not None assert len(ypred_proba[0]) == len(np.unique(y)) - score = dummy_deep_clr.score(X) - assert isinstance(score, np.float64) or isinstance(score, np.float32) diff --git a/aeon/clustering/dummy.py b/aeon/clustering/dummy.py index 523f0748bb..3c83d224e8 100644 --- a/aeon/clustering/dummy.py +++ b/aeon/clustering/dummy.py @@ -57,8 +57,9 @@ class DummyClusterer(BaseClusterer): def __init__(self, strategy="random", n_clusters=3, random_state=None): self.strategy = strategy self.random_state = random_state + self.n_clusters = n_clusters - super().__init__(n_clusters=n_clusters) + super().__init__() def _fit(self, X, y=None): """ @@ -122,19 +123,3 @@ def _predict(self, X, y=None) -> np.ndarray: return np.zeros(n_samples, dtype=int) else: raise ValueError("Unknown strategy type") - - def _score(self, X, y=None): - if self.strategy == "single_cluster": - centers = np.mean(X, axis=0).reshape(1, -1) - else: - centers = np.array( - [X[self.labels_ == i].mean(axis=0) for i in range(self.n_clusters)] - ) - - inertia = np.sum( - [ - np.sum((X[self.labels_ == i] - centers[i]) ** 2) - for i in range(len(centers)) - ] - ) - return inertia diff --git a/aeon/clustering/feature_based/_catch22.py b/aeon/clustering/feature_based/_catch22.py index 0b6b2e32fa..74d2d54674 100644 --- a/aeon/clustering/feature_based/_catch22.py +++ b/aeon/clustering/feature_based/_catch22.py @@ -214,9 +214,6 @@ def _predict_proba(self, X) -> np.ndarray: dists[i, preds[i]] = 1 return dists - def _score(self, X, y=None): - raise NotImplementedError("Catch22Clusterer does not support scoring.") - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/feature_based/_summary.py b/aeon/clustering/feature_based/_summary.py index 26bb296f0e..ddd3e3825e 100644 --- a/aeon/clustering/feature_based/_summary.py +++ b/aeon/clustering/feature_based/_summary.py @@ -169,6 +169,3 @@ def _predict_proba(self, X) -> np.ndarray: for i in range(n_cases): dists[i, preds[i]] = 1 return dists - - def _score(self, X, y=None): - raise NotImplementedError("SummaryClusterer does not support scoring.") diff --git a/aeon/clustering/feature_based/_tsfresh.py b/aeon/clustering/feature_based/_tsfresh.py index 503638e239..c735602ff4 100644 --- a/aeon/clustering/feature_based/_tsfresh.py +++ b/aeon/clustering/feature_based/_tsfresh.py @@ -211,9 +211,6 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: dists[i, preds[i]] = 1 return dists - def _score(self, X: np.ndarray, y: Optional[np.ndarray] = None): - raise NotImplementedError("TSFreshClusterer does not support scoring.") - @classmethod def _get_test_params(cls, parameter_set: str = "default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/tests/test_base.py b/aeon/clustering/tests/test_base.py index a40405e5b5..8a8fb81b2d 100644 --- a/aeon/clustering/tests/test_base.py +++ b/aeon/clustering/tests/test_base.py @@ -51,9 +51,6 @@ def _predict(self, X): """Predict dummy.""" return np.zeros(shape=(len(X),), dtype=int) - def _score(self, X, y=None): - return 1.0 - def test_base_clusterer(): """Test with no clusters.""" diff --git a/aeon/clustering/tests/test_dummy.py b/aeon/clustering/tests/test_dummy.py index 0a42f039c2..1a94d91ff6 100644 --- a/aeon/clustering/tests/test_dummy.py +++ b/aeon/clustering/tests/test_dummy.py @@ -17,18 +17,3 @@ def test_dummy_clusterer(strategy): assert len(preds) == 3 assert np.all(np.array([(pred < 3) for pred in preds])) assert np.all(np.array([(pred >= 0) for pred in preds])) - - -def test_dummy_clusterer_score(): - """Test score method of the dummy clusterer.""" - model = DummyClusterer(strategy="random") - data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - model.fit(data) - score = model.score(data) - assert score is not None - - model = DummyClusterer(strategy="single_cluster") - model.fit(data) - score = model.score(data) - assert score is not None - assert score == 54.0 diff --git a/aeon/clustering/tests/test_k_shape.py b/aeon/clustering/tests/test_k_shape.py index 4e82162e7e..8af9743004 100644 --- a/aeon/clustering/tests/test_k_shape.py +++ b/aeon/clustering/tests/test_k_shape.py @@ -14,7 +14,6 @@ expected_iters = 2 expected_labels = [0, 2, 1, 1, 1] -expected_score = 0.5645477840468736 @pytest.mark.skipif( @@ -31,10 +30,8 @@ def test_kshapes(): kshapes = TimeSeriesKShape(random_state=1, n_clusters=3) kshapes.fit(X_train[0:max_train]) test_shape_result = kshapes.predict(X_test[0:max_train]) - score = kshapes.score(X_test[0:max_train]) proba = kshapes.predict_proba(X_test[0:max_train]) assert np.array_equal(test_shape_result, expected_results) - np.testing.assert_almost_equal(score, expected_score) assert kshapes.n_iter_ == expected_iters assert np.array_equal(kshapes.labels_, expected_labels) assert proba.shape == (max_train, 3) diff --git a/aeon/clustering/tests/test_kernel_k_means.py b/aeon/clustering/tests/test_kernel_k_means.py index e46794b26a..f4af21f4f5 100644 --- a/aeon/clustering/tests/test_kernel_k_means.py +++ b/aeon/clustering/tests/test_kernel_k_means.py @@ -9,8 +9,6 @@ expected_labels = [0, 2, 1, 2, 0] -expected_score = 4.0 - expected_iters = 2 expected_results = [0, 0, 0, 0, 0] @@ -30,11 +28,9 @@ def test_kernel_k_means(): kernel_kmeans = TimeSeriesKernelKMeans(random_state=1, n_clusters=3) kernel_kmeans.fit(X_train[0:max_train]) test_shape_result = kernel_kmeans.predict(X_test[0:max_train]) - score = kernel_kmeans.score(X_test[0:max_train]) proba = kernel_kmeans.predict_proba(X_test[0:max_train]) assert np.array_equal(test_shape_result, expected_results) - np.testing.assert_almost_equal(score, expected_score) assert kernel_kmeans.n_iter_ == expected_iters assert np.array_equal(kernel_kmeans.labels_, expected_labels) assert proba.shape == (max_train, 3) diff --git a/aeon/testing/mock_estimators/_mock_clusterers.py b/aeon/testing/mock_estimators/_mock_clusterers.py index 0563129909..ea6772c092 100644 --- a/aeon/testing/mock_estimators/_mock_clusterers.py +++ b/aeon/testing/mock_estimators/_mock_clusterers.py @@ -1,5 +1,3 @@ -from typing import Optional - import numpy as np from aeon.clustering.base import BaseClusterer @@ -9,8 +7,8 @@ class MockCluster(BaseClusterer): """Mock Cluster for testing base class fit/predict.""" - def __init__(self, n_clusters: Optional[int] = None): - super().__init__(n_clusters) + def __init__(self): + super().__init__() def _fit(self, X): """Mock fit.""" @@ -25,9 +23,6 @@ def _predict_proba(self, X): y = np.random.rand(len(X)) return y - def _score(self, X, y): - return np.random.randn(1) - class MockDeepClusterer(BaseDeepClusterer): """Mock Deep Clusterer for testing empty base deep class save utilities.""" From 24732d2308c6e9ad2290162715510e61e0dabbe5 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Mon, 4 Nov 2024 11:25:37 +0000 Subject: [PATCH 06/25] fit_predict --- aeon/clustering/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 780fd5d501..bf73c2a136 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -122,6 +122,7 @@ def predict_proba(self, X) -> np.ndarray: self._check_shape(X) return self._predict_proba(X) + @final def fit_predict(self, X, y=None) -> np.ndarray: """Compute cluster centers and predict cluster index for each time series. @@ -159,6 +160,8 @@ def _fit_predict(self, X, y=None) -> np.ndarray: Index of the cluster each time series in X belongs to. """ self.fit(X) + if hasattr(self, "labels_"): + return self.labels_ return self.predict(X) def _predict_proba(self, X) -> np.ndarray: From e83387c1405bd57ec8f96c0a90d7dae9913cf283 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 6 Nov 2024 15:39:49 +0000 Subject: [PATCH 07/25] fixed predict proba --- aeon/clustering/base.py | 2 +- aeon/clustering/deep_learning/_ae_bgru.py | 2 +- aeon/clustering/feature_based/_catch22.py | 13 +------------ aeon/clustering/feature_based/_summary.py | 13 +------------ aeon/clustering/feature_based/_tsfresh.py | 13 +------------ 5 files changed, 5 insertions(+), 38 deletions(-) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index bf73c2a136..92989a4c2c 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -201,7 +201,7 @@ def _predict_proba(self, X) -> np.ndarray: n_clusters = len(np.unique(preds)) if n_clusters is None: n_clusters = int(max(preds)) + 1 - dists = np.zeros((X.shape[0], n_clusters)) + dists = np.zeros((len(X), n_clusters)) for i in range(n_cases): dists[i, preds[i]] = 1 return dists diff --git a/aeon/clustering/deep_learning/_ae_bgru.py b/aeon/clustering/deep_learning/_ae_bgru.py index 9b7df32716..c79632d504 100644 --- a/aeon/clustering/deep_learning/_ae_bgru.py +++ b/aeon/clustering/deep_learning/_ae_bgru.py @@ -141,9 +141,9 @@ def __init__( self.best_file_name = best_file_name self.random_state = random_state self.estimator = estimator + self.n_clusters = n_clusters super().__init__( - n_clusters=n_clusters, estimator=estimator, batch_size=batch_size, last_file_name=last_file_name, diff --git a/aeon/clustering/feature_based/_catch22.py b/aeon/clustering/feature_based/_catch22.py index 74d2d54674..e365e04917 100644 --- a/aeon/clustering/feature_based/_catch22.py +++ b/aeon/clustering/feature_based/_catch22.py @@ -201,18 +201,7 @@ def _predict_proba(self, X) -> np.ndarray: if callable(m): return self._estimator.predict_proba(self._transformer.transform(X)) else: - preds = self._estimator.predict(self._transformer.transform(X)) - unique = np.unique(preds) - for i, u in enumerate(unique): - preds[preds == u] = i - n_cases = len(preds) - n_clusters = self.n_clusters - if n_clusters is None: - n_clusters = int(max(preds)) + 1 - dists = np.zeros((len(X), n_clusters)) - for i in range(n_cases): - dists[i, preds[i]] = 1 - return dists + return super()._predict_proba(X) @classmethod def _get_test_params(cls, parameter_set="default"): diff --git a/aeon/clustering/feature_based/_summary.py b/aeon/clustering/feature_based/_summary.py index ddd3e3825e..616582da2a 100644 --- a/aeon/clustering/feature_based/_summary.py +++ b/aeon/clustering/feature_based/_summary.py @@ -157,15 +157,4 @@ def _predict_proba(self, X) -> np.ndarray: if callable(m): return self._estimator.predict_proba(self._transformer.transform(X)) else: - preds = self._estimator.predict(self._transformer.transform(X)) - unique = np.unique(preds) - for i, u in enumerate(unique): - preds[preds == u] = i - n_cases = len(preds) - n_clusters = self.n_clusters - if n_clusters is None: - n_clusters = int(max(preds)) + 1 - dists = np.zeros((X.shape[0], n_clusters)) - for i in range(n_cases): - dists[i, preds[i]] = 1 - return dists + return super()._predict_proba(X) diff --git a/aeon/clustering/feature_based/_tsfresh.py b/aeon/clustering/feature_based/_tsfresh.py index c735602ff4..4987e9bfda 100644 --- a/aeon/clustering/feature_based/_tsfresh.py +++ b/aeon/clustering/feature_based/_tsfresh.py @@ -198,18 +198,7 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: if callable(m): return self._estimator.predict_proba(self._transformer.transform(X)) else: - preds = self._estimator.predict(self._transformer.transform(X)) - unique = np.unique(preds) - for i, u in enumerate(unique): - preds[preds == u] = i - n_cases = len(preds) - n_clusters = self.n_clusters - if n_clusters is None: - n_clusters = int(max(preds)) + 1 - dists = np.zeros((X.shape[0], n_clusters)) - for i in range(n_cases): - dists[i, preds[i]] = 1 - return dists + return super()._predict_proba(X) @classmethod def _get_test_params(cls, parameter_set: str = "default"): From 30fbda933eabfa1866adb58aacbd335c058f33aa Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 6 Nov 2024 16:05:46 +0000 Subject: [PATCH 08/25] fixed dnn tests --- aeon/testing/mock_estimators/_mock_clusterers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aeon/testing/mock_estimators/_mock_clusterers.py b/aeon/testing/mock_estimators/_mock_clusterers.py index ea6772c092..c44d76458f 100644 --- a/aeon/testing/mock_estimators/_mock_clusterers.py +++ b/aeon/testing/mock_estimators/_mock_clusterers.py @@ -30,7 +30,6 @@ class MockDeepClusterer(BaseDeepClusterer): def __init__(self, estimator=None, last_file_name="last_file"): self.last_file_name = last_file_name super().__init__( - n_clusters=None, estimator=estimator, last_file_name=last_file_name, clustering_params={"n_init": 1, "averaging_method": "mean"}, From 3557ac4e1711d5db8145bbf39eaecfa6a7ae949f Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 6 Nov 2024 16:19:44 +0000 Subject: [PATCH 09/25] fixed dnn tests --- aeon/clustering/deep_learning/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aeon/clustering/deep_learning/base.py b/aeon/clustering/deep_learning/base.py index ced00367a6..6c9ed5d63f 100644 --- a/aeon/clustering/deep_learning/base.py +++ b/aeon/clustering/deep_learning/base.py @@ -121,7 +121,6 @@ def _fit_clustering(self, X): if ( self.clustering_algorithm != "deprecated" or self.clustering_params is not None - or self.n_clusters is not None ): warnings.warn( "The 'n_clusters' 'clustering_algorithm' and " From 2a5807d3dd992ca0267612f3c4d0fcad005d7abe Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 6 Nov 2024 17:23:25 +0000 Subject: [PATCH 10/25] fix notebook --- examples/networks/deep_learning.ipynb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/networks/deep_learning.ipynb b/examples/networks/deep_learning.ipynb index 43ec5b0353..130b7166d7 100644 --- a/examples/networks/deep_learning.ipynb +++ b/examples/networks/deep_learning.ipynb @@ -288,9 +288,7 @@ "aefcn.fit(X=xtrain)\n", "ypred = aefcn.predict(X=xtest)\n", "print(\"Predictions: \", ypred[0:5])\n", - "print(\"Ground Truth: \", ytest[0:5])\n", - "print()\n", - "print(\"Score : \", aefcn.score(X=xtest))" + "print(\"Ground Truth: \", ytest[0:5])" ] }, { From 3f80902ae299fb35fcefe441c6f5cc0ae1aa915e Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 6 Nov 2024 17:39:52 +0000 Subject: [PATCH 11/25] fixed --- aeon/clustering/deep_learning/_ae_bgru.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/clustering/deep_learning/_ae_bgru.py b/aeon/clustering/deep_learning/_ae_bgru.py index 1df07fa344..457ae6581c 100644 --- a/aeon/clustering/deep_learning/_ae_bgru.py +++ b/aeon/clustering/deep_learning/_ae_bgru.py @@ -140,6 +140,7 @@ def __init__( self.save_last_model = save_last_model self.best_file_name = best_file_name self.random_state = random_state + self.n_clusters = n_clusters super().__init__( clustering_algorithm=clustering_algorithm, From a591c31c111495fefdfe6ef27350e7b80fce1a7e Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 15 Nov 2024 16:52:50 +0100 Subject: [PATCH 12/25] removed deep learner n_clusters and assert labels_ exists --- aeon/clustering/deep_learning/_ae_abgru.py | 4 -- aeon/clustering/deep_learning/_ae_bgru.py | 4 -- aeon/clustering/deep_learning/_ae_dcnn.py | 4 -- aeon/clustering/deep_learning/_ae_drnn.py | 4 -- aeon/clustering/deep_learning/_ae_fcn.py | 4 -- aeon/clustering/deep_learning/_ae_resnet.py | 4 -- .../_yield_clustering_checks.py | 41 +++++++++++++++++++ 7 files changed, 41 insertions(+), 24 deletions(-) diff --git a/aeon/clustering/deep_learning/_ae_abgru.py b/aeon/clustering/deep_learning/_ae_abgru.py index 02943abcf7..5c8866a9e9 100644 --- a/aeon/clustering/deep_learning/_ae_abgru.py +++ b/aeon/clustering/deep_learning/_ae_abgru.py @@ -20,8 +20,6 @@ class AEAttentionBiGRUClusterer(BaseDeepClusterer): Parameters ---------- - n_clusters : int, default=None - Number of clusters for the deep learnign model. clustering_algorithm : str, default="deprecated" Use 'estimator' parameter instead. clustering_params : dict, default=None @@ -100,7 +98,6 @@ class AEAttentionBiGRUClusterer(BaseDeepClusterer): def __init__( self, - n_clusters=None, estimator=None, clustering_algorithm="deprecated", clustering_params=None, @@ -143,7 +140,6 @@ def __init__( self.random_state = random_state super().__init__( - n_clusters=n_clusters, clustering_algorithm=clustering_algorithm, clustering_params=clustering_params, estimator=estimator, diff --git a/aeon/clustering/deep_learning/_ae_bgru.py b/aeon/clustering/deep_learning/_ae_bgru.py index 457ae6581c..59e4fb71ca 100644 --- a/aeon/clustering/deep_learning/_ae_bgru.py +++ b/aeon/clustering/deep_learning/_ae_bgru.py @@ -20,8 +20,6 @@ class AEBiGRUClusterer(BaseDeepClusterer): Parameters ---------- - n_clusters : int, default=None - Number of clusters for the deep learnign model. clustering_algorithm : str, default="deprecated" Use 'estimator' parameter instead. clustering_params : dict, default=None @@ -99,7 +97,6 @@ class AEBiGRUClusterer(BaseDeepClusterer): def __init__( self, - n_clusters=None, clustering_algorithm="deprecated", estimator=None, clustering_params=None, @@ -140,7 +137,6 @@ def __init__( self.save_last_model = save_last_model self.best_file_name = best_file_name self.random_state = random_state - self.n_clusters = n_clusters super().__init__( clustering_algorithm=clustering_algorithm, diff --git a/aeon/clustering/deep_learning/_ae_dcnn.py b/aeon/clustering/deep_learning/_ae_dcnn.py index 486457be0d..e9171c5c01 100644 --- a/aeon/clustering/deep_learning/_ae_dcnn.py +++ b/aeon/clustering/deep_learning/_ae_dcnn.py @@ -19,8 +19,6 @@ class AEDCNNClusterer(BaseDeepClusterer): Parameters ---------- - n_clusters : int, default=None - Number of clusters for the deep learnign model. clustering_algorithm : str, default="deprecated" Use 'estimator' parameter instead. clustering_params : dict, default=None @@ -113,7 +111,6 @@ class AEDCNNClusterer(BaseDeepClusterer): def __init__( self, - n_clusters=None, estimator=None, clustering_algorithm="deprecated", clustering_params=None, @@ -164,7 +161,6 @@ def __init__( self.random_state = random_state super().__init__( - n_clusters=n_clusters, clustering_params=clustering_params, clustering_algorithm=clustering_algorithm, estimator=estimator, diff --git a/aeon/clustering/deep_learning/_ae_drnn.py b/aeon/clustering/deep_learning/_ae_drnn.py index 61777ba78c..2f22c15150 100644 --- a/aeon/clustering/deep_learning/_ae_drnn.py +++ b/aeon/clustering/deep_learning/_ae_drnn.py @@ -24,8 +24,6 @@ class AEDRNNClusterer(BaseDeepClusterer): Parameters ---------- - n_clusters : int, default=None - Number of clusters for the deep learnign model. clustering_algorithm : str, default="deprecated" Please use the 'estimator' parameter. estimator : aeon clusterer, default=None @@ -114,7 +112,6 @@ class AEDRNNClusterer(BaseDeepClusterer): def __init__( self, - n_clusters=None, estimator=None, clustering_algorithm="deprecated", clustering_params=None, @@ -167,7 +164,6 @@ def __init__( self.random_state = random_state super().__init__( - n_clusters=n_clusters, estimator=estimator, clustering_algorithm=clustering_algorithm, clustering_params=clustering_params, diff --git a/aeon/clustering/deep_learning/_ae_fcn.py b/aeon/clustering/deep_learning/_ae_fcn.py index 1787284281..f3ab250f48 100644 --- a/aeon/clustering/deep_learning/_ae_fcn.py +++ b/aeon/clustering/deep_learning/_ae_fcn.py @@ -21,8 +21,6 @@ class AEFCNClusterer(BaseDeepClusterer): Parameters ---------- - n_clusters : int, default=None - Please use 'estimator' parameter. estimator : aeon clusterer, default=None An aeon estimator to be built using the transformed data. Defaults to aeon TimeSeriesKMeans() with euclidean distance @@ -122,7 +120,6 @@ class AEFCNClusterer(BaseDeepClusterer): def __init__( self, - n_clusters=None, estimator=None, clustering_algorithm="deprecated", clustering_params=None, @@ -173,7 +170,6 @@ def __init__( self.save_last_model = save_last_model self.best_file_name = best_file_name self.random_state = random_state - self.n_clusters = n_clusters super().__init__( estimator=estimator, diff --git a/aeon/clustering/deep_learning/_ae_resnet.py b/aeon/clustering/deep_learning/_ae_resnet.py index 3674adaa2c..aed5900d4e 100644 --- a/aeon/clustering/deep_learning/_ae_resnet.py +++ b/aeon/clustering/deep_learning/_ae_resnet.py @@ -24,8 +24,6 @@ class AEResNetClusterer(BaseDeepClusterer): Parameters ---------- - n_clusters : int, default=None - Please use 'estimator' parameter. estimator : aeon clusterer, default=None An aeon estimator to be built using the transformed data. Defaults to aeon TimeSeriesKMeans() with euclidean distance @@ -131,7 +129,6 @@ class method save_last_model_to_file. def __init__( self, - n_clusters=None, estimator=None, n_residual_blocks=3, clustering_algorithm="deprecated", @@ -182,7 +179,6 @@ def __init__( self.best_file_name = best_file_name self.last_file_name = last_file_name self.optimizer = optimizer - self.n_clusters = n_clusters self.history = None diff --git a/aeon/testing/estimator_checking/_yield_clustering_checks.py b/aeon/testing/estimator_checking/_yield_clustering_checks.py index 4843f13056..4c704f55d7 100644 --- a/aeon/testing/estimator_checking/_yield_clustering_checks.py +++ b/aeon/testing/estimator_checking/_yield_clustering_checks.py @@ -7,6 +7,7 @@ from aeon.base._base import _clone_estimator from aeon.clustering.deep_learning import BaseDeepClusterer from aeon.testing.testing_data import FULL_TEST_DATA_DICT +from aeon.utils.validation import get_n_cases def _yield_clustering_checks(estimator_class, estimator_instances, datatypes): @@ -26,6 +27,10 @@ def _yield_clustering_checks(estimator_class, estimator_instances, datatypes): estimator=estimator, datatype=datatypes[i][0], ) + for datatype in datatypes[i]: + yield partial( + check_clusterer_output, estimator=estimator, datatype=datatype + ) def check_clusterer_tags_consistent(estimator_class): @@ -82,3 +87,39 @@ def check_clustering_random_state_deep_learning(estimator, datatype): _weight2 = np.asarray(weights2[j]) np.testing.assert_almost_equal(_weight1, _weight2, 4) + + +def check_clusterer_output(estimator, datatype): + """Test clusterer outputs the correct data types and values. + + Test predict produces a np.array or pd.Series with only values seen in the train + data, and that predict_proba probability estimates add up to one. + """ + estimator = _clone_estimator(estimator) + + unique_labels = np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]) + + # run fit and predict + estimator.fit( + FULL_TEST_DATA_DICT[datatype]["train"][0], + FULL_TEST_DATA_DICT[datatype]["train"][1], + ) + assert hasattr(estimator, "labels_") + assert isinstance(estimator.labels_, np.ndarray) + + y_pred = estimator.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + + # check predict + assert isinstance(y_pred, np.ndarray) + assert y_pred.shape == (get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]),) + assert np.all(np.isin(np.unique(y_pred), unique_labels)) + + # check predict proba (all classifiers have predict_proba by default) + y_proba = estimator.predict_proba(FULL_TEST_DATA_DICT[datatype]["test"][0]) + + assert isinstance(y_proba, np.ndarray) + assert y_proba.shape == ( + get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]), + len(unique_labels), + ) + np.testing.assert_almost_equal(y_proba.sum(axis=1), 1, decimal=4) From 2ee81ba8e574ab62a909ebb0abe37cd69e86543b Mon Sep 17 00:00:00 2001 From: chrisholder Date: Sun, 17 Nov 2024 21:25:22 +0100 Subject: [PATCH 13/25] cont --- aeon/clustering/feature_based/_catch22.py | 1 + aeon/clustering/feature_based/_summary.py | 2 ++ aeon/clustering/feature_based/_tsfresh.py | 1 + .../testing/estimator_checking/_yield_clustering_checks.py | 7 ------- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/aeon/clustering/feature_based/_catch22.py b/aeon/clustering/feature_based/_catch22.py index e365e04917..33f0b79bc5 100644 --- a/aeon/clustering/feature_based/_catch22.py +++ b/aeon/clustering/feature_based/_catch22.py @@ -164,6 +164,7 @@ def _fit(self, X, y=None): X_t = self._transformer.fit_transform(X, y) self._estimator.fit(X_t, y) + self.labels_ = self._estimator.labels_ return self diff --git a/aeon/clustering/feature_based/_summary.py b/aeon/clustering/feature_based/_summary.py index 616582da2a..309d3ac92f 100644 --- a/aeon/clustering/feature_based/_summary.py +++ b/aeon/clustering/feature_based/_summary.py @@ -121,6 +121,8 @@ def _fit(self, X, y=None): X_t = self._transformer.fit_transform(X, y) self._estimator.fit(X_t, y) + self.labels_ = self._estimator.labels_ + return self def _predict(self, X) -> np.ndarray: diff --git a/aeon/clustering/feature_based/_tsfresh.py b/aeon/clustering/feature_based/_tsfresh.py index 4987e9bfda..ed14e90a47 100644 --- a/aeon/clustering/feature_based/_tsfresh.py +++ b/aeon/clustering/feature_based/_tsfresh.py @@ -162,6 +162,7 @@ def _fit(self, X: np.ndarray, y: Optional[np.ndarray] = None): else: self._estimator.fit(X_t, y) + self.labels_ = self._estimator.labels_ return self def _predict(self, X: np.ndarray) -> np.ndarray: diff --git a/aeon/testing/estimator_checking/_yield_clustering_checks.py b/aeon/testing/estimator_checking/_yield_clustering_checks.py index 4c704f55d7..117a0704b6 100644 --- a/aeon/testing/estimator_checking/_yield_clustering_checks.py +++ b/aeon/testing/estimator_checking/_yield_clustering_checks.py @@ -97,8 +97,6 @@ def check_clusterer_output(estimator, datatype): """ estimator = _clone_estimator(estimator) - unique_labels = np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]) - # run fit and predict estimator.fit( FULL_TEST_DATA_DICT[datatype]["train"][0], @@ -112,14 +110,9 @@ def check_clusterer_output(estimator, datatype): # check predict assert isinstance(y_pred, np.ndarray) assert y_pred.shape == (get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]),) - assert np.all(np.isin(np.unique(y_pred), unique_labels)) # check predict proba (all classifiers have predict_proba by default) y_proba = estimator.predict_proba(FULL_TEST_DATA_DICT[datatype]["test"][0]) assert isinstance(y_proba, np.ndarray) - assert y_proba.shape == ( - get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]), - len(unique_labels), - ) np.testing.assert_almost_equal(y_proba.sum(axis=1), 1, decimal=4) From ebec4e642365bc0633a1d60ff452f1ababc7496c Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 19 Nov 2024 22:13:27 +0100 Subject: [PATCH 14/25] fix dnns --- aeon/clustering/deep_learning/_ae_resnet.py | 1 + aeon/clustering/deep_learning/base.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/aeon/clustering/deep_learning/_ae_resnet.py b/aeon/clustering/deep_learning/_ae_resnet.py index aed5900d4e..638c64fa57 100644 --- a/aeon/clustering/deep_learning/_ae_resnet.py +++ b/aeon/clustering/deep_learning/_ae_resnet.py @@ -344,6 +344,7 @@ def _fit(self, X): self._fit_clustering(X=X) gc.collect() + return self def _score(self, X, y=None): diff --git a/aeon/clustering/deep_learning/base.py b/aeon/clustering/deep_learning/base.py index 3e4cf6f5ad..64e6cb1cc6 100644 --- a/aeon/clustering/deep_learning/base.py +++ b/aeon/clustering/deep_learning/base.py @@ -136,6 +136,10 @@ def _fit_clustering(self, X): latent_space = self.model_.layers[1].predict(X) self._estimator.fit(X=latent_space) + if hasattr(self._estimator, "labels_"): + self.labels_ = self._estimator.labels_ + else: + self.labels_ = self._estimator.predict(X=latent_space) return self From 02fcee098380671c6a61c1ab24a4cd1be7d041b3 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 19 Nov 2024 22:27:45 +0100 Subject: [PATCH 15/25] pipeline clusterer --- aeon/clustering/compose/_pipeline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aeon/clustering/compose/_pipeline.py b/aeon/clustering/compose/_pipeline.py index eeeb2b43b8..1e3ed54ac8 100644 --- a/aeon/clustering/compose/_pipeline.py +++ b/aeon/clustering/compose/_pipeline.py @@ -86,7 +86,9 @@ def __init__(self, transformers, clusterer, random_state=None): ) def _fit(self, X, y=None): - return super()._fit(X, y) + super()._fit(X, y) + self.labels_ = self._estimator.labels_ + return self @classmethod def _get_test_params(cls, parameter_set="default"): From 91048084da1a562052c6819e7e5b174adcd91527 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 19 Nov 2024 22:53:23 +0100 Subject: [PATCH 16/25] fix pipeline --- aeon/clustering/compose/_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/compose/_pipeline.py b/aeon/clustering/compose/_pipeline.py index 1e3ed54ac8..3337f7e032 100644 --- a/aeon/clustering/compose/_pipeline.py +++ b/aeon/clustering/compose/_pipeline.py @@ -86,8 +86,8 @@ def __init__(self, transformers, clusterer, random_state=None): ) def _fit(self, X, y=None): - super()._fit(X, y) - self.labels_ = self._estimator.labels_ + return super()._fit(X, y) + self.labels_ = self.clusterer.labels_ return self @classmethod From 1c854875113a3ee86e4cc99c3e58bfd55ee3f01c Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 19 Nov 2024 22:57:18 +0100 Subject: [PATCH 17/25] revert --- aeon/clustering/compose/_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/compose/_pipeline.py b/aeon/clustering/compose/_pipeline.py index 3337f7e032..5a1026550b 100644 --- a/aeon/clustering/compose/_pipeline.py +++ b/aeon/clustering/compose/_pipeline.py @@ -86,7 +86,7 @@ def __init__(self, transformers, clusterer, random_state=None): ) def _fit(self, X, y=None): - return super()._fit(X, y) + super()._fit(X, y) self.labels_ = self.clusterer.labels_ return self From c89d854e8b88f37ed352d701062e10124f1d1387 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 11:27:01 +0100 Subject: [PATCH 18/25] fix --- aeon/clustering/compose/_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/compose/_pipeline.py b/aeon/clustering/compose/_pipeline.py index 5a1026550b..eb6c255806 100644 --- a/aeon/clustering/compose/_pipeline.py +++ b/aeon/clustering/compose/_pipeline.py @@ -87,7 +87,7 @@ def __init__(self, transformers, clusterer, random_state=None): def _fit(self, X, y=None): super()._fit(X, y) - self.labels_ = self.clusterer.labels_ + self.labels_ = self.steps_[-1][1].labels_ return self @classmethod From 4c1e6d0c2f29aa2bdd23f806bba79ed525c8eeb8 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 11:53:44 +0100 Subject: [PATCH 19/25] remove score from dnns --- aeon/clustering/base.py | 4 +--- aeon/clustering/deep_learning/_ae_abgru.py | 6 ------ aeon/clustering/deep_learning/_ae_bgru.py | 6 ------ aeon/clustering/deep_learning/_ae_dcnn.py | 6 ------ aeon/clustering/deep_learning/_ae_drnn.py | 6 ------ aeon/clustering/deep_learning/_ae_resnet.py | 6 ------ 6 files changed, 1 insertion(+), 33 deletions(-) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 92989a4c2c..39f216933a 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -160,9 +160,7 @@ def _fit_predict(self, X, y=None) -> np.ndarray: Index of the cluster each time series in X belongs to. """ self.fit(X) - if hasattr(self, "labels_"): - return self.labels_ - return self.predict(X) + return self.labels_ def _predict_proba(self, X) -> np.ndarray: """Predicts labels probabilities for sequences in X. diff --git a/aeon/clustering/deep_learning/_ae_abgru.py b/aeon/clustering/deep_learning/_ae_abgru.py index 88065ddffe..3b41dbfddc 100644 --- a/aeon/clustering/deep_learning/_ae_abgru.py +++ b/aeon/clustering/deep_learning/_ae_abgru.py @@ -298,12 +298,6 @@ def _fit(self, X): return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/_ae_bgru.py b/aeon/clustering/deep_learning/_ae_bgru.py index 6e12e23e03..609eaf5d65 100644 --- a/aeon/clustering/deep_learning/_ae_bgru.py +++ b/aeon/clustering/deep_learning/_ae_bgru.py @@ -296,12 +296,6 @@ def _fit(self, X): return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/_ae_dcnn.py b/aeon/clustering/deep_learning/_ae_dcnn.py index 4a83a10eb2..d6c6b8c3d5 100644 --- a/aeon/clustering/deep_learning/_ae_dcnn.py +++ b/aeon/clustering/deep_learning/_ae_dcnn.py @@ -322,12 +322,6 @@ def _fit(self, X): return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/_ae_drnn.py b/aeon/clustering/deep_learning/_ae_drnn.py index a6551d411d..0efedfb730 100644 --- a/aeon/clustering/deep_learning/_ae_drnn.py +++ b/aeon/clustering/deep_learning/_ae_drnn.py @@ -328,12 +328,6 @@ def _fit(self, X): return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/_ae_resnet.py b/aeon/clustering/deep_learning/_ae_resnet.py index 26acce6b5b..ff34143281 100644 --- a/aeon/clustering/deep_learning/_ae_resnet.py +++ b/aeon/clustering/deep_learning/_ae_resnet.py @@ -362,12 +362,6 @@ def _fit(self, X): return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - def _fit_multi_rec_model( self, autoencoder, From 6d6849227c694857441c3d4423880d1835823d99 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 11:56:40 +0100 Subject: [PATCH 20/25] remove score from notebooks --- .../clustering/partitional_clustering.ipynb | 119 +----------------- 1 file changed, 4 insertions(+), 115 deletions(-) diff --git a/examples/clustering/partitional_clustering.ipynb b/examples/clustering/partitional_clustering.ipynb index 646845ac70..817f66bd24 100644 --- a/examples/clustering/partitional_clustering.ipynb +++ b/examples/clustering/partitional_clustering.ipynb @@ -1217,46 +1217,14 @@ "cell_type": "markdown", "source": [ "We have formed two clusters. The pattern of the two centroids seems fairly similar,\n", - "and the separation of clusters does not seem very good. We can score the clustering\n", - "with the score method, which by default returns `self.inertia_`, which is a measure\n", - "of between cluster variation used as a stopping condition.\n", + "and the separation of clusters does not seem very good.\n", "For a range of clusteirng comparison algorithms, see the [sklearn clustering API]\n", - "(https://scikit-learn.org/stable/modules/clustering.html#clustering-performance-evaluation)\n", - "\n", - "Low intertia is better, so to conform to the sklearn interface which prefers to\n", - "maximize performance criteria, inertia scores are negative.\n" + "(https://scikit-learn.org/stable/modules/clustering.html#clustering-performance-evaluation)\n" ], "metadata": { "collapsed": false } }, - { - "cell_type": "code", - "source": [ - "s1 = k_means.score(X_test, y_test)\n", - "s1" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-09-25T22:59:26.618842Z", - "start_time": "2024-09-25T22:59:26.613249Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "-5016911.727324263" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 16 - }, { "cell_type": "markdown", "source": [ @@ -1332,39 +1300,12 @@ "source": [ "We have formed two clusters with barycentre averaging using MSM distance. The centroids\n", "seem more distinct in shape now, with the first cluster centroid looking much\n", - "flatter than the second. The inertia has been reduced, so the score is higher." + "flatter than the second." ], "metadata": { "collapsed": false } }, - { - "cell_type": "code", - "source": [ - "s2 = k_means.score(X_test, y_test)\n", - "s2" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-09-25T22:59:26.985041Z", - "start_time": "2024-09-25T22:59:26.980060Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "-26178.727675421676" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 18 - }, { "cell_type": "markdown", "source": [ @@ -1441,7 +1382,6 @@ ")\n", "\n", "k_medoids.fit(X_train)\n", - "s3 = k_medoids.score(X_test, y_test)\n", "plot_cluster_algorithm(k_medoids, X_test, k_medoids.n_clusters)" ], "metadata": { @@ -1525,7 +1465,6 @@ ")\n", "\n", "k_medoids.fit(X_train)\n", - "s4 = k_medoids.score(X_test, y_test)\n", "plot_cluster_algorithm(k_medoids, X_test, k_medoids.n_clusters)" ], "metadata": { @@ -1568,29 +1507,6 @@ ], "execution_count": 21 }, - { - "cell_type": "code", - "source": [ - "print(f\" PAM DTW score {s3} PAM MSM score {s4}\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-09-25T22:59:27.856857Z", - "start_time": "2024-09-25T22:59:27.851841Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " PAM DTW score -5739730.0 PAM MSM score -25984.0\n" - ] - } - ], - "execution_count": 22 - }, { "cell_type": "markdown", "source": [ @@ -1619,9 +1535,7 @@ ")\n", "\n", "k_medoids.fit(X_train)\n", - "s5 = k_medoids.score(X_test, y_test)\n", - "plot_cluster_algorithm(k_medoids, X_test, k_medoids.n_clusters)\n", - "print(\"Alternate MSM score = \", s5)" + "plot_cluster_algorithm(k_medoids, X_test, k_medoids.n_clusters)" ], "metadata": { "collapsed": false, @@ -1686,7 +1600,6 @@ " random_state=1,\n", ")\n", "clara.fit(X_train)\n", - "s6 = k_medoids.score(X_test, y_test)\n", "plot_cluster_algorithm(clara, X_test, clara.n_clusters)" ], "metadata": { @@ -1755,7 +1668,6 @@ " random_state=1,\n", ")\n", "clara.fit(X_train)\n", - "s7 = k_medoids.score(X_test, y_test)\n", "plot_cluster_algorithm(clara, X_test, clara.n_clusters)" ], "metadata": { @@ -1798,29 +1710,6 @@ ], "execution_count": 25 }, - { - "cell_type": "code", - "source": [ - "print(f\" Clara score {s6} Clarans score = {s7}\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-09-25T22:59:28.966368Z", - "start_time": "2024-09-25T22:59:28.961420Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Clara score -25984.0 Clarans score = -25984.0\n" - ] - } - ], - "execution_count": 26 - }, { "cell_type": "markdown", "source": [ From a56b4d4442442e6b5a201a5ff9d6cbd104252386 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 12:03:15 +0100 Subject: [PATCH 21/25] remove score from remaining --- aeon/clustering/_k_shapes.py | 3 --- aeon/clustering/_kernel_k_means.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/aeon/clustering/_k_shapes.py b/aeon/clustering/_k_shapes.py index 4cb50fdec5..8809582a95 100644 --- a/aeon/clustering/_k_shapes.py +++ b/aeon/clustering/_k_shapes.py @@ -181,6 +181,3 @@ def _get_test_params(cls, parameter_set="default"): "verbose": False, "random_state": 1, } - - def _score(self, X, y=None): - return np.abs(self.inertia_) diff --git a/aeon/clustering/_kernel_k_means.py b/aeon/clustering/_kernel_k_means.py index 2a7b255757..9c92d250bd 100644 --- a/aeon/clustering/_kernel_k_means.py +++ b/aeon/clustering/_kernel_k_means.py @@ -205,6 +205,3 @@ def _get_test_params(cls, parameter_set="default") -> dict: "n_jobs": 1, "random_state": 1, } - - def _score(self, X, y=None) -> float: - return np.abs(self.inertia_) From f6b0ffb940b5f8ae277a40d2a8b1624ef09780ad Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 13:55:41 +0100 Subject: [PATCH 22/25] fix notebook --- examples/networks/deep_learning.ipynb | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/networks/deep_learning.ipynb b/examples/networks/deep_learning.ipynb index 130b7166d7..498498f5fe 100644 --- a/examples/networks/deep_learning.ipynb +++ b/examples/networks/deep_learning.ipynb @@ -279,7 +279,6 @@ "xtest, ytest = load_classification(name=\"ArrowHead\", split=\"test\")\n", "\n", "aefcn = AEFCNClusterer(\n", - " n_clusters=2,\n", " temporal_latent_space=False,\n", " clustering_algorithm=\"kmeans\",\n", " n_epochs=10,\n", From c61c3c9fe3ef4aae26a21bd8e60b9dac72c9c20f Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 18:43:27 +0100 Subject: [PATCH 23/25] fixed kmeans bug stopping tests working --- aeon/clustering/_k_means.py | 2 +- .../estimator_checking/_yield_clustering_checks.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/aeon/clustering/_k_means.py b/aeon/clustering/_k_means.py index 2581b2d5f4..dce7cd0084 100644 --- a/aeon/clustering/_k_means.py +++ b/aeon/clustering/_k_means.py @@ -268,7 +268,7 @@ def _fit_one_init(self, X: np.ndarray) -> tuple: prev_inertia = curr_inertia prev_labels = curr_labels - if change_in_centres < self.tol: + if change_in_centres < self.tol or (i + 1) == self.max_iter: break # Compute new cluster centres diff --git a/aeon/testing/estimator_checking/_yield_clustering_checks.py b/aeon/testing/estimator_checking/_yield_clustering_checks.py index 3f6285a750..6c86921f12 100644 --- a/aeon/testing/estimator_checking/_yield_clustering_checks.py +++ b/aeon/testing/estimator_checking/_yield_clustering_checks.py @@ -108,12 +108,12 @@ def check_clusterer_output(estimator, datatype): estimator = _clone_estimator(estimator) # run fit and predict - estimator.fit( - FULL_TEST_DATA_DICT[datatype]["train"][0], - FULL_TEST_DATA_DICT[datatype]["train"][1], - ) + data = FULL_TEST_DATA_DICT[datatype]["train"][0] + estimator.fit(data) assert hasattr(estimator, "labels_") assert isinstance(estimator.labels_, np.ndarray) + assert np.array_equal(estimator.labels_, estimator.fit_predict(data)) + assert np.array_equal(estimator.labels_, estimator.predict(data)) y_pred = estimator.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) From 5cc63cfe3b18edc9f84bfb6031a26d8f182598ca Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 21:26:53 +0100 Subject: [PATCH 24/25] fixed --- aeon/clustering/_k_shape.py | 2 +- aeon/clustering/_k_shapes.py | 2 +- aeon/clustering/dummy.py | 4 ++-- aeon/testing/estimator_checking/_yield_clustering_checks.py | 1 - 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/aeon/clustering/_k_shape.py b/aeon/clustering/_k_shape.py index ccb49bec80..ad94a9f10c 100644 --- a/aeon/clustering/_k_shape.py +++ b/aeon/clustering/_k_shape.py @@ -131,7 +131,7 @@ def _fit(self, X, y=None): self._tslearn_k_shapes.fit(_X) self._cluster_centers = self._tslearn_k_shapes.cluster_centers_ - self.labels_ = self._tslearn_k_shapes.labels_ + self.labels_ = self._tslearn_k_shapes.predict(_X) self.inertia_ = self._tslearn_k_shapes.inertia_ self.n_iter_ = self._tslearn_k_shapes.n_iter_ diff --git a/aeon/clustering/_k_shapes.py b/aeon/clustering/_k_shapes.py index 8809582a95..41148a4f9f 100644 --- a/aeon/clustering/_k_shapes.py +++ b/aeon/clustering/_k_shapes.py @@ -132,7 +132,7 @@ def _fit(self, X, y=None): self._tslearn_k_shapes.fit(_X) self._cluster_centers = self._tslearn_k_shapes.cluster_centers_ - self.labels_ = self._tslearn_k_shapes.labels_ + self.labels_ = self._tslearn_k_shapes.predict(_X) self.inertia_ = self._tslearn_k_shapes.inertia_ self.n_iter_ = self._tslearn_k_shapes.n_iter_ diff --git a/aeon/clustering/dummy.py b/aeon/clustering/dummy.py index 3c83d224e8..eb42c8ec7e 100644 --- a/aeon/clustering/dummy.py +++ b/aeon/clustering/dummy.py @@ -18,7 +18,7 @@ class DummyClusterer(BaseClusterer): Parameters ---------- - strategy : str, default="random" + strategy : str, default="uniform" The strategy to use for generating cluster labels. Supported strategies are: - "random": Assign clusters randomly. - "uniform": Distribute clusters uniformly among samples. @@ -54,7 +54,7 @@ class DummyClusterer(BaseClusterer): array([0, 1, 0]) """ - def __init__(self, strategy="random", n_clusters=3, random_state=None): + def __init__(self, strategy="uniform", n_clusters=3, random_state=None): self.strategy = strategy self.random_state = random_state self.n_clusters = n_clusters diff --git a/aeon/testing/estimator_checking/_yield_clustering_checks.py b/aeon/testing/estimator_checking/_yield_clustering_checks.py index 6c86921f12..5205316f94 100644 --- a/aeon/testing/estimator_checking/_yield_clustering_checks.py +++ b/aeon/testing/estimator_checking/_yield_clustering_checks.py @@ -112,7 +112,6 @@ def check_clusterer_output(estimator, datatype): estimator.fit(data) assert hasattr(estimator, "labels_") assert isinstance(estimator.labels_, np.ndarray) - assert np.array_equal(estimator.labels_, estimator.fit_predict(data)) assert np.array_equal(estimator.labels_, estimator.predict(data)) y_pred = estimator.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) From f8660d0644d98405737c37dc97ceb7a6cd2ffa2c Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 21:37:21 +0100 Subject: [PATCH 25/25] docstring fix --- aeon/clustering/dummy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/dummy.py b/aeon/clustering/dummy.py index eb42c8ec7e..55dbbe92da 100644 --- a/aeon/clustering/dummy.py +++ b/aeon/clustering/dummy.py @@ -47,7 +47,7 @@ class DummyClusterer(BaseClusterer): >>> X = np.array([[1, 2], [3, 4], [5, 6]]) >>> clusterer = DummyClusterer(strategy="uniform", n_clusters=2) >>> clusterer.fit(X) - DummyClusterer(n_clusters=2, strategy='uniform') + DummyClusterer(n_clusters=2) >>> clusterer.labels_ array([0, 1, 0]) >>> clusterer.predict(X)