From 2306ebdec5a5f7e95d340b16db8e8fbcec479485 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Fri, 25 Oct 2024 13:34:35 +0100 Subject: [PATCH 01/25] remove y from predict --- aeon/clustering/base.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 17231fdf1f..371f3ad74c 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -67,7 +67,7 @@ def fit(self, X, y=None) -> BaseCollectionEstimator: return self @final - def predict(self, X, y=None) -> np.ndarray: + def predict(self, X) -> np.ndarray: """Predict the closest cluster each sample in X belongs to. Parameters @@ -81,7 +81,6 @@ def predict(self, X, y=None) -> np.ndarray: of shape ``[n_cases]``, 2D np.array ``(n_channels, n_timepoints_i)``, where ``n_timepoints_i`` is length of series ``i``. Other types are allowed and converted into one of the above. - y: ignored, exists for API consistency reasons. Returns ------- From 04c385b461f70503fc7a90675e3cfcab596638c1 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 31 Oct 2024 17:02:57 +0000 Subject: [PATCH 02/25] remove score --- aeon/clustering/base.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 371f3ad74c..e3151db9b3 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -146,27 +146,6 @@ def fit_predict(self, X, y=None) -> np.ndarray: self.fit(X) return self.predict(X) - def score(self, X, y=None) -> float: - """Score the quality of the clusterer. - - Parameters - ---------- - X : np.ndarray (2d or 3d array of shape (n_cases, n_timepoints) or shape - (n_cases, n_channels, n_timepoints)). - Time series instances to train clusterer and then have indexes each belong - to return. - y: ignored, exists for API consistency reasons. - - Returns - ------- - score : float - Score of the clusterer. - """ - self._check_is_fitted() - X = self._preprocess_collection(X, store_metadata=False) - self._check_shape(X) - return self._score(X, y) - def _predict_proba(self, X) -> np.ndarray: """Predicts labels probabilities for sequences in X. From a346080107d7b047156aecaeef63d8dd42b59f94 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 31 Oct 2024 17:07:00 +0000 Subject: [PATCH 03/25] remove score, add fit_predict --- aeon/clustering/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index e3151db9b3..2913212f6d 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -143,6 +143,10 @@ def fit_predict(self, X, y=None) -> np.ndarray: np.ndarray (1d array of shape (n_cases,)) Index of the cluster each time series in X belongs to. """ + return self._fit_predict(X, y) + + def _fit_predict(self, X, y=None) -> np.ndarray: + """Fit predict using base methods.""" self.fit(X) return self.predict(X) @@ -185,9 +189,6 @@ def _predict_proba(self, X) -> np.ndarray: dists[i, preds[i]] = 1 return dists - @abstractmethod - def _score(self, X, y=None): ... - @abstractmethod def _predict(self, X, y=None) -> np.ndarray: """Predict the closest cluster each sample in X belongs to. From 60615dd109d59311d7688e5dd215238946fe2a78 Mon Sep 17 00:00:00 2001 From: Tony Bagnall Date: Thu, 31 Oct 2024 17:10:57 +0000 Subject: [PATCH 04/25] remove score, add fit_predict --- aeon/clustering/base.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 2913212f6d..78f804a935 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -146,7 +146,21 @@ def fit_predict(self, X, y=None) -> np.ndarray: return self._fit_predict(X, y) def _fit_predict(self, X, y=None) -> np.ndarray: - """Fit predict using base methods.""" + """Fit predict using base methods. + + Parameters + ---------- + X : np.ndarray (2d or 3d array of shape (n_cases, n_timepoints) or shape + (n_cases, n_channels, n_timepoints)). + Time series instances to train clusterer and then have indexes each belong + to return. + y: ignored, exists for API consistency reasons. + + Returns + ------- + np.ndarray (1d array of shape (n_cases,)) + Index of the cluster each time series in X belongs to. + """ self.fit(X) return self.predict(X) From aad00ef226d74ee77713192555c1d909c4f16cfd Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 1 Nov 2024 19:19:06 +0000 Subject: [PATCH 05/25] removed score and n_clusters from base class --- aeon/clustering/_clara.py | 6 ++---- aeon/clustering/_elastic_som.py | 6 ++---- aeon/clustering/_k_means.py | 6 ++---- aeon/clustering/_k_medoids.py | 6 ++---- aeon/clustering/_k_shape.py | 6 ++---- aeon/clustering/_k_shapes.py | 3 ++- aeon/clustering/_kernel_k_means.py | 3 ++- aeon/clustering/base.py | 10 +++++----- aeon/clustering/compose/_pipeline.py | 3 --- aeon/clustering/deep_learning/_ae_fcn.py | 8 +------- aeon/clustering/deep_learning/_ae_resnet.py | 8 +------- aeon/clustering/deep_learning/base.py | 4 +--- .../tests/test_deep_clusterer_base.py | 2 -- aeon/clustering/dummy.py | 19 ++----------------- aeon/clustering/feature_based/_catch22.py | 3 --- aeon/clustering/feature_based/_summary.py | 3 --- aeon/clustering/feature_based/_tsfresh.py | 3 --- aeon/clustering/tests/test_base.py | 3 --- aeon/clustering/tests/test_dummy.py | 15 --------------- aeon/clustering/tests/test_k_shape.py | 3 --- aeon/clustering/tests/test_kernel_k_means.py | 4 ---- .../mock_estimators/_mock_clusterers.py | 9 ++------- 22 files changed, 26 insertions(+), 107 deletions(-) diff --git a/aeon/clustering/_clara.py b/aeon/clustering/_clara.py index 4f44f5adab..dcc0e7a912 100644 --- a/aeon/clustering/_clara.py +++ b/aeon/clustering/_clara.py @@ -139,6 +139,7 @@ def __init__( self.distance_params = distance_params self.n_samples = n_samples self.n_sampling_iters = n_sampling_iters + self.n_clusters = n_clusters self.cluster_centers_ = None self.labels_ = None @@ -148,7 +149,7 @@ def __init__( self._random_state = None self._kmedoids_instance = None - super().__init__(n_clusters) + super().__init__() def _predict(self, X: np.ndarray, y=None) -> np.ndarray: return self._kmedoids_instance.predict(X) @@ -207,9 +208,6 @@ def _fit(self, X: np.ndarray, y=None): self.n_iter_ = best_pam.n_iter_ self._kmedoids_instance = best_pam - def _score(self, X, y=None): - return -self.inertia_ - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/_elastic_som.py b/aeon/clustering/_elastic_som.py index e7d7d34682..36a8769b13 100644 --- a/aeon/clustering/_elastic_som.py +++ b/aeon/clustering/_elastic_som.py @@ -179,6 +179,7 @@ def __init__( self.init = init self.sigma_decay_function = sigma_decay_function self.custom_alignment_path = custom_alignment_path + self.n_clusters = n_clusters self._random_state = None self._alignment_path_callable = None @@ -191,7 +192,7 @@ def __init__( self.labels_ = None self.cluster_centers_ = None - super().__init__(n_clusters=n_clusters) + super().__init__() def _fit(self, X, y=None): self._check_params(X) @@ -219,9 +220,6 @@ def _fit(self, X, y=None): def _predict(self, X, y=None): return self._find_bmu(X, self.cluster_centers_) - def _score(self, X, y=None): - raise NotImplementedError("TimeSeriesSOM does not support scoring") - def _find_bmu(self, x, weights): pairwise_matrix = pairwise_distance( x, diff --git a/aeon/clustering/_k_means.py b/aeon/clustering/_k_means.py index 550d38944e..2581b2d5f4 100644 --- a/aeon/clustering/_k_means.py +++ b/aeon/clustering/_k_means.py @@ -192,6 +192,7 @@ def __init__( self.distance_params = distance_params self.average_params = average_params self.averaging_method = averaging_method + self.n_clusters = n_clusters self.cluster_centers_ = None self.labels_ = None @@ -203,7 +204,7 @@ def __init__( self._averaging_method = None self._average_params = None - super().__init__(n_clusters) + super().__init__() def _fit(self, X: np.ndarray, y=None): self._check_params(X) @@ -281,9 +282,6 @@ def _fit_one_init(self, X: np.ndarray) -> tuple: return prev_labels, cluster_centres, prev_inertia, i + 1 - def _score(self, X, y=None): - return -self.inertia_ - def _predict(self, X: np.ndarray, y=None) -> np.ndarray: if isinstance(self.distance, str): pairwise_matrix = pairwise_distance( diff --git a/aeon/clustering/_k_medoids.py b/aeon/clustering/_k_medoids.py index 1f36f75ebe..ea8e860afc 100644 --- a/aeon/clustering/_k_medoids.py +++ b/aeon/clustering/_k_medoids.py @@ -171,6 +171,7 @@ def __init__( self.random_state = random_state self.distance_params = distance_params self.method = method + self.n_clusters = n_clusters self.cluster_centers_ = None self.labels_ = None @@ -184,7 +185,7 @@ def __init__( self._fit_method = None self._distance_params = {} - super().__init__(n_clusters) + super().__init__() def _fit(self, X: np.ndarray, y=None): self._check_params(X) @@ -207,9 +208,6 @@ def _fit(self, X: np.ndarray, y=None): self.cluster_centers_ = best_centers self.n_iter_ = best_iters - def _score(self, X, y=None): - return -self.inertia_ - def _predict(self, X: np.ndarray, y=None) -> np.ndarray: if isinstance(self.distance, str): pairwise_matrix = pairwise_distance( diff --git a/aeon/clustering/_k_shape.py b/aeon/clustering/_k_shape.py index 3da2aca0cf..ccb49bec80 100644 --- a/aeon/clustering/_k_shape.py +++ b/aeon/clustering/_k_shape.py @@ -89,6 +89,7 @@ def __init__( self.tol = tol self.verbose = verbose self.random_state = random_state + self.n_clusters = n_clusters self.cluster_centers_ = None self.labels_ = None @@ -97,7 +98,7 @@ def __init__( self._tslearn_k_shapes = None - super().__init__(n_clusters=n_clusters) + super().__init__() def _fit(self, X, y=None): """Fit time series clusterer to training data. @@ -179,6 +180,3 @@ def _get_test_params(cls, parameter_set="default"): "verbose": False, "random_state": 1, } - - def _score(self, X, y=None): - return np.abs(self.inertia_) diff --git a/aeon/clustering/_k_shapes.py b/aeon/clustering/_k_shapes.py index cdad58032a..4cb50fdec5 100644 --- a/aeon/clustering/_k_shapes.py +++ b/aeon/clustering/_k_shapes.py @@ -90,6 +90,7 @@ def __init__( self.tol = tol self.verbose = verbose self.random_state = random_state + self.n_clusters = n_clusters self.cluster_centers_ = None self.labels_ = None @@ -98,7 +99,7 @@ def __init__( self._tslearn_k_shapes = None - super().__init__(n_clusters=n_clusters) + super().__init__() def _fit(self, X, y=None): """Fit time series clusterer to training data. diff --git a/aeon/clustering/_kernel_k_means.py b/aeon/clustering/_kernel_k_means.py index 6511c6a393..2a7b255757 100644 --- a/aeon/clustering/_kernel_k_means.py +++ b/aeon/clustering/_kernel_k_means.py @@ -108,6 +108,7 @@ def __init__( self.verbose = verbose self.n_jobs = n_jobs self.random_state = random_state + self.n_clusters = n_clusters self.cluster_centers_ = None self.labels_ = None @@ -116,7 +117,7 @@ def __init__( self._tslearn_kernel_k_means = None - super().__init__(n_clusters=n_clusters) + super().__init__() def _fit(self, X, y=None): """Fit time series clusterer to training data. diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 78f804a935..2aca96545e 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -1,7 +1,5 @@ """Base class for clustering.""" -from typing import Optional - __maintainer__ = [] __all__ = ["BaseClusterer"] @@ -28,8 +26,7 @@ class BaseClusterer(BaseCollectionEstimator): "fit_is_empty": False, } - def __init__(self, n_clusters: Optional[int] = None): - self.n_clusters = n_clusters + def __init__(self): # required for compatibility with some sklearn interfaces e.g. # CalibratedClassifierCV self._estimator_type = "clusterer" @@ -195,7 +192,10 @@ def _predict_proba(self, X) -> np.ndarray: for i, u in enumerate(unique): preds[preds == u] = i n_cases = len(preds) - n_clusters = self.n_clusters + if hasattr(self, "n_clusters"): + n_clusters = self.n_clusters + else: + n_clusters = len(np.unique(preds)) if n_clusters is None: n_clusters = int(max(preds)) + 1 dists = np.zeros((X.shape[0], n_clusters)) diff --git a/aeon/clustering/compose/_pipeline.py b/aeon/clustering/compose/_pipeline.py index 763f872e49..a946ebf2dc 100644 --- a/aeon/clustering/compose/_pipeline.py +++ b/aeon/clustering/compose/_pipeline.py @@ -88,9 +88,6 @@ def __init__(self, transformers, clusterer, random_state=None): def _fit(self, X, y=None): return super()._fit(X, y) - def _score(self, X, y=None): - raise NotImplementedError("Pipeline does not support scoring.") - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/_ae_fcn.py b/aeon/clustering/deep_learning/_ae_fcn.py index 0075817823..6d362c0c32 100644 --- a/aeon/clustering/deep_learning/_ae_fcn.py +++ b/aeon/clustering/deep_learning/_ae_fcn.py @@ -168,10 +168,10 @@ def __init__( self.save_last_model = save_last_model self.best_file_name = best_file_name self.random_state = random_state + self.n_clusters = n_clusters super().__init__( estimator=estimator, - n_clusters=n_clusters, clustering_algorithm=clustering_algorithm, clustering_params=clustering_params, batch_size=batch_size, @@ -320,12 +320,6 @@ def _fit(self, X): return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/_ae_resnet.py b/aeon/clustering/deep_learning/_ae_resnet.py index 56d12cb487..a6b8f98f1f 100644 --- a/aeon/clustering/deep_learning/_ae_resnet.py +++ b/aeon/clustering/deep_learning/_ae_resnet.py @@ -178,12 +178,12 @@ def __init__( self.best_file_name = best_file_name self.last_file_name = last_file_name self.optimizer = optimizer + self.n_clusters = n_clusters self.history = None super().__init__( estimator=estimator, - n_clusters=n_clusters, clustering_algorithm=clustering_algorithm, clustering_params=clustering_params, batch_size=batch_size, @@ -336,12 +336,6 @@ def _fit(self, X): gc.collect() return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/base.py b/aeon/clustering/deep_learning/base.py index dc84d7c187..ced00367a6 100644 --- a/aeon/clustering/deep_learning/base.py +++ b/aeon/clustering/deep_learning/base.py @@ -42,7 +42,6 @@ class BaseDeepClusterer(BaseClusterer): def __init__( self, - n_clusters=None, estimator=None, clustering_algorithm="deprecated", clustering_params=None, @@ -50,7 +49,6 @@ def __init__( last_file_name="last_file", ): self.estimator = estimator - self.n_clusters = n_clusters self.clustering_algorithm = clustering_algorithm self.clustering_params = clustering_params self.batch_size = batch_size @@ -58,7 +56,7 @@ def __init__( self.model_ = None - super().__init__(n_clusters=n_clusters) + super().__init__() @abstractmethod def build_model(self, input_shape): diff --git a/aeon/clustering/deep_learning/tests/test_deep_clusterer_base.py b/aeon/clustering/deep_learning/tests/test_deep_clusterer_base.py index 64099bc87c..9e65866532 100644 --- a/aeon/clustering/deep_learning/tests/test_deep_clusterer_base.py +++ b/aeon/clustering/deep_learning/tests/test_deep_clusterer_base.py @@ -41,5 +41,3 @@ def test_base_deep_clusterer(estimator): ypred_proba = dummy_deep_clr.predict_proba(X) assert ypred_proba is not None assert len(ypred_proba[0]) == len(np.unique(y)) - score = dummy_deep_clr.score(X) - assert isinstance(score, np.float64) or isinstance(score, np.float32) diff --git a/aeon/clustering/dummy.py b/aeon/clustering/dummy.py index 523f0748bb..3c83d224e8 100644 --- a/aeon/clustering/dummy.py +++ b/aeon/clustering/dummy.py @@ -57,8 +57,9 @@ class DummyClusterer(BaseClusterer): def __init__(self, strategy="random", n_clusters=3, random_state=None): self.strategy = strategy self.random_state = random_state + self.n_clusters = n_clusters - super().__init__(n_clusters=n_clusters) + super().__init__() def _fit(self, X, y=None): """ @@ -122,19 +123,3 @@ def _predict(self, X, y=None) -> np.ndarray: return np.zeros(n_samples, dtype=int) else: raise ValueError("Unknown strategy type") - - def _score(self, X, y=None): - if self.strategy == "single_cluster": - centers = np.mean(X, axis=0).reshape(1, -1) - else: - centers = np.array( - [X[self.labels_ == i].mean(axis=0) for i in range(self.n_clusters)] - ) - - inertia = np.sum( - [ - np.sum((X[self.labels_ == i] - centers[i]) ** 2) - for i in range(len(centers)) - ] - ) - return inertia diff --git a/aeon/clustering/feature_based/_catch22.py b/aeon/clustering/feature_based/_catch22.py index 0b6b2e32fa..74d2d54674 100644 --- a/aeon/clustering/feature_based/_catch22.py +++ b/aeon/clustering/feature_based/_catch22.py @@ -214,9 +214,6 @@ def _predict_proba(self, X) -> np.ndarray: dists[i, preds[i]] = 1 return dists - def _score(self, X, y=None): - raise NotImplementedError("Catch22Clusterer does not support scoring.") - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/feature_based/_summary.py b/aeon/clustering/feature_based/_summary.py index 26bb296f0e..ddd3e3825e 100644 --- a/aeon/clustering/feature_based/_summary.py +++ b/aeon/clustering/feature_based/_summary.py @@ -169,6 +169,3 @@ def _predict_proba(self, X) -> np.ndarray: for i in range(n_cases): dists[i, preds[i]] = 1 return dists - - def _score(self, X, y=None): - raise NotImplementedError("SummaryClusterer does not support scoring.") diff --git a/aeon/clustering/feature_based/_tsfresh.py b/aeon/clustering/feature_based/_tsfresh.py index 503638e239..c735602ff4 100644 --- a/aeon/clustering/feature_based/_tsfresh.py +++ b/aeon/clustering/feature_based/_tsfresh.py @@ -211,9 +211,6 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: dists[i, preds[i]] = 1 return dists - def _score(self, X: np.ndarray, y: Optional[np.ndarray] = None): - raise NotImplementedError("TSFreshClusterer does not support scoring.") - @classmethod def _get_test_params(cls, parameter_set: str = "default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/tests/test_base.py b/aeon/clustering/tests/test_base.py index a40405e5b5..8a8fb81b2d 100644 --- a/aeon/clustering/tests/test_base.py +++ b/aeon/clustering/tests/test_base.py @@ -51,9 +51,6 @@ def _predict(self, X): """Predict dummy.""" return np.zeros(shape=(len(X),), dtype=int) - def _score(self, X, y=None): - return 1.0 - def test_base_clusterer(): """Test with no clusters.""" diff --git a/aeon/clustering/tests/test_dummy.py b/aeon/clustering/tests/test_dummy.py index 0a42f039c2..1a94d91ff6 100644 --- a/aeon/clustering/tests/test_dummy.py +++ b/aeon/clustering/tests/test_dummy.py @@ -17,18 +17,3 @@ def test_dummy_clusterer(strategy): assert len(preds) == 3 assert np.all(np.array([(pred < 3) for pred in preds])) assert np.all(np.array([(pred >= 0) for pred in preds])) - - -def test_dummy_clusterer_score(): - """Test score method of the dummy clusterer.""" - model = DummyClusterer(strategy="random") - data = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - model.fit(data) - score = model.score(data) - assert score is not None - - model = DummyClusterer(strategy="single_cluster") - model.fit(data) - score = model.score(data) - assert score is not None - assert score == 54.0 diff --git a/aeon/clustering/tests/test_k_shape.py b/aeon/clustering/tests/test_k_shape.py index 4e82162e7e..8af9743004 100644 --- a/aeon/clustering/tests/test_k_shape.py +++ b/aeon/clustering/tests/test_k_shape.py @@ -14,7 +14,6 @@ expected_iters = 2 expected_labels = [0, 2, 1, 1, 1] -expected_score = 0.5645477840468736 @pytest.mark.skipif( @@ -31,10 +30,8 @@ def test_kshapes(): kshapes = TimeSeriesKShape(random_state=1, n_clusters=3) kshapes.fit(X_train[0:max_train]) test_shape_result = kshapes.predict(X_test[0:max_train]) - score = kshapes.score(X_test[0:max_train]) proba = kshapes.predict_proba(X_test[0:max_train]) assert np.array_equal(test_shape_result, expected_results) - np.testing.assert_almost_equal(score, expected_score) assert kshapes.n_iter_ == expected_iters assert np.array_equal(kshapes.labels_, expected_labels) assert proba.shape == (max_train, 3) diff --git a/aeon/clustering/tests/test_kernel_k_means.py b/aeon/clustering/tests/test_kernel_k_means.py index e46794b26a..f4af21f4f5 100644 --- a/aeon/clustering/tests/test_kernel_k_means.py +++ b/aeon/clustering/tests/test_kernel_k_means.py @@ -9,8 +9,6 @@ expected_labels = [0, 2, 1, 2, 0] -expected_score = 4.0 - expected_iters = 2 expected_results = [0, 0, 0, 0, 0] @@ -30,11 +28,9 @@ def test_kernel_k_means(): kernel_kmeans = TimeSeriesKernelKMeans(random_state=1, n_clusters=3) kernel_kmeans.fit(X_train[0:max_train]) test_shape_result = kernel_kmeans.predict(X_test[0:max_train]) - score = kernel_kmeans.score(X_test[0:max_train]) proba = kernel_kmeans.predict_proba(X_test[0:max_train]) assert np.array_equal(test_shape_result, expected_results) - np.testing.assert_almost_equal(score, expected_score) assert kernel_kmeans.n_iter_ == expected_iters assert np.array_equal(kernel_kmeans.labels_, expected_labels) assert proba.shape == (max_train, 3) diff --git a/aeon/testing/mock_estimators/_mock_clusterers.py b/aeon/testing/mock_estimators/_mock_clusterers.py index 0563129909..ea6772c092 100644 --- a/aeon/testing/mock_estimators/_mock_clusterers.py +++ b/aeon/testing/mock_estimators/_mock_clusterers.py @@ -1,5 +1,3 @@ -from typing import Optional - import numpy as np from aeon.clustering.base import BaseClusterer @@ -9,8 +7,8 @@ class MockCluster(BaseClusterer): """Mock Cluster for testing base class fit/predict.""" - def __init__(self, n_clusters: Optional[int] = None): - super().__init__(n_clusters) + def __init__(self): + super().__init__() def _fit(self, X): """Mock fit.""" @@ -25,9 +23,6 @@ def _predict_proba(self, X): y = np.random.rand(len(X)) return y - def _score(self, X, y): - return np.random.randn(1) - class MockDeepClusterer(BaseDeepClusterer): """Mock Deep Clusterer for testing empty base deep class save utilities.""" From 24732d2308c6e9ad2290162715510e61e0dabbe5 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Mon, 4 Nov 2024 11:25:37 +0000 Subject: [PATCH 06/25] fit_predict --- aeon/clustering/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 780fd5d501..bf73c2a136 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -122,6 +122,7 @@ def predict_proba(self, X) -> np.ndarray: self._check_shape(X) return self._predict_proba(X) + @final def fit_predict(self, X, y=None) -> np.ndarray: """Compute cluster centers and predict cluster index for each time series. @@ -159,6 +160,8 @@ def _fit_predict(self, X, y=None) -> np.ndarray: Index of the cluster each time series in X belongs to. """ self.fit(X) + if hasattr(self, "labels_"): + return self.labels_ return self.predict(X) def _predict_proba(self, X) -> np.ndarray: From 78c67c831f9b80d7a0dd9905b8c378fb7f41b14a Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 5 Nov 2024 11:32:08 +0000 Subject: [PATCH 07/25] clustering deprecations --- aeon/clustering/__init__.py | 2 - aeon/clustering/_k_means.py | 12 -- aeon/clustering/_k_shapes.py | 185 -------------------- aeon/clustering/deep_learning/_ae_bgru.py | 3 - aeon/clustering/deep_learning/_ae_fcn.py | 4 - aeon/clustering/deep_learning/_ae_resnet.py | 4 - aeon/clustering/deep_learning/base.py | 22 --- 7 files changed, 232 deletions(-) delete mode 100644 aeon/clustering/_k_shapes.py diff --git a/aeon/clustering/__init__.py b/aeon/clustering/__init__.py index 084778aed6..2eec5142cf 100644 --- a/aeon/clustering/__init__.py +++ b/aeon/clustering/__init__.py @@ -7,7 +7,6 @@ "TimeSeriesCLARANS", "TimeSeriesKMeans", "TimeSeriesKShape", - "TimeSeriesKShapes", "TimeSeriesKernelKMeans", "ElasticSOM", "KSpectralCentroid", @@ -21,7 +20,6 @@ from aeon.clustering._k_medoids import TimeSeriesKMedoids from aeon.clustering._k_sc import KSpectralCentroid from aeon.clustering._k_shape import TimeSeriesKShape -from aeon.clustering._k_shapes import TimeSeriesKShapes from aeon.clustering._kernel_k_means import TimeSeriesKernelKMeans from aeon.clustering.base import BaseClusterer from aeon.clustering.dummy import DummyClusterer diff --git a/aeon/clustering/_k_means.py b/aeon/clustering/_k_means.py index 550d38944e..97806915bc 100644 --- a/aeon/clustering/_k_means.py +++ b/aeon/clustering/_k_means.py @@ -169,20 +169,8 @@ def __init__( averaging_method: Union[str, Callable[[np.ndarray], np.ndarray]] = "ba", distance_params: Optional[dict] = None, average_params: Optional[dict] = None, - init_algorithm: Optional[Union[str, np.ndarray]] = None, ): self.init = init - self.init_algorithm = init_algorithm - if init_algorithm is not None: - import warnings - - warnings.warn( - "The 'init_algorithm' parameter is deprecated and will be " - "removed in a future. Version Use 'init' instead.", - DeprecationWarning, - stacklevel=2, - ) - self.init = self.init_algorithm self.distance = distance self.n_init = n_init self.max_iter = max_iter diff --git a/aeon/clustering/_k_shapes.py b/aeon/clustering/_k_shapes.py deleted file mode 100644 index cdad58032a..0000000000 --- a/aeon/clustering/_k_shapes.py +++ /dev/null @@ -1,185 +0,0 @@ -"""Time series kshapes.""" - -from typing import Optional, Union - -import numpy as np -from deprecated.sphinx import deprecated -from numpy.random import RandomState - -from aeon.clustering.base import BaseClusterer - - -# TODO: remove in v1.0.0 -@deprecated( - version="1.0.0", - reason="TimeSeriesKShapes class has been renamed to TimeSeriesKShape. " - "The TimeSeriesKShapes version will be removed in version 1.0.0.", - category=FutureWarning, -) -class TimeSeriesKShapes(BaseClusterer): - """Kshape algorithm: wrapper of the ``tslearn`` implementation. - - Parameters - ---------- - n_clusters: int, default=8 - The number of clusters to form as well as the number of - centroids to generate. - init_algorithm: str or np.ndarray, default='random' - Method for initializing cluster centres. Any of the following are valid: - ['random']. Or a np.ndarray of shape (n_clusters, n_channels, n_timepoints) - and gives the initial cluster centres. - n_init: int, default=10 - Number of times the k-means algorithm will be run with different - centroid seeds. The final result will be the best output of n_init - consecutive runs in terms of inertia. - max_iter: int, default=30 - Maximum number of iterations of the k-means algorithm for a single - run. - tol: float, default=1e-4 - Relative tolerance with regards to Frobenius norm of the difference - in the cluster centres of two consecutive iterations to declare - convergence. - verbose: bool, default=False - Verbosity mode. - random_state: int or np.random.RandomState instance or None, default=None - Determines random number generation for centroid initialization. - - Attributes - ---------- - labels_: np.ndarray (1d array of shape (n_cases,)) - Labels that is the index each time series belongs to. - inertia_: float - Sum of squared distances of samples to their closest cluster centre, weighted by - the sample weights if provided. - n_iter_: int - Number of iterations run. - - Examples - -------- - >>> from aeon.clustering import TimeSeriesKShapes - >>> from aeon.datasets import load_basic_motions - >>> # Load data - >>> X_train, y_train = load_basic_motions(split="TRAIN")[0:10] - >>> X_test, y_test = load_basic_motions(split="TEST")[0:10] - >>> # Example of KShapes clustering - >>> ks = TimeSeriesKShapes(n_clusters=3, random_state=1) # doctest: +SKIP - >>> ks.fit(X_train) # doctest: +SKIP - TimeSeriesKShapes(n_clusters=3, random_state=1) - >>> preds = ks.predict(X_test) # doctest: +SKIP - """ - - _tags = { - "capability:multivariate": True, - "python_dependencies": "tslearn", - "algorithm_type": "distance", - } - - def __init__( - self, - n_clusters: int = 8, - init_algorithm: Union[str, np.ndarray] = "random", - n_init: int = 10, - max_iter: int = 300, - tol: float = 1e-4, - verbose: bool = False, - random_state: Optional[Union[int, RandomState]] = None, - ): - self.init_algorithm = init_algorithm - self.n_init = n_init - self.max_iter = max_iter - self.tol = tol - self.verbose = verbose - self.random_state = random_state - - self.cluster_centers_ = None - self.labels_ = None - self.inertia_ = None - self.n_iter_ = 0 - - self._tslearn_k_shapes = None - - super().__init__(n_clusters=n_clusters) - - def _fit(self, X, y=None): - """Fit time series clusterer to training data. - - Parameters - ---------- - X: np.ndarray, of shape (n_cases, n_channels, n_timepoints) or - (n_cases, n_timepoints) - A collection of time series instances. - y: ignored, exists for API consistency reasons. - - Returns - ------- - self: - Fitted estimator. - """ - from tslearn.clustering import KShape - - self._tslearn_k_shapes = KShape( - n_clusters=self.n_clusters, - max_iter=self.max_iter, - tol=self.tol, - random_state=self.random_state, - n_init=self.n_init, - verbose=self.verbose, - init=self.init_algorithm, - ) - - _X = X.swapaxes(1, 2) - - self._tslearn_k_shapes.fit(_X) - self._cluster_centers = self._tslearn_k_shapes.cluster_centers_ - self.labels_ = self._tslearn_k_shapes.labels_ - self.inertia_ = self._tslearn_k_shapes.inertia_ - self.n_iter_ = self._tslearn_k_shapes.n_iter_ - - def _predict(self, X, y=None) -> np.ndarray: - """Predict the closest cluster each sample in X belongs to. - - Parameters - ---------- - X: np.ndarray, of shape (n_cases, n_channels, n_timepoints) or - (n_cases, n_timepoints) - A collection of time series instances. - y: ignored, exists for API consistency reasons. - - Returns - ------- - np.ndarray (1d array of shape (n_cases,)) - Index of the cluster each time series in X belongs to. - """ - _X = X.swapaxes(1, 2) - return self._tslearn_k_shapes.predict(_X) - - @classmethod - def _get_test_params(cls, parameter_set="default"): - """Return testing parameter settings for the estimator. - - Parameters - ---------- - parameter_set : str, default="default" - Name of the set of test parameters to return, for use in tests. If no - special parameters are defined for a value, will return `"default"` set. - - - Returns - ------- - params : dict or list of dict, default={} - Parameters to create testing instances of the class - Each dict are parameters to construct an "interesting" test instance, i.e., - `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance. - """ - return { - "n_clusters": 2, - "init_algorithm": "random", - "n_init": 1, - "max_iter": 1, - "tol": 1e-4, - "verbose": False, - "random_state": 1, - } - - def _score(self, X, y=None): - return np.abs(self.inertia_) diff --git a/aeon/clustering/deep_learning/_ae_bgru.py b/aeon/clustering/deep_learning/_ae_bgru.py index 9b7df32716..e400bdaa05 100644 --- a/aeon/clustering/deep_learning/_ae_bgru.py +++ b/aeon/clustering/deep_learning/_ae_bgru.py @@ -22,8 +22,6 @@ class AEBiGRUClusterer(BaseDeepClusterer): ---------- n_clusters : int, default=None Number of clusters for the deep learnign model. - clustering_algorithm : str, default="deprecated" - Use 'estimator' parameter instead. clustering_params : dict, default=None Use 'estimator' parameter instead. estimator : aeon clusterer, default=None @@ -100,7 +98,6 @@ class AEBiGRUClusterer(BaseDeepClusterer): def __init__( self, n_clusters=None, - clustering_algorithm="deprecated", estimator=None, clustering_params=None, latent_space_dim=128, diff --git a/aeon/clustering/deep_learning/_ae_fcn.py b/aeon/clustering/deep_learning/_ae_fcn.py index 70b55bb420..15df93af8e 100644 --- a/aeon/clustering/deep_learning/_ae_fcn.py +++ b/aeon/clustering/deep_learning/_ae_fcn.py @@ -27,8 +27,6 @@ class AEFCNClusterer(BaseDeepClusterer): An aeon estimator to be built using the transformed data. Defaults to aeon TimeSeriesKMeans() with euclidean distance and mean averaging method and n_clusters set to 2. - clustering_algorithm : str, default="deprecated" - Please use 'estimator' parameter. clustering_params : dict, default=None Please use 'estimator' parameter. latent_space_dim : int, default=128 @@ -124,7 +122,6 @@ def __init__( self, n_clusters=None, estimator=None, - clustering_algorithm="deprecated", clustering_params=None, latent_space_dim=128, temporal_latent_space=False, @@ -177,7 +174,6 @@ def __init__( super().__init__( estimator=estimator, n_clusters=n_clusters, - clustering_algorithm=clustering_algorithm, clustering_params=clustering_params, batch_size=batch_size, last_file_name=last_file_name, diff --git a/aeon/clustering/deep_learning/_ae_resnet.py b/aeon/clustering/deep_learning/_ae_resnet.py index d9f3ebd52e..2a7f308d4a 100644 --- a/aeon/clustering/deep_learning/_ae_resnet.py +++ b/aeon/clustering/deep_learning/_ae_resnet.py @@ -30,8 +30,6 @@ class AEResNetClusterer(BaseDeepClusterer): An aeon estimator to be built using the transformed data. Defaults to aeon TimeSeriesKMeans() with euclidean distance and mean averaging method and n_clusters set to 2. - clustering_algorithm : str, default="deprecated" - Please use 'estimator' parameter. clustering_params : dict, default=None Please use 'estimator' parameter. latent_space_dim : int, default=128 @@ -134,7 +132,6 @@ def __init__( n_clusters=None, estimator=None, n_residual_blocks=3, - clustering_algorithm="deprecated", clustering_params=None, n_conv_per_residual_block=3, n_filters=None, @@ -188,7 +185,6 @@ def __init__( super().__init__( estimator=estimator, n_clusters=n_clusters, - clustering_algorithm=clustering_algorithm, clustering_params=clustering_params, batch_size=batch_size, last_file_name=last_file_name, diff --git a/aeon/clustering/deep_learning/base.py b/aeon/clustering/deep_learning/base.py index dc84d7c187..ba4d4ac00a 100644 --- a/aeon/clustering/deep_learning/base.py +++ b/aeon/clustering/deep_learning/base.py @@ -19,8 +19,6 @@ class BaseDeepClusterer(BaseClusterer): An aeon estimator to be built using the transformed data. Defaults to aeon TimeSeriesKMeans() with euclidean distance and mean averaging method and n_clusters set to 2. - clustering_algorithm : str, default="deprecated" - Please use 'estimator' parameter. clustering_params : dict, default=None Please use 'estimator' parameter. batch_size : int, default = 40 @@ -44,14 +42,12 @@ def __init__( self, n_clusters=None, estimator=None, - clustering_algorithm="deprecated", clustering_params=None, batch_size=32, last_file_name="last_file", ): self.estimator = estimator self.n_clusters = n_clusters - self.clustering_algorithm = clustering_algorithm self.clustering_params = clustering_params self.batch_size = batch_size self.last_file_name = last_file_name @@ -109,8 +105,6 @@ def _fit_clustering(self, X): X : np.ndarray, shape=(n_cases, n_timepoints, n_channels) The input time series. """ - import warnings - self._estimator = ( TimeSeriesKMeans( n_clusters=2, distance="euclidean", averaging_method="mean" @@ -120,22 +114,6 @@ def _fit_clustering(self, X): ) # to be removed in 1.0.0 - if ( - self.clustering_algorithm != "deprecated" - or self.clustering_params is not None - or self.n_clusters is not None - ): - warnings.warn( - "The 'n_clusters' 'clustering_algorithm' and " - "'clustering_params' parameters " - "will be removed in v1.0.0. " - "Their usage will not have an effect, " - "please use the new 'estimator' parameter to directly " - "give an aeon clusterer as input.", - FutureWarning, - stacklevel=2, - ) - latent_space = self.model_.layers[1].predict(X) self._estimator.fit(X=latent_space) From 491117f3972fdb17bb8c198a21315aca25ad73a8 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 5 Nov 2024 12:43:12 +0000 Subject: [PATCH 08/25] notebook --- examples/networks/deep_learning.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/networks/deep_learning.ipynb b/examples/networks/deep_learning.ipynb index 43ec5b0353..cbad5df130 100644 --- a/examples/networks/deep_learning.ipynb +++ b/examples/networks/deep_learning.ipynb @@ -281,7 +281,7 @@ "aefcn = AEFCNClusterer(\n", " n_clusters=2,\n", " temporal_latent_space=False,\n", - " clustering_algorithm=\"kmeans\",\n", + " estimator=\"kmeans\",\n", " n_epochs=10,\n", ")\n", "\n", From e83387c1405bd57ec8f96c0a90d7dae9913cf283 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 6 Nov 2024 15:39:49 +0000 Subject: [PATCH 09/25] fixed predict proba --- aeon/clustering/base.py | 2 +- aeon/clustering/deep_learning/_ae_bgru.py | 2 +- aeon/clustering/feature_based/_catch22.py | 13 +------------ aeon/clustering/feature_based/_summary.py | 13 +------------ aeon/clustering/feature_based/_tsfresh.py | 13 +------------ 5 files changed, 5 insertions(+), 38 deletions(-) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index bf73c2a136..92989a4c2c 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -201,7 +201,7 @@ def _predict_proba(self, X) -> np.ndarray: n_clusters = len(np.unique(preds)) if n_clusters is None: n_clusters = int(max(preds)) + 1 - dists = np.zeros((X.shape[0], n_clusters)) + dists = np.zeros((len(X), n_clusters)) for i in range(n_cases): dists[i, preds[i]] = 1 return dists diff --git a/aeon/clustering/deep_learning/_ae_bgru.py b/aeon/clustering/deep_learning/_ae_bgru.py index 9b7df32716..c79632d504 100644 --- a/aeon/clustering/deep_learning/_ae_bgru.py +++ b/aeon/clustering/deep_learning/_ae_bgru.py @@ -141,9 +141,9 @@ def __init__( self.best_file_name = best_file_name self.random_state = random_state self.estimator = estimator + self.n_clusters = n_clusters super().__init__( - n_clusters=n_clusters, estimator=estimator, batch_size=batch_size, last_file_name=last_file_name, diff --git a/aeon/clustering/feature_based/_catch22.py b/aeon/clustering/feature_based/_catch22.py index 74d2d54674..e365e04917 100644 --- a/aeon/clustering/feature_based/_catch22.py +++ b/aeon/clustering/feature_based/_catch22.py @@ -201,18 +201,7 @@ def _predict_proba(self, X) -> np.ndarray: if callable(m): return self._estimator.predict_proba(self._transformer.transform(X)) else: - preds = self._estimator.predict(self._transformer.transform(X)) - unique = np.unique(preds) - for i, u in enumerate(unique): - preds[preds == u] = i - n_cases = len(preds) - n_clusters = self.n_clusters - if n_clusters is None: - n_clusters = int(max(preds)) + 1 - dists = np.zeros((len(X), n_clusters)) - for i in range(n_cases): - dists[i, preds[i]] = 1 - return dists + return super()._predict_proba(X) @classmethod def _get_test_params(cls, parameter_set="default"): diff --git a/aeon/clustering/feature_based/_summary.py b/aeon/clustering/feature_based/_summary.py index ddd3e3825e..616582da2a 100644 --- a/aeon/clustering/feature_based/_summary.py +++ b/aeon/clustering/feature_based/_summary.py @@ -157,15 +157,4 @@ def _predict_proba(self, X) -> np.ndarray: if callable(m): return self._estimator.predict_proba(self._transformer.transform(X)) else: - preds = self._estimator.predict(self._transformer.transform(X)) - unique = np.unique(preds) - for i, u in enumerate(unique): - preds[preds == u] = i - n_cases = len(preds) - n_clusters = self.n_clusters - if n_clusters is None: - n_clusters = int(max(preds)) + 1 - dists = np.zeros((X.shape[0], n_clusters)) - for i in range(n_cases): - dists[i, preds[i]] = 1 - return dists + return super()._predict_proba(X) diff --git a/aeon/clustering/feature_based/_tsfresh.py b/aeon/clustering/feature_based/_tsfresh.py index c735602ff4..4987e9bfda 100644 --- a/aeon/clustering/feature_based/_tsfresh.py +++ b/aeon/clustering/feature_based/_tsfresh.py @@ -198,18 +198,7 @@ def _predict_proba(self, X: np.ndarray) -> np.ndarray: if callable(m): return self._estimator.predict_proba(self._transformer.transform(X)) else: - preds = self._estimator.predict(self._transformer.transform(X)) - unique = np.unique(preds) - for i, u in enumerate(unique): - preds[preds == u] = i - n_cases = len(preds) - n_clusters = self.n_clusters - if n_clusters is None: - n_clusters = int(max(preds)) + 1 - dists = np.zeros((X.shape[0], n_clusters)) - for i in range(n_cases): - dists[i, preds[i]] = 1 - return dists + return super()._predict_proba(X) @classmethod def _get_test_params(cls, parameter_set: str = "default"): From 30fbda933eabfa1866adb58aacbd335c058f33aa Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 6 Nov 2024 16:05:46 +0000 Subject: [PATCH 10/25] fixed dnn tests --- aeon/testing/mock_estimators/_mock_clusterers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aeon/testing/mock_estimators/_mock_clusterers.py b/aeon/testing/mock_estimators/_mock_clusterers.py index ea6772c092..c44d76458f 100644 --- a/aeon/testing/mock_estimators/_mock_clusterers.py +++ b/aeon/testing/mock_estimators/_mock_clusterers.py @@ -30,7 +30,6 @@ class MockDeepClusterer(BaseDeepClusterer): def __init__(self, estimator=None, last_file_name="last_file"): self.last_file_name = last_file_name super().__init__( - n_clusters=None, estimator=estimator, last_file_name=last_file_name, clustering_params={"n_init": 1, "averaging_method": "mean"}, From 3557ac4e1711d5db8145bbf39eaecfa6a7ae949f Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 6 Nov 2024 16:19:44 +0000 Subject: [PATCH 11/25] fixed dnn tests --- aeon/clustering/deep_learning/base.py | 1 - 1 file changed, 1 deletion(-) diff --git a/aeon/clustering/deep_learning/base.py b/aeon/clustering/deep_learning/base.py index ced00367a6..6c9ed5d63f 100644 --- a/aeon/clustering/deep_learning/base.py +++ b/aeon/clustering/deep_learning/base.py @@ -121,7 +121,6 @@ def _fit_clustering(self, X): if ( self.clustering_algorithm != "deprecated" or self.clustering_params is not None - or self.n_clusters is not None ): warnings.warn( "The 'n_clusters' 'clustering_algorithm' and " From 2a5807d3dd992ca0267612f3c4d0fcad005d7abe Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 6 Nov 2024 17:23:25 +0000 Subject: [PATCH 12/25] fix notebook --- examples/networks/deep_learning.ipynb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/networks/deep_learning.ipynb b/examples/networks/deep_learning.ipynb index 43ec5b0353..130b7166d7 100644 --- a/examples/networks/deep_learning.ipynb +++ b/examples/networks/deep_learning.ipynb @@ -288,9 +288,7 @@ "aefcn.fit(X=xtrain)\n", "ypred = aefcn.predict(X=xtest)\n", "print(\"Predictions: \", ypred[0:5])\n", - "print(\"Ground Truth: \", ytest[0:5])\n", - "print()\n", - "print(\"Score : \", aefcn.score(X=xtest))" + "print(\"Ground Truth: \", ytest[0:5])" ] }, { From 3f80902ae299fb35fcefe441c6f5cc0ae1aa915e Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 6 Nov 2024 17:39:52 +0000 Subject: [PATCH 13/25] fixed --- aeon/clustering/deep_learning/_ae_bgru.py | 1 + 1 file changed, 1 insertion(+) diff --git a/aeon/clustering/deep_learning/_ae_bgru.py b/aeon/clustering/deep_learning/_ae_bgru.py index 1df07fa344..457ae6581c 100644 --- a/aeon/clustering/deep_learning/_ae_bgru.py +++ b/aeon/clustering/deep_learning/_ae_bgru.py @@ -140,6 +140,7 @@ def __init__( self.save_last_model = save_last_model self.best_file_name = best_file_name self.random_state = random_state + self.n_clusters = n_clusters super().__init__( clustering_algorithm=clustering_algorithm, From a591c31c111495fefdfe6ef27350e7b80fce1a7e Mon Sep 17 00:00:00 2001 From: chrisholder Date: Fri, 15 Nov 2024 16:52:50 +0100 Subject: [PATCH 14/25] removed deep learner n_clusters and assert labels_ exists --- aeon/clustering/deep_learning/_ae_abgru.py | 4 -- aeon/clustering/deep_learning/_ae_bgru.py | 4 -- aeon/clustering/deep_learning/_ae_dcnn.py | 4 -- aeon/clustering/deep_learning/_ae_drnn.py | 4 -- aeon/clustering/deep_learning/_ae_fcn.py | 4 -- aeon/clustering/deep_learning/_ae_resnet.py | 4 -- .../_yield_clustering_checks.py | 41 +++++++++++++++++++ 7 files changed, 41 insertions(+), 24 deletions(-) diff --git a/aeon/clustering/deep_learning/_ae_abgru.py b/aeon/clustering/deep_learning/_ae_abgru.py index 02943abcf7..5c8866a9e9 100644 --- a/aeon/clustering/deep_learning/_ae_abgru.py +++ b/aeon/clustering/deep_learning/_ae_abgru.py @@ -20,8 +20,6 @@ class AEAttentionBiGRUClusterer(BaseDeepClusterer): Parameters ---------- - n_clusters : int, default=None - Number of clusters for the deep learnign model. clustering_algorithm : str, default="deprecated" Use 'estimator' parameter instead. clustering_params : dict, default=None @@ -100,7 +98,6 @@ class AEAttentionBiGRUClusterer(BaseDeepClusterer): def __init__( self, - n_clusters=None, estimator=None, clustering_algorithm="deprecated", clustering_params=None, @@ -143,7 +140,6 @@ def __init__( self.random_state = random_state super().__init__( - n_clusters=n_clusters, clustering_algorithm=clustering_algorithm, clustering_params=clustering_params, estimator=estimator, diff --git a/aeon/clustering/deep_learning/_ae_bgru.py b/aeon/clustering/deep_learning/_ae_bgru.py index 457ae6581c..59e4fb71ca 100644 --- a/aeon/clustering/deep_learning/_ae_bgru.py +++ b/aeon/clustering/deep_learning/_ae_bgru.py @@ -20,8 +20,6 @@ class AEBiGRUClusterer(BaseDeepClusterer): Parameters ---------- - n_clusters : int, default=None - Number of clusters for the deep learnign model. clustering_algorithm : str, default="deprecated" Use 'estimator' parameter instead. clustering_params : dict, default=None @@ -99,7 +97,6 @@ class AEBiGRUClusterer(BaseDeepClusterer): def __init__( self, - n_clusters=None, clustering_algorithm="deprecated", estimator=None, clustering_params=None, @@ -140,7 +137,6 @@ def __init__( self.save_last_model = save_last_model self.best_file_name = best_file_name self.random_state = random_state - self.n_clusters = n_clusters super().__init__( clustering_algorithm=clustering_algorithm, diff --git a/aeon/clustering/deep_learning/_ae_dcnn.py b/aeon/clustering/deep_learning/_ae_dcnn.py index 486457be0d..e9171c5c01 100644 --- a/aeon/clustering/deep_learning/_ae_dcnn.py +++ b/aeon/clustering/deep_learning/_ae_dcnn.py @@ -19,8 +19,6 @@ class AEDCNNClusterer(BaseDeepClusterer): Parameters ---------- - n_clusters : int, default=None - Number of clusters for the deep learnign model. clustering_algorithm : str, default="deprecated" Use 'estimator' parameter instead. clustering_params : dict, default=None @@ -113,7 +111,6 @@ class AEDCNNClusterer(BaseDeepClusterer): def __init__( self, - n_clusters=None, estimator=None, clustering_algorithm="deprecated", clustering_params=None, @@ -164,7 +161,6 @@ def __init__( self.random_state = random_state super().__init__( - n_clusters=n_clusters, clustering_params=clustering_params, clustering_algorithm=clustering_algorithm, estimator=estimator, diff --git a/aeon/clustering/deep_learning/_ae_drnn.py b/aeon/clustering/deep_learning/_ae_drnn.py index 61777ba78c..2f22c15150 100644 --- a/aeon/clustering/deep_learning/_ae_drnn.py +++ b/aeon/clustering/deep_learning/_ae_drnn.py @@ -24,8 +24,6 @@ class AEDRNNClusterer(BaseDeepClusterer): Parameters ---------- - n_clusters : int, default=None - Number of clusters for the deep learnign model. clustering_algorithm : str, default="deprecated" Please use the 'estimator' parameter. estimator : aeon clusterer, default=None @@ -114,7 +112,6 @@ class AEDRNNClusterer(BaseDeepClusterer): def __init__( self, - n_clusters=None, estimator=None, clustering_algorithm="deprecated", clustering_params=None, @@ -167,7 +164,6 @@ def __init__( self.random_state = random_state super().__init__( - n_clusters=n_clusters, estimator=estimator, clustering_algorithm=clustering_algorithm, clustering_params=clustering_params, diff --git a/aeon/clustering/deep_learning/_ae_fcn.py b/aeon/clustering/deep_learning/_ae_fcn.py index 1787284281..f3ab250f48 100644 --- a/aeon/clustering/deep_learning/_ae_fcn.py +++ b/aeon/clustering/deep_learning/_ae_fcn.py @@ -21,8 +21,6 @@ class AEFCNClusterer(BaseDeepClusterer): Parameters ---------- - n_clusters : int, default=None - Please use 'estimator' parameter. estimator : aeon clusterer, default=None An aeon estimator to be built using the transformed data. Defaults to aeon TimeSeriesKMeans() with euclidean distance @@ -122,7 +120,6 @@ class AEFCNClusterer(BaseDeepClusterer): def __init__( self, - n_clusters=None, estimator=None, clustering_algorithm="deprecated", clustering_params=None, @@ -173,7 +170,6 @@ def __init__( self.save_last_model = save_last_model self.best_file_name = best_file_name self.random_state = random_state - self.n_clusters = n_clusters super().__init__( estimator=estimator, diff --git a/aeon/clustering/deep_learning/_ae_resnet.py b/aeon/clustering/deep_learning/_ae_resnet.py index 3674adaa2c..aed5900d4e 100644 --- a/aeon/clustering/deep_learning/_ae_resnet.py +++ b/aeon/clustering/deep_learning/_ae_resnet.py @@ -24,8 +24,6 @@ class AEResNetClusterer(BaseDeepClusterer): Parameters ---------- - n_clusters : int, default=None - Please use 'estimator' parameter. estimator : aeon clusterer, default=None An aeon estimator to be built using the transformed data. Defaults to aeon TimeSeriesKMeans() with euclidean distance @@ -131,7 +129,6 @@ class method save_last_model_to_file. def __init__( self, - n_clusters=None, estimator=None, n_residual_blocks=3, clustering_algorithm="deprecated", @@ -182,7 +179,6 @@ def __init__( self.best_file_name = best_file_name self.last_file_name = last_file_name self.optimizer = optimizer - self.n_clusters = n_clusters self.history = None diff --git a/aeon/testing/estimator_checking/_yield_clustering_checks.py b/aeon/testing/estimator_checking/_yield_clustering_checks.py index 4843f13056..4c704f55d7 100644 --- a/aeon/testing/estimator_checking/_yield_clustering_checks.py +++ b/aeon/testing/estimator_checking/_yield_clustering_checks.py @@ -7,6 +7,7 @@ from aeon.base._base import _clone_estimator from aeon.clustering.deep_learning import BaseDeepClusterer from aeon.testing.testing_data import FULL_TEST_DATA_DICT +from aeon.utils.validation import get_n_cases def _yield_clustering_checks(estimator_class, estimator_instances, datatypes): @@ -26,6 +27,10 @@ def _yield_clustering_checks(estimator_class, estimator_instances, datatypes): estimator=estimator, datatype=datatypes[i][0], ) + for datatype in datatypes[i]: + yield partial( + check_clusterer_output, estimator=estimator, datatype=datatype + ) def check_clusterer_tags_consistent(estimator_class): @@ -82,3 +87,39 @@ def check_clustering_random_state_deep_learning(estimator, datatype): _weight2 = np.asarray(weights2[j]) np.testing.assert_almost_equal(_weight1, _weight2, 4) + + +def check_clusterer_output(estimator, datatype): + """Test clusterer outputs the correct data types and values. + + Test predict produces a np.array or pd.Series with only values seen in the train + data, and that predict_proba probability estimates add up to one. + """ + estimator = _clone_estimator(estimator) + + unique_labels = np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]) + + # run fit and predict + estimator.fit( + FULL_TEST_DATA_DICT[datatype]["train"][0], + FULL_TEST_DATA_DICT[datatype]["train"][1], + ) + assert hasattr(estimator, "labels_") + assert isinstance(estimator.labels_, np.ndarray) + + y_pred = estimator.predict(FULL_TEST_DATA_DICT[datatype]["test"][0]) + + # check predict + assert isinstance(y_pred, np.ndarray) + assert y_pred.shape == (get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]),) + assert np.all(np.isin(np.unique(y_pred), unique_labels)) + + # check predict proba (all classifiers have predict_proba by default) + y_proba = estimator.predict_proba(FULL_TEST_DATA_DICT[datatype]["test"][0]) + + assert isinstance(y_proba, np.ndarray) + assert y_proba.shape == ( + get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]), + len(unique_labels), + ) + np.testing.assert_almost_equal(y_proba.sum(axis=1), 1, decimal=4) From 2ee81ba8e574ab62a909ebb0abe37cd69e86543b Mon Sep 17 00:00:00 2001 From: chrisholder Date: Sun, 17 Nov 2024 21:25:22 +0100 Subject: [PATCH 15/25] cont --- aeon/clustering/feature_based/_catch22.py | 1 + aeon/clustering/feature_based/_summary.py | 2 ++ aeon/clustering/feature_based/_tsfresh.py | 1 + .../testing/estimator_checking/_yield_clustering_checks.py | 7 ------- 4 files changed, 4 insertions(+), 7 deletions(-) diff --git a/aeon/clustering/feature_based/_catch22.py b/aeon/clustering/feature_based/_catch22.py index e365e04917..33f0b79bc5 100644 --- a/aeon/clustering/feature_based/_catch22.py +++ b/aeon/clustering/feature_based/_catch22.py @@ -164,6 +164,7 @@ def _fit(self, X, y=None): X_t = self._transformer.fit_transform(X, y) self._estimator.fit(X_t, y) + self.labels_ = self._estimator.labels_ return self diff --git a/aeon/clustering/feature_based/_summary.py b/aeon/clustering/feature_based/_summary.py index 616582da2a..309d3ac92f 100644 --- a/aeon/clustering/feature_based/_summary.py +++ b/aeon/clustering/feature_based/_summary.py @@ -121,6 +121,8 @@ def _fit(self, X, y=None): X_t = self._transformer.fit_transform(X, y) self._estimator.fit(X_t, y) + self.labels_ = self._estimator.labels_ + return self def _predict(self, X) -> np.ndarray: diff --git a/aeon/clustering/feature_based/_tsfresh.py b/aeon/clustering/feature_based/_tsfresh.py index 4987e9bfda..ed14e90a47 100644 --- a/aeon/clustering/feature_based/_tsfresh.py +++ b/aeon/clustering/feature_based/_tsfresh.py @@ -162,6 +162,7 @@ def _fit(self, X: np.ndarray, y: Optional[np.ndarray] = None): else: self._estimator.fit(X_t, y) + self.labels_ = self._estimator.labels_ return self def _predict(self, X: np.ndarray) -> np.ndarray: diff --git a/aeon/testing/estimator_checking/_yield_clustering_checks.py b/aeon/testing/estimator_checking/_yield_clustering_checks.py index 4c704f55d7..117a0704b6 100644 --- a/aeon/testing/estimator_checking/_yield_clustering_checks.py +++ b/aeon/testing/estimator_checking/_yield_clustering_checks.py @@ -97,8 +97,6 @@ def check_clusterer_output(estimator, datatype): """ estimator = _clone_estimator(estimator) - unique_labels = np.unique(FULL_TEST_DATA_DICT[datatype]["train"][1]) - # run fit and predict estimator.fit( FULL_TEST_DATA_DICT[datatype]["train"][0], @@ -112,14 +110,9 @@ def check_clusterer_output(estimator, datatype): # check predict assert isinstance(y_pred, np.ndarray) assert y_pred.shape == (get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]),) - assert np.all(np.isin(np.unique(y_pred), unique_labels)) # check predict proba (all classifiers have predict_proba by default) y_proba = estimator.predict_proba(FULL_TEST_DATA_DICT[datatype]["test"][0]) assert isinstance(y_proba, np.ndarray) - assert y_proba.shape == ( - get_n_cases(FULL_TEST_DATA_DICT[datatype]["test"][0]), - len(unique_labels), - ) np.testing.assert_almost_equal(y_proba.sum(axis=1), 1, decimal=4) From ebec4e642365bc0633a1d60ff452f1ababc7496c Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 19 Nov 2024 22:13:27 +0100 Subject: [PATCH 16/25] fix dnns --- aeon/clustering/deep_learning/_ae_resnet.py | 1 + aeon/clustering/deep_learning/base.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/aeon/clustering/deep_learning/_ae_resnet.py b/aeon/clustering/deep_learning/_ae_resnet.py index aed5900d4e..638c64fa57 100644 --- a/aeon/clustering/deep_learning/_ae_resnet.py +++ b/aeon/clustering/deep_learning/_ae_resnet.py @@ -344,6 +344,7 @@ def _fit(self, X): self._fit_clustering(X=X) gc.collect() + return self def _score(self, X, y=None): diff --git a/aeon/clustering/deep_learning/base.py b/aeon/clustering/deep_learning/base.py index 3e4cf6f5ad..64e6cb1cc6 100644 --- a/aeon/clustering/deep_learning/base.py +++ b/aeon/clustering/deep_learning/base.py @@ -136,6 +136,10 @@ def _fit_clustering(self, X): latent_space = self.model_.layers[1].predict(X) self._estimator.fit(X=latent_space) + if hasattr(self._estimator, "labels_"): + self.labels_ = self._estimator.labels_ + else: + self.labels_ = self._estimator.predict(X=latent_space) return self From 02fcee098380671c6a61c1ab24a4cd1be7d041b3 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 19 Nov 2024 22:27:45 +0100 Subject: [PATCH 17/25] pipeline clusterer --- aeon/clustering/compose/_pipeline.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/aeon/clustering/compose/_pipeline.py b/aeon/clustering/compose/_pipeline.py index eeeb2b43b8..1e3ed54ac8 100644 --- a/aeon/clustering/compose/_pipeline.py +++ b/aeon/clustering/compose/_pipeline.py @@ -86,7 +86,9 @@ def __init__(self, transformers, clusterer, random_state=None): ) def _fit(self, X, y=None): - return super()._fit(X, y) + super()._fit(X, y) + self.labels_ = self._estimator.labels_ + return self @classmethod def _get_test_params(cls, parameter_set="default"): From 91048084da1a562052c6819e7e5b174adcd91527 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 19 Nov 2024 22:53:23 +0100 Subject: [PATCH 18/25] fix pipeline --- aeon/clustering/compose/_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/aeon/clustering/compose/_pipeline.py b/aeon/clustering/compose/_pipeline.py index 1e3ed54ac8..3337f7e032 100644 --- a/aeon/clustering/compose/_pipeline.py +++ b/aeon/clustering/compose/_pipeline.py @@ -86,8 +86,8 @@ def __init__(self, transformers, clusterer, random_state=None): ) def _fit(self, X, y=None): - super()._fit(X, y) - self.labels_ = self._estimator.labels_ + return super()._fit(X, y) + self.labels_ = self.clusterer.labels_ return self @classmethod From 1c854875113a3ee86e4cc99c3e58bfd55ee3f01c Mon Sep 17 00:00:00 2001 From: chrisholder Date: Tue, 19 Nov 2024 22:57:18 +0100 Subject: [PATCH 19/25] revert --- aeon/clustering/compose/_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/compose/_pipeline.py b/aeon/clustering/compose/_pipeline.py index 3337f7e032..5a1026550b 100644 --- a/aeon/clustering/compose/_pipeline.py +++ b/aeon/clustering/compose/_pipeline.py @@ -86,7 +86,7 @@ def __init__(self, transformers, clusterer, random_state=None): ) def _fit(self, X, y=None): - return super()._fit(X, y) + super()._fit(X, y) self.labels_ = self.clusterer.labels_ return self From c89d854e8b88f37ed352d701062e10124f1d1387 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 11:27:01 +0100 Subject: [PATCH 20/25] fix --- aeon/clustering/compose/_pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/aeon/clustering/compose/_pipeline.py b/aeon/clustering/compose/_pipeline.py index 5a1026550b..eb6c255806 100644 --- a/aeon/clustering/compose/_pipeline.py +++ b/aeon/clustering/compose/_pipeline.py @@ -87,7 +87,7 @@ def __init__(self, transformers, clusterer, random_state=None): def _fit(self, X, y=None): super()._fit(X, y) - self.labels_ = self.clusterer.labels_ + self.labels_ = self.steps_[-1][1].labels_ return self @classmethod From 4c1e6d0c2f29aa2bdd23f806bba79ed525c8eeb8 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 11:53:44 +0100 Subject: [PATCH 21/25] remove score from dnns --- aeon/clustering/base.py | 4 +--- aeon/clustering/deep_learning/_ae_abgru.py | 6 ------ aeon/clustering/deep_learning/_ae_bgru.py | 6 ------ aeon/clustering/deep_learning/_ae_dcnn.py | 6 ------ aeon/clustering/deep_learning/_ae_drnn.py | 6 ------ aeon/clustering/deep_learning/_ae_resnet.py | 6 ------ 6 files changed, 1 insertion(+), 33 deletions(-) diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 92989a4c2c..39f216933a 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -160,9 +160,7 @@ def _fit_predict(self, X, y=None) -> np.ndarray: Index of the cluster each time series in X belongs to. """ self.fit(X) - if hasattr(self, "labels_"): - return self.labels_ - return self.predict(X) + return self.labels_ def _predict_proba(self, X) -> np.ndarray: """Predicts labels probabilities for sequences in X. diff --git a/aeon/clustering/deep_learning/_ae_abgru.py b/aeon/clustering/deep_learning/_ae_abgru.py index 88065ddffe..3b41dbfddc 100644 --- a/aeon/clustering/deep_learning/_ae_abgru.py +++ b/aeon/clustering/deep_learning/_ae_abgru.py @@ -298,12 +298,6 @@ def _fit(self, X): return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/_ae_bgru.py b/aeon/clustering/deep_learning/_ae_bgru.py index 6e12e23e03..609eaf5d65 100644 --- a/aeon/clustering/deep_learning/_ae_bgru.py +++ b/aeon/clustering/deep_learning/_ae_bgru.py @@ -296,12 +296,6 @@ def _fit(self, X): return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/_ae_dcnn.py b/aeon/clustering/deep_learning/_ae_dcnn.py index 4a83a10eb2..d6c6b8c3d5 100644 --- a/aeon/clustering/deep_learning/_ae_dcnn.py +++ b/aeon/clustering/deep_learning/_ae_dcnn.py @@ -322,12 +322,6 @@ def _fit(self, X): return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/_ae_drnn.py b/aeon/clustering/deep_learning/_ae_drnn.py index a6551d411d..0efedfb730 100644 --- a/aeon/clustering/deep_learning/_ae_drnn.py +++ b/aeon/clustering/deep_learning/_ae_drnn.py @@ -328,12 +328,6 @@ def _fit(self, X): return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - @classmethod def _get_test_params(cls, parameter_set="default"): """Return testing parameter settings for the estimator. diff --git a/aeon/clustering/deep_learning/_ae_resnet.py b/aeon/clustering/deep_learning/_ae_resnet.py index 26acce6b5b..ff34143281 100644 --- a/aeon/clustering/deep_learning/_ae_resnet.py +++ b/aeon/clustering/deep_learning/_ae_resnet.py @@ -362,12 +362,6 @@ def _fit(self, X): return self - def _score(self, X, y=None): - # Transpose to conform to Keras input style. - X = X.transpose(0, 2, 1) - latent_space = self.model_.layers[1].predict(X) - return self._estimator.score(latent_space) - def _fit_multi_rec_model( self, autoencoder, From 6d6849227c694857441c3d4423880d1835823d99 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 11:56:40 +0100 Subject: [PATCH 22/25] remove score from notebooks --- .../clustering/partitional_clustering.ipynb | 119 +----------------- 1 file changed, 4 insertions(+), 115 deletions(-) diff --git a/examples/clustering/partitional_clustering.ipynb b/examples/clustering/partitional_clustering.ipynb index 646845ac70..817f66bd24 100644 --- a/examples/clustering/partitional_clustering.ipynb +++ b/examples/clustering/partitional_clustering.ipynb @@ -1217,46 +1217,14 @@ "cell_type": "markdown", "source": [ "We have formed two clusters. The pattern of the two centroids seems fairly similar,\n", - "and the separation of clusters does not seem very good. We can score the clustering\n", - "with the score method, which by default returns `self.inertia_`, which is a measure\n", - "of between cluster variation used as a stopping condition.\n", + "and the separation of clusters does not seem very good.\n", "For a range of clusteirng comparison algorithms, see the [sklearn clustering API]\n", - "(https://scikit-learn.org/stable/modules/clustering.html#clustering-performance-evaluation)\n", - "\n", - "Low intertia is better, so to conform to the sklearn interface which prefers to\n", - "maximize performance criteria, inertia scores are negative.\n" + "(https://scikit-learn.org/stable/modules/clustering.html#clustering-performance-evaluation)\n" ], "metadata": { "collapsed": false } }, - { - "cell_type": "code", - "source": [ - "s1 = k_means.score(X_test, y_test)\n", - "s1" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-09-25T22:59:26.618842Z", - "start_time": "2024-09-25T22:59:26.613249Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "-5016911.727324263" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 16 - }, { "cell_type": "markdown", "source": [ @@ -1332,39 +1300,12 @@ "source": [ "We have formed two clusters with barycentre averaging using MSM distance. The centroids\n", "seem more distinct in shape now, with the first cluster centroid looking much\n", - "flatter than the second. The inertia has been reduced, so the score is higher." + "flatter than the second." ], "metadata": { "collapsed": false } }, - { - "cell_type": "code", - "source": [ - "s2 = k_means.score(X_test, y_test)\n", - "s2" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-09-25T22:59:26.985041Z", - "start_time": "2024-09-25T22:59:26.980060Z" - } - }, - "outputs": [ - { - "data": { - "text/plain": [ - "-26178.727675421676" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "execution_count": 18 - }, { "cell_type": "markdown", "source": [ @@ -1441,7 +1382,6 @@ ")\n", "\n", "k_medoids.fit(X_train)\n", - "s3 = k_medoids.score(X_test, y_test)\n", "plot_cluster_algorithm(k_medoids, X_test, k_medoids.n_clusters)" ], "metadata": { @@ -1525,7 +1465,6 @@ ")\n", "\n", "k_medoids.fit(X_train)\n", - "s4 = k_medoids.score(X_test, y_test)\n", "plot_cluster_algorithm(k_medoids, X_test, k_medoids.n_clusters)" ], "metadata": { @@ -1568,29 +1507,6 @@ ], "execution_count": 21 }, - { - "cell_type": "code", - "source": [ - "print(f\" PAM DTW score {s3} PAM MSM score {s4}\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-09-25T22:59:27.856857Z", - "start_time": "2024-09-25T22:59:27.851841Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " PAM DTW score -5739730.0 PAM MSM score -25984.0\n" - ] - } - ], - "execution_count": 22 - }, { "cell_type": "markdown", "source": [ @@ -1619,9 +1535,7 @@ ")\n", "\n", "k_medoids.fit(X_train)\n", - "s5 = k_medoids.score(X_test, y_test)\n", - "plot_cluster_algorithm(k_medoids, X_test, k_medoids.n_clusters)\n", - "print(\"Alternate MSM score = \", s5)" + "plot_cluster_algorithm(k_medoids, X_test, k_medoids.n_clusters)" ], "metadata": { "collapsed": false, @@ -1686,7 +1600,6 @@ " random_state=1,\n", ")\n", "clara.fit(X_train)\n", - "s6 = k_medoids.score(X_test, y_test)\n", "plot_cluster_algorithm(clara, X_test, clara.n_clusters)" ], "metadata": { @@ -1755,7 +1668,6 @@ " random_state=1,\n", ")\n", "clara.fit(X_train)\n", - "s7 = k_medoids.score(X_test, y_test)\n", "plot_cluster_algorithm(clara, X_test, clara.n_clusters)" ], "metadata": { @@ -1798,29 +1710,6 @@ ], "execution_count": 25 }, - { - "cell_type": "code", - "source": [ - "print(f\" Clara score {s6} Clarans score = {s7}\")" - ], - "metadata": { - "collapsed": false, - "ExecuteTime": { - "end_time": "2024-09-25T22:59:28.966368Z", - "start_time": "2024-09-25T22:59:28.961420Z" - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " Clara score -25984.0 Clarans score = -25984.0\n" - ] - } - ], - "execution_count": 26 - }, { "cell_type": "markdown", "source": [ From a56b4d4442442e6b5a201a5ff9d6cbd104252386 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 12:03:15 +0100 Subject: [PATCH 23/25] remove score from remaining --- aeon/clustering/_k_shapes.py | 3 --- aeon/clustering/_kernel_k_means.py | 3 --- 2 files changed, 6 deletions(-) diff --git a/aeon/clustering/_k_shapes.py b/aeon/clustering/_k_shapes.py index 4cb50fdec5..8809582a95 100644 --- a/aeon/clustering/_k_shapes.py +++ b/aeon/clustering/_k_shapes.py @@ -181,6 +181,3 @@ def _get_test_params(cls, parameter_set="default"): "verbose": False, "random_state": 1, } - - def _score(self, X, y=None): - return np.abs(self.inertia_) diff --git a/aeon/clustering/_kernel_k_means.py b/aeon/clustering/_kernel_k_means.py index 2a7b255757..9c92d250bd 100644 --- a/aeon/clustering/_kernel_k_means.py +++ b/aeon/clustering/_kernel_k_means.py @@ -205,6 +205,3 @@ def _get_test_params(cls, parameter_set="default") -> dict: "n_jobs": 1, "random_state": 1, } - - def _score(self, X, y=None) -> float: - return np.abs(self.inertia_) From f6b0ffb940b5f8ae277a40d2a8b1624ef09780ad Mon Sep 17 00:00:00 2001 From: chrisholder Date: Wed, 20 Nov 2024 13:55:41 +0100 Subject: [PATCH 24/25] fix notebook --- examples/networks/deep_learning.ipynb | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/networks/deep_learning.ipynb b/examples/networks/deep_learning.ipynb index 130b7166d7..498498f5fe 100644 --- a/examples/networks/deep_learning.ipynb +++ b/examples/networks/deep_learning.ipynb @@ -279,7 +279,6 @@ "xtest, ytest = load_classification(name=\"ArrowHead\", split=\"test\")\n", "\n", "aefcn = AEFCNClusterer(\n", - " n_clusters=2,\n", " temporal_latent_space=False,\n", " clustering_algorithm=\"kmeans\",\n", " n_epochs=10,\n", From 045edfaff1dda4c4673d42cbcac931182af7a2b4 Mon Sep 17 00:00:00 2001 From: chrisholder Date: Thu, 21 Nov 2024 12:15:04 +0100 Subject: [PATCH 25/25] fixed notebook --- examples/networks/deep_learning.ipynb | 184 +++++++++++++++----------- 1 file changed, 104 insertions(+), 80 deletions(-) diff --git a/examples/networks/deep_learning.ipynb b/examples/networks/deep_learning.ipynb index b0f998d452..9e06feabbb 100644 --- a/examples/networks/deep_learning.ipynb +++ b/examples/networks/deep_learning.ipynb @@ -62,12 +62,17 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-21T11:14:08.477299Z", + "start_time": "2024-11-21T11:14:08.433390Z" + } + }, "source": [ "import os\n", "\n", + "from sklearn.cluster import KMeans\n", + "\n", "# avoid imports warning for tensorflow\n", "os.environ[\"TF_CPP_MIN_LOG_LEVEL\"] = \"2\"\n", "\n", @@ -90,7 +95,9 @@ "from aeon.datasets import load_classification, load_regression\n", "from aeon.networks import InceptionNetwork\n", "from aeon.regression.deep_learning import InceptionTimeRegressor" - ] + ], + "outputs": [], + "execution_count": 7 }, { "attachments": {}, @@ -107,30 +114,12 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2023-10-06 17:31:12.574639: E tensorflow/compiler/xla/stream_executor/cuda/cuda_driver.cc:266] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3/3 [==============================] - 0s 68ms/step\n", - "3/3 [==============================] - 0s 52ms/step\n", - "3/3 [==============================] - 0s 54ms/step\n", - "3/3 [==============================] - 0s 84ms/step\n", - "3/3 [==============================] - 0s 86ms/step\n", - "Predictions: ['0' '0' '0' '0' '0']\n", - "Ground Truth: ['0' '0' '0' '0' '0']\n" - ] + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-21T11:12:15.141664Z", + "start_time": "2024-11-21T11:12:02.084792Z" } - ], + }, "source": [ "xtrain, ytrain = load_classification(name=\"ArrowHead\", split=\"train\")\n", "xtest, ytest = load_classification(name=\"ArrowHead\", split=\"test\")\n", @@ -141,7 +130,23 @@ "\n", "print(\"Predictions: \", ypred[0:5])\n", "print(\"Ground Truth: \", ytest[0:5])" - ] + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 72ms/step\n", + "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 78ms/step\n", + "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 72ms/step\n", + "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 74ms/step\n", + "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 82ms/step\n", + "Predictions: ['2' '2' '2' '2' '2']\n", + "Ground Truth: ['0' '0' '0' '0' '0']\n" + ] + } + ], + "execution_count": 2 }, { "attachments": {}, @@ -163,23 +168,12 @@ }, { "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "3/3 [==============================] - 0s 59ms/step\n", - "3/3 [==============================] - 0s 51ms/step\n", - "3/3 [==============================] - 0s 60ms/step\n", - "3/3 [==============================] - 0s 74ms/step\n", - "3/3 [==============================] - 0s 54ms/step\n", - "Predictions: ['0' '0' '0' '0' '0']\n", - "Ground Truth: ['0' '0' '0' '0' '0']\n" - ] + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-21T11:12:30.252190Z", + "start_time": "2024-11-21T11:12:15.222773Z" } - ], + }, "source": [ "xtrain, ytrain = load_classification(name=\"ArrowHead\", split=\"train\")\n", "xtest, ytest = load_classification(name=\"ArrowHead\", split=\"test\")\n", @@ -190,7 +184,23 @@ "\n", "print(\"Predictions: \", ypred[0:5])\n", "print(\"Ground Truth: \", ytest[0:5])" - ] + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 94ms/step\n", + "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 91ms/step\n", + "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 94ms/step\n", + "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 97ms/step\n", + "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 96ms/step\n", + "Predictions: ['0' '0' '0' '0' '0']\n", + "Ground Truth: ['0' '0' '0' '0' '0']\n" + ] + } + ], + "execution_count": 3 }, { "cell_type": "markdown", @@ -205,23 +215,12 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1/1 [==============================] - 0s 198ms/step\n", - "1/1 [==============================] - 0s 219ms/step\n", - "1/1 [==============================] - 0s 196ms/step\n", - "1/1 [==============================] - 0s 195ms/step\n", - "1/1 [==============================] - 0s 209ms/step\n", - "Predictions: [-0.23141143 -0.0241483 -0.01730516 -0.34630919 -3.14292431]\n", - "Ground Truth: [0.0118838 0.00379507 0.08298755 0.04510921 0.12783075]\n" - ] + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-21T11:12:43.183566Z", + "start_time": "2024-11-21T11:12:30.257417Z" } - ], + }, "source": [ "xtrain, ytrain = load_regression(name=\"Covid3Month\", split=\"train\")\n", "xtest, ytest = load_regression(name=\"Covid3Month\", split=\"test\")\n", @@ -232,7 +231,23 @@ "\n", "print(\"Predictions: \", ypred[0:5])\n", "print(\"Ground Truth: \", ytest[0:5])" - ] + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 107ms/step\n", + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 109ms/step\n", + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 112ms/step\n", + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 114ms/step\n", + "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 108ms/step\n", + "Predictions: [-0.4258549 -0.0387525 -0.01732254 -0.60533425 -4.51287463]\n", + "Ground Truth: [0.0118838 0.00379507 0.08298755 0.04510921 0.12783075]\n" + ] + } + ], + "execution_count": 4 }, { "attachments": {}, @@ -257,30 +272,19 @@ }, { "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2/2 [==============================] - 0s 5ms/step\n", - "6/6 [==============================] - 0s 25ms/step\n", - "Predictions: [0 1 0 0 0]\n", - "Ground Truth: ['0' '0' '0' '0' '0']\n", - "\n", - "6/6 [==============================] - 0s 32ms/step\n", - "Score : -701.7947322426963\n" - ] + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-21T11:14:19.577898Z", + "start_time": "2024-11-21T11:14:16.120345Z" } - ], + }, "source": [ "xtrain, _ = load_classification(name=\"ArrowHead\", split=\"train\")\n", "xtest, ytest = load_classification(name=\"ArrowHead\", split=\"test\")\n", "\n", "aefcn = AEFCNClusterer(\n", " temporal_latent_space=False,\n", - " estimator=\"kmeans\",\n", + " estimator=KMeans(n_clusters=3),\n", " n_epochs=10,\n", ")\n", "\n", @@ -288,7 +292,20 @@ "ypred = aefcn.predict(X=xtest)\n", "print(\"Predictions: \", ypred[0:5])\n", "print(\"Ground Truth: \", ytest[0:5])" - ] + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001B[1m2/2\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 27ms/step\n", + "\u001B[1m6/6\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 8ms/step \n", + "Predictions: [2 0 2 2 2]\n", + "Ground Truth: ['0' '0' '0' '0' '0']\n" + ] + } + ], + "execution_count": 8 }, { "attachments": {}, @@ -727,6 +744,13 @@ "- [7] Lafabregue, Baptiste, et al. \"End-to-end deep representation learning for time series clustering:\n", " a comparative study.\" Data Mining and Knowledge Discovery 36.1 (2022): 29-81." ] + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "" } ], "metadata": {