From 23f3f0bbc43e5de404799c82ed9e964f42dc18ca Mon Sep 17 00:00:00 2001 From: Matthew Middlehurst Date: Fri, 22 Nov 2024 22:22:33 +0200 Subject: [PATCH] [ENH] Collection and series base tidy (#2352) * base changes * comment fix * fixes * fixes * add abstract to base init * init * more init * more init * merge --- aeon/anomaly_detection/base.py | 31 ++-- .../tests/test_left_stampi.py | 2 +- aeon/base/_base.py | 7 +- aeon/base/_base_collection.py | 139 +++++++++++------- aeon/base/_base_series.py | 108 +++++++------- .../compose/collection_channel_ensemble.py | 3 + .../estimators/compose/collection_ensemble.py | 3 + .../estimators/compose/collection_pipeline.py | 3 + aeon/base/tests/test_base.py | 2 - aeon/base/tests/test_base_collection.py | 27 ++-- aeon/base/tests/test_base_series.py | 17 ++- aeon/base/tests/test_compose.py | 3 - aeon/classification/base.py | 11 +- aeon/classification/deep_learning/base.py | 1 + .../early_classification/base.py | 14 +- aeon/classification/shapelet_based/_rdst.py | 2 - aeon/classification/shapelet_based/_stc.py | 2 - aeon/classification/tests/test_base.py | 4 +- aeon/clustering/base.py | 6 +- aeon/clustering/deep_learning/base.py | 1 + aeon/clustering/tests/test_base.py | 3 + aeon/networks/base.py | 11 +- aeon/regression/base.py | 22 +-- aeon/regression/deep_learning/base.py | 1 + aeon/regression/shapelet_based/_rdst.py | 2 - aeon/regression/tests/test_base.py | 11 +- aeon/segmentation/_eagglo.py | 2 +- aeon/segmentation/base.py | 17 ++- aeon/similarity_search/base.py | 1 + .../_yield_anomaly_detection_checks.py | 4 +- .../_yield_segmentation_checks.py | 4 +- .../mock_estimators/_mock_classifiers.py | 3 + aeon/transformations/base.py | 3 + aeon/transformations/collection/_reduce.py | 3 + aeon/transformations/collection/base.py | 3 +- .../collection/channel_selection/base.py | 6 +- .../collection/compose/_identity.py | 3 + .../signature_based/_augmentations.py | 12 ++ aeon/transformations/series/base.py | 7 +- 
aeon/utils/_data_types.py | 15 +- aeon/utils/conversion/_convert_collection.py | 2 +- examples/base/series_estimator.ipynb | 9 +- 42 files changed, 287 insertions(+), 243 deletions(-) diff --git a/aeon/anomaly_detection/base.py b/aeon/anomaly_detection/base.py index 87ca40aa19..2e333cf755 100644 --- a/aeon/anomaly_detection/base.py +++ b/aeon/anomaly_detection/base.py @@ -10,7 +10,7 @@ import pandas as pd from aeon.base import BaseSeriesEstimator -from aeon.base._base_series import VALID_INPUT_TYPES +from aeon.base._base_series import VALID_SERIES_INPUT_TYPES class BaseAnomalyDetector(BaseSeriesEstimator): @@ -76,7 +76,7 @@ class BaseAnomalyDetector(BaseSeriesEstimator): """ _tags = { - "X_inner_type": "np.ndarray", # One of VALID_INNER_TYPES + "X_inner_type": "np.ndarray", # One of VALID_SERIES_INNER_TYPES "fit_is_empty": True, "requires_y": False, } @@ -95,14 +95,14 @@ def fit(self, X, y=None, axis=1): Parameters ---------- - X : one of aeon.base._base_series.VALID_INPUT_TYPES + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES The time series to fit the model to. A valid aeon time series data structure. See - aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. - y : one of aeon.base._base_series.VALID_INPUT_TYPES, default=None + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + y : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES, default=None The target values for the time series. A valid aeon time series data structure. See - aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. axis : int The time point axis of the input series if it is 2D. If ``axis==0``, it is assumed each column is a time series and each row is a time point. i.e. 
the @@ -142,10 +142,10 @@ def predict(self, X, axis=1) -> np.ndarray: Parameters ---------- - X : one of aeon.base._base_series.VALID_INPUT_TYPES + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES The time series to fit the model to. A valid aeon time series data structure. See - aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. axis : int, default=1 The time point axis of the input series if it is 2D. If ``axis==0``, it is assumed each column is a time series and each row is a time point. i.e. the @@ -173,14 +173,14 @@ def fit_predict(self, X, y=None, axis=1) -> np.ndarray: Parameters ---------- - X : one of aeon.base._base_series.VALID_INPUT_TYPES + X : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES The time series to fit the model to. A valid aeon time series data structure. See aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. - y : one of aeon.base._base_series.VALID_INPUT_TYPES, default=None + y : one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES, default=None The target values for the time series. A valid aeon time series data structure. See - aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. axis : int, default=1 The time point axis of the input series if it is 2D. If ``axis==0``, it is assumed each column is a time series and each row is a time point. i.e. the @@ -226,7 +226,7 @@ def _fit_predict(self, X, y): self._fit(X, y) return self._predict(X) - def _check_y(self, y: VALID_INPUT_TYPES) -> np.ndarray: + def _check_y(self, y: VALID_SERIES_INPUT_TYPES) -> np.ndarray: # Remind user if y is not required for this estimator on failure req_msg = ( f"{self.__class__.__name__} does not require a y input." 
@@ -235,7 +235,8 @@ def _check_y(self, y: VALID_INPUT_TYPES) -> np.ndarray: ) new_y = y - # must be a valid input type, see VALID_INPUT_TYPES in BaseSeriesEstimator + # must be a valid input type, see VALID_SERIES_INPUT_TYPES in + # BaseSeriesEstimator if isinstance(y, np.ndarray): # check valid shape if y.ndim > 1: @@ -284,8 +285,8 @@ def _check_y(self, y: VALID_INPUT_TYPES) -> np.ndarray: new_y = y.squeeze().values else: raise ValueError( - f"Error in input type for y: it should be one of {VALID_INPUT_TYPES}, " - f"saw {type(y)}" + f"Error in input type for y: it should be one of " + f"{VALID_SERIES_INPUT_TYPES}, saw {type(y)}" ) new_y = new_y.astype(bool) diff --git a/aeon/anomaly_detection/tests/test_left_stampi.py b/aeon/anomaly_detection/tests/test_left_stampi.py index 872f6af149..589d163f7b 100644 --- a/aeon/anomaly_detection/tests/test_left_stampi.py +++ b/aeon/anomaly_detection/tests/test_left_stampi.py @@ -1,4 +1,4 @@ -"""Tests for the LaftSTAMPi class.""" +"""Tests for the LeftSTAMPi class.""" __maintainer__ = ["ferewi"] diff --git a/aeon/base/_base.py b/aeon/base/_base.py index 6a9f7dbb70..41ac7010f3 100644 --- a/aeon/base/_base.py +++ b/aeon/base/_base.py @@ -4,7 +4,7 @@ __all__ = ["BaseAeonEstimator"] import inspect -from abc import ABC +from abc import ABC, abstractmethod from copy import deepcopy from sklearn import clone @@ -12,6 +12,8 @@ from sklearn.ensemble._base import _set_random_states from sklearn.exceptions import NotFittedError +from aeon.utils.validation._dependencies import _check_estimator_deps + class BaseAeonEstimator(BaseEstimator, ABC): """ @@ -44,12 +46,15 @@ class BaseAeonEstimator(BaseEstimator, ABC): "capability:multithreading": False, } + @abstractmethod def __init__(self): self.is_fitted = False # flag to indicate if fit has been called self._tags_dynamic = dict() # storage for dynamic tags super().__init__() + _check_estimator_deps(self) + def reset(self, keep=None): """ Reset the object to a clean post-init state. 
diff --git a/aeon/base/_base_collection.py b/aeon/base/_base_collection.py index 8319f45d8f..ea3b21ed32 100644 --- a/aeon/base/_base_collection.py +++ b/aeon/base/_base_collection.py @@ -1,5 +1,7 @@ """Base class for estimators that fit collections of time series.""" +from abc import abstractmethod + import numpy as np from aeon.base._base import BaseAeonEstimator @@ -9,7 +11,6 @@ resolve_unequal_length_inner_type, ) from aeon.utils.validation import check_n_jobs -from aeon.utils.validation._dependencies import _check_estimator_deps from aeon.utils.validation.collection import ( get_n_cases, get_n_channels, @@ -22,12 +23,13 @@ class BaseCollectionEstimator(BaseAeonEstimator): - """Base class for estimators that use collections of time series for ``fit``. + """ + Base class for estimators that use collections of time series for ``fit``. - Provides functions that are common to estimators which use colections such as + Provides functions that are common to estimators which use collections such as ``BaseClassifier``, ``BaseRegressor``, ``BaseClusterer``, ``BaseSimilaritySearch`` and ``BaseCollectionTransformer``. Functionality includes checking and - conversion of input to ``fit, predict and predict_proba, where relevant. + conversion of input in ``fit``, ``predict`` and ``predict_proba``, where relevant. It also stores the common default tags used by all the subclasses and meta data describing the characteristics of time series passed to ``fit``. @@ -40,45 +42,54 @@ class BaseCollectionEstimator(BaseAeonEstimator): "X_inner_type": "numpy3D", } + @abstractmethod def __init__(self): self.metadata_ = {} # metadata/properties of data seen in fit - self.fit_time_ = 0 # time elapsed in last fit call self._n_jobs = 1 + super().__init__() - _check_estimator_deps(self) def _preprocess_collection(self, X, store_metadata=True): - """Preprocess input X prior to call to fit. + """ + Preprocess input X prior to calling fit. - 1. 
Checks the characteristics of X, store metadata, checks self can handle - the data - 2. convert X to X_inner_type - 3. Check multi-threading against capabilities + 1. Checks the characteristics of X and that self can handle the data + 2. Stores metadata about X in self.metadata_ if store_metadata is True + 3. Converts X to X_inner_type if necessary Parameters ---------- X : collection - See aeon.utils.COLLECTIONS_DATA_TYPES for details - on aeon supported data structures. + See aeon.utils.COLLECTIONS_DATA_TYPES for details on aeon supported + data structures. store_metadata : bool, default=True Whether to store metadata about X in self.metadata_. Returns ------- - Data structure of type self.tags["X_inner_type"] + X : collection + Processed X. A data structure of type self.get_tag("X_inner_type"). + + Raises + ------ + ValueError + If X is an invalid type or has characteristics that the estimator cannot + handle. See Also -------- - _check_X : function that checks X is valid before conversion. - _convert_X : function that converts to inner type. + _check_X : + Function that checks X is valid before conversion. + _convert_X : + Function that converts to inner type. Examples -------- - >>> from aeon.base import BaseCollectionEstimator - >>> import numpy as np - >>> bce = BaseCollectionEstimator() - >>> X = np.random.random(size=(10,20)) - >>> X2 = bce._preprocess_collection(X) + >>> from aeon.testing.mock_estimators import MockClassifier + >>> from aeon.testing.data_generation import make_example_2d_numpy_collection + >>> clf = MockClassifier() + >>> X, _ = make_example_2d_numpy_collection(n_cases=10, n_timepoints=20) + >>> X2 = clf._preprocess_collection(X) >>> X2.shape (10, 1, 20) """ @@ -101,24 +112,25 @@ def _preprocess_collection(self, X, store_metadata=True): return X def _check_X(self, X): - """Check classifier input X is valid. + """ + Check classifier input X is valid. 
Check if the input data is a compatible type, and that this estimator is - able to handle the data characteristics. This is done by matching the - capabilities of the estimator against the metadata for X for - univariate/multivariate, equal length/unequal length and no missing + able to handle the data characteristics. + This is done by matching the capabilities of the estimator against the metadata + for X i.e., univariate/multivariate, equal length/unequal length and no missing values/missing values. Parameters ---------- - X : data structure - See aeon.utils.COLLECTIONS_DATA_TYPES for details - on aeon supported data structures. + X : collection + See aeon.utils.COLLECTIONS_DATA_TYPES for details on aeon supported + data structures. Returns ------- - dict - Meta data about X, with flags: + metadata : dict + Metadata about X, with flags: metadata["multivariate"] : whether X has more than one channel or not metadata["missing_values"] : whether X has missing values or not metadata["unequal_length"] : whether X contains unequal length series. @@ -127,18 +139,28 @@ def _check_X(self, X): metadata["n_timepoints"] : number of timepoints in X if equal length, else None + Raises + ------ + ValueError + If X is an invalid type or has characteristics that the estimator cannot + handle. + See Also -------- - _convert_X : function that converts X after it has been checked. + _convert_X : + Function that converts X after it has been checked. 
Examples -------- - >>> from aeon.classification.hybrid import HIVECOTEV2 - >>> import numpy as np - >>> X = np.random.random(size=(5,3,10)) # X is equal length, multivariate - >>> hc = HIVECOTEV2() - >>> meta = hc._check_X(X) # HC2 can handle this + >>> from aeon.testing.mock_estimators import MockClassifierFullTags + >>> from aeon.testing.data_generation import make_example_3d_numpy + >>> clf = MockClassifierFullTags() + >>> X, _ = make_example_3d_numpy(n_channels=3) # X is equal length, multivariate + >>> meta = clf._check_X(X) # Classifier can handle this """ + # check if X is a valid type + get_type(X) + metadata = self._get_X_metadata(X) # Check classifier capabilities for X allow_multivariate = self.get_tag("capability:multivariate") @@ -157,51 +179,54 @@ def _check_X(self, X): if problems: # construct error message problems_and = " and ".join(problems) - problems_or = " or ".join(problems) msg = ( f"Data seen by instance of {type(self).__name__} has {problems_and}, " - f"but {type(self).__name__} cannot handle {problems_or}. " + f"but {type(self).__name__} cannot handle these characteristics. " ) raise ValueError(msg) return metadata def _convert_X(self, X): - """Convert X to type defined by tag X_inner_type. + """ + Convert X to type defined by tag X_inner_type. If the input data is already an allowed type, it is returned unchanged. If multiple types are allowed by self, then the best one for the type of input - data is selected. So, for example, if X_inner_tag is `["np-list", "numpy3D"]` - and an `df-list` is passed containing equal length series, will be converted - to numpy3D. + data is selected. So, for example, if X_inner_type is ["np-list", "numpy3D"] + and a df-list is passed, it will be converted to numpy3D if the series + are equal length, and np-list if the series are unequal length. Parameters ---------- - X : data structure - Must be of type aeon.utils.COLLECTIONS_DATA_TYPES. 
+ X : collection + See aeon.utils.COLLECTIONS_DATA_TYPES for details on aeon supported + data structures. Returns ------- - data structure of type one of self.get_tag("X_inner_type"). + X : collection + Converted X. A data structure of type self.get_tag("X_inner_type"). See Also -------- - _check_X : function that checks X is valid and finds metadata. + _check_X : + Function that checks X is valid and finds metadata. Examples -------- - >>> from aeon.classification.hybrid import HIVECOTEV2 - >>> import numpy as np + >>> from aeon.testing.mock_estimators import MockClassifier + >>> from aeon.testing.data_generation import make_example_3d_numpy_list >>> from aeon.utils.validation import get_type - >>> X = [np.random.random(size=(5,10)), np.random.random(size=(5,10))] + >>> clf = MockClassifier() + >>> X, _ = make_example_3d_numpy_list(max_n_timepoints=8) >>> get_type(X) 'np-list' - >>> hc = HIVECOTEV2() - >>> hc.get_tag("X_inner_type") + >>> clf.get_tag("X_inner_type") 'numpy3D' - >>> X = hc._convert_X(X) - >>> get_type(X) + >>> X2 = clf._convert_X(X) + >>> get_type(X2) 'numpy3D' """ inner_type = self.get_tag("X_inner_type") @@ -229,12 +254,18 @@ def _convert_X(self, X): return convert_collection(X, inner_type) def _check_shape(self, X): - """Check that the shape of X is consistent with the data seen in fit. + """ + Check that the shape of X is consistent with the data seen in fit. Parameters ---------- X : data structure Must be of type aeon.registry.COLLECTIONS_DATA_TYPES. + + Raises + ------ + ValueError + If the shape of X is not consistent with the data seen in fit. """ # if metadata is empty, then we have not seen any data in fit. If the estimator # has not been fitted, then _is_fitted should catch this. 
diff --git a/aeon/base/_base_series.py b/aeon/base/_base_series.py index 327114a9c3..6c86940f5b 100644 --- a/aeon/base/_base_series.py +++ b/aeon/base/_base_series.py @@ -7,33 +7,35 @@ __maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"] __all__ = ["BaseSeriesEstimator"] +from abc import abstractmethod import numpy as np import pandas as pd from aeon.base._base import BaseAeonEstimator -from aeon.utils.validation._dependencies import _check_estimator_deps # allowed input and internal data types for Series -VALID_INNER_TYPES = [ +VALID_SERIES_INNER_TYPES = [ "np.ndarray", "pd.DataFrame", ] -VALID_INPUT_TYPES = [pd.DataFrame, pd.Series, np.ndarray] +VALID_SERIES_INPUT_TYPES = [pd.DataFrame, pd.Series, np.ndarray] class BaseSeriesEstimator(BaseAeonEstimator): - """Base class for estimators that use single (possibly multivariate) time series. + """ + Base class for estimators that use single (possibly multivariate) time series. - Provides functions that are common to BaseSeriesEstimator objects for the checking - and conversion of input to fit, predict and transform, where relevant. + Provides functions that are common to estimators which use single series such as + ``BaseAnomalyDetector``, ``BaseSegmenter``, ``BaseForecaster``, + and ``BaseSeriesTransformer``. Functionality includes checking and + conversion of input to ``fit``, ``predict`` and ``predict_proba``, where relevant. - It also stores the common default tags used by all the subclasses and metadata - describing the characteristics of time series passed to ``fit`` (or another method - if fit does not exist). + It also stores the common default tags used by all the subclasses and meta data + describing the characteristics of time series passed to ``fit``. 
- Input and internal data format (where m is the number of time points and d is the - number of channels): + Input and internal data format (where ``m`` is the number of time points and ``d`` + is the number of channels): Univariate series: np.ndarray, shape ``(m,)``, ``(m, 1)`` or ``(1, m)`` depending on axis. This is converted to a 2D np.ndarray internally. @@ -57,15 +59,45 @@ class BaseSeriesEstimator(BaseAeonEstimator): _tags = { "capability:univariate": True, "capability:multivariate": False, - "X_inner_type": "np.ndarray", # one of VALID_INNER_TYPES + "X_inner_type": "np.ndarray", # one of VALID_SERIES_INNER_TYPES } + @abstractmethod def __init__(self, axis): self.axis = axis - self.metadata_ = {} # metadata/properties of data seen in fit/predict/transform + self.metadata_ = {} # metadata/properties of data seen in fit super().__init__() - _check_estimator_deps(self) + + def _preprocess_series(self, X, axis, store_metadata): + """Preprocess input X prior to call to fit. + + Checks the characteristics of X, store metadata, checks self can handle + the data then convert X to X_inner_type + + Parameters + ---------- + X: one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + A valid aeon time series data structure. See + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. + axis: int + The time point axis of the input series if it is 2D. If ``axis==0``, it is + assumed each column is a time series and each row is a time point. i.e. the + shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates + the time series are in rows, i.e. the shape of the data is + ``(n_channels, n_timepoints)``. + store_metadata: bool + If True, overwrite metadata with the new metadata from X. + + Returns + ------- + X: one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES + Input time series with data structure of type self.get_tag("X_inner_type"). 
+ """ + meta = self._check_X(X, axis) + if store_metadata: + self.metadata_ = meta + return self._convert_X(X, axis) def _check_X(self, X, axis): """Check input X is valid. @@ -77,9 +109,9 @@ def _check_X(self, X, axis): Parameters ---------- - X: one of aeon.base._base_series.VALID_INPUT_TYPES + X: one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES A valid aeon time series data structure. See - aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. axis: int The time point axis of the input series if it is 2D. If ``axis==0``, it is assumed each column is a time series and each row is a time point. i.e. the @@ -113,7 +145,8 @@ def _check_X(self, X, axis): raise ValueError("pd.DataFrame dtype must be numeric") else: raise ValueError( - f"Input type of X should be one of {VALID_INNER_TYPES}, saw {type(X)}" + f"Input type of X should be one of {VALID_SERIES_INNER_TYPES}, " + f"saw {type(X)}" ) metadata = {} @@ -171,9 +204,9 @@ def _convert_X(self, X, axis): Parameters ---------- - X: one of aeon.base._base_series.VALID_INPUT_TYPES + X: one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES A valid aeon time series data structure. See - aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. axis: int The time point axis of the input series if it is 2D. If ``axis==0``, it is assumed each column is a time series and each row is a time point. i.e. the @@ -183,7 +216,7 @@ def _convert_X(self, X, axis): Returns ------- - X: one of aeon.base._base_series.VALID_INPUT_TYPES + X: one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES Input time series with data structure of type self.get_tag("X_inner_type"). 
""" if axis > 1 or axis < 0: @@ -217,38 +250,3 @@ def _convert_X(self, X, axis): X = X[np.newaxis, :] if self.axis == 1 else X[:, np.newaxis] return X - - def _preprocess_series(self, X, axis, store_metadata): - """Preprocess input X prior to call to fit. - - Checks the characteristics of X, store metadata, checks self can handle - the data then convert X to X_inner_type - - Parameters - ---------- - X: one of aeon.base._base_series.VALID_INPUT_TYPES - A valid aeon time series data structure. See - aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. - axis: int or None - The time point axis of the input series if it is 2D. If ``axis==0``, it is - assumed each column is a time series and each row is a time point. i.e. the - shape of the data is ``(n_timepoints, n_channels)``. ``axis==1`` indicates - the time series are in rows, i.e. the shape of the data is - ``(n_channels, n_timepoints)``. - If None, the default class axis is used. - store_metadata: bool - If True, overwrite metadata with the new metadata from X. - - Returns - ------- - X: one of aeon.base._base_series.VALID_INPUT_TYPES - Input time series with data structure of type self.get_tag("X_inner_type"). 
- """ - if axis is None: - axis = self.axis - - meta = self._check_X(X, axis) - if store_metadata: - self.metadata_ = meta - - return self._convert_X(X, axis) diff --git a/aeon/base/estimators/compose/collection_channel_ensemble.py b/aeon/base/estimators/compose/collection_channel_ensemble.py index 4164536f19..91ed44ef25 100644 --- a/aeon/base/estimators/compose/collection_channel_ensemble.py +++ b/aeon/base/estimators/compose/collection_channel_ensemble.py @@ -6,6 +6,8 @@ __maintainer__ = ["MatthewMiddlehurst"] __all__ = ["BaseCollectionChannelEnsemble"] +from abc import abstractmethod + import numpy as np from sklearn.base import BaseEstimator from sklearn.utils import check_random_state @@ -71,6 +73,7 @@ class BaseCollectionChannelEnsemble(ComposableEstimatorMixin, BaseCollectionEsti # Used in get_fitted_params _fitted_estimators_attr = "ensemble_" + @abstractmethod def __init__( self, _ensemble, diff --git a/aeon/base/estimators/compose/collection_ensemble.py b/aeon/base/estimators/compose/collection_ensemble.py index 1223414ae9..11e9fbc81b 100644 --- a/aeon/base/estimators/compose/collection_ensemble.py +++ b/aeon/base/estimators/compose/collection_ensemble.py @@ -6,6 +6,8 @@ __maintainer__ = ["MatthewMiddlehurst"] __all__ = ["BaseCollectionEnsemble"] +from abc import abstractmethod + import numpy as np from sklearn.base import BaseEstimator, is_classifier from sklearn.metrics import accuracy_score, mean_squared_error @@ -78,6 +80,7 @@ class BaseCollectionEnsemble(ComposableEstimatorMixin, BaseCollectionEstimator): # Used in get_fitted_params _fitted_estimators_attr = "ensemble_" + @abstractmethod def __init__( self, _ensemble, diff --git a/aeon/base/estimators/compose/collection_pipeline.py b/aeon/base/estimators/compose/collection_pipeline.py index 48e333d431..1b90c6c411 100644 --- a/aeon/base/estimators/compose/collection_pipeline.py +++ b/aeon/base/estimators/compose/collection_pipeline.py @@ -6,6 +6,8 @@ __maintainer__ = ["MatthewMiddlehurst"] __all__ = 
["BaseCollectionPipeline"] +from abc import abstractmethod + import numpy as np from sklearn.base import BaseEstimator from sklearn.utils import check_random_state @@ -59,6 +61,7 @@ class BaseCollectionPipeline(ComposableEstimatorMixin, BaseCollectionEstimator): # Used in get_fitted_params _fitted_estimators_attr = "steps_" + @abstractmethod def __init__(self, transformers, _estimator, random_state=None): self.transformers = transformers self._estimator = _estimator diff --git a/aeon/base/tests/test_base.py b/aeon/base/tests/test_base.py index 1caafa0cdf..fcb4ac1907 100644 --- a/aeon/base/tests/test_base.py +++ b/aeon/base/tests/test_base.py @@ -234,7 +234,6 @@ def test_get_fitted_params(): comp_params = composite.get_fitted_params() expected = { - "fit_time_", "foo_", "classes_", "metadata_", @@ -250,7 +249,6 @@ def test_get_fitted_params(): { "mock_", "mock___classes_", - "mock___fit_time_", "mock___foo_", "mock___metadata_", "mock___n_classes_", diff --git a/aeon/base/tests/test_base_collection.py b/aeon/base/tests/test_base_collection.py index 9fd887eb86..97e2232c66 100644 --- a/aeon/base/tests/test_base_collection.py +++ b/aeon/base/tests/test_base_collection.py @@ -6,6 +6,7 @@ import pytest from aeon.base import BaseCollectionEstimator +from aeon.testing.mock_estimators import MockClassifier from aeon.testing.testing_data import ( EQUAL_LENGTH_MULTIVARIATE_CLASSIFICATION, EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION, @@ -83,8 +84,8 @@ def test_get_metadata(data): def test_check_X(): """Test if capabilities correctly tested.""" - dummy1 = BaseCollectionEstimator() - dummy2 = BaseCollectionEstimator() + dummy1 = MockClassifier() + dummy2 = MockClassifier() all_tags = { "capability:multivariate": True, "capability:unequal_length": True, @@ -99,34 +100,34 @@ def test_check_X(): # univariate missing values X[3][0][6] = np.nan assert dummy2._check_X(X) - with pytest.raises(ValueError, match=r"cannot handle missing values"): + with pytest.raises(ValueError, match=r"has 
missing values, but"): dummy1._check_X(X) # multivariate equal length X = EQUAL_LENGTH_MULTIVARIATE_CLASSIFICATION["numpy3D"]["train"][0].copy() assert dummy2._check_X(X) - with pytest.raises(ValueError, match=r"cannot handle multivariate"): + with pytest.raises(ValueError, match=r"has multivariate series, but"): dummy1._check_X(X) # multivariate missing values X[2][1][5] = np.nan assert dummy2._check_X(X) with pytest.raises( - ValueError, match=r"cannot handle missing values or multivariate" + ValueError, match=r"has missing values and multivariate series, but" ): dummy1._check_X(X) # univariate equal length X = UNEQUAL_LENGTH_UNIVARIATE_CLASSIFICATION["np-list"]["train"][0] assert dummy2._check_X(X) - with pytest.raises(ValueError, match=r"cannot handle unequal length series"): + with pytest.raises(ValueError, match=r"has unequal length series, but"): dummy1._check_X(X) # multivariate unequal length X = UNEQUAL_LENGTH_MULTIVARIATE_CLASSIFICATION["np-list"]["train"][0] assert dummy2._check_X(X) with pytest.raises( - ValueError, match=r"cannot handle multivariate series or unequal length" + ValueError, match=r"has multivariate series and unequal length series, but" ): dummy1._check_X(X) @@ -141,7 +142,7 @@ def test_check_X(): dummy1._check_X(X) # invalid type - X = BaseCollectionEstimator() + X = MockClassifier() with pytest.raises( TypeError, match="must be of type np.ndarray, pd.DataFrame or list of" @@ -159,7 +160,7 @@ def test_convert_X(internal_type, data): This test runs a subset of these but also checks classifiers with multiple internal types. 
""" - cls = BaseCollectionEstimator() + cls = MockClassifier() # Equal length should default to numpy3D X = EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION[data]["train"][0] @@ -215,7 +216,7 @@ def test_preprocess_collection(data): """Test the functionality for preprocessing fit.""" data = EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION[data]["train"][0] data2 = np.random.random(size=(11, 1, 30)) - cls = BaseCollectionEstimator() + cls = MockClassifier() X = cls._preprocess_collection(data) assert cls._n_jobs == 1 @@ -223,13 +224,13 @@ def test_preprocess_collection(data): assert get_type(X) == "numpy3D" tags = {"capability:multithreading": True} - cls = BaseCollectionEstimator() + cls = MockClassifier() cls.set_tags(**tags) with pytest.raises(AttributeError, match="self.n_jobs must be set"): cls._preprocess_collection(data) # Test two calls do not overwrite metadata (predict should not reset fit meta) - cls = BaseCollectionEstimator() + cls = MockClassifier() cls._preprocess_collection(data) meta = cls.metadata_ cls._preprocess_collection(data2) @@ -247,7 +248,7 @@ def test_convert_np_list(): assert np2[0].shape == (1, 10) assert np2[1].shape == (1, 20) assert np2[2].shape == (1, 30) - dummy1 = BaseCollectionEstimator() + dummy1 = MockClassifier() x1 = np.random.random(size=(1, 10)) x2 = np.random.rand(10) x3 = np.random.rand(10) diff --git a/aeon/base/tests/test_base_series.py b/aeon/base/tests/test_base_series.py index 207b4c678b..5c3a8003c5 100644 --- a/aeon/base/tests/test_base_series.py +++ b/aeon/base/tests/test_base_series.py @@ -5,7 +5,7 @@ import pytest from numpy.testing import assert_equal -from aeon.base import BaseSeriesEstimator +from aeon.testing.mock_estimators._mock_anomaly_detectors import MockAnomalyDetector UNIVARIATE = { "np.ndarray": np.random.random(size=(20)), @@ -56,7 +56,10 @@ def test_check_X(): """Test if capabilities correctly tested in _check_X.""" - dummy = BaseSeriesEstimator(axis=1) + dummy = MockAnomalyDetector() + dummy.set_tags( + 
**{"capability:multivariate": False, "capability:missing_values": False} + ) # check basic univariate input meta = dummy._check_X(UNIVARIATE["np.ndarray"], axis=1) @@ -160,7 +163,7 @@ def test_check_X(): @pytest.mark.parametrize("input_type", VALID_INPUT_TYPES) def test_convert_X_ndarray_inner(input_type): """Test _convert_X on with np.ndarray inner type.""" - dummy = BaseSeriesEstimator(axis=1) + dummy = MockAnomalyDetector() dummy.set_tags(**{"X_inner_type": "np.ndarray"}) # test univariate X = UNIVARIATE[input_type] @@ -194,7 +197,7 @@ def test_convert_X_ndarray_inner(input_type): @pytest.mark.parametrize("input_type", VALID_INPUT_TYPES) def test_convert_X_dataframe_inner(input_type): """Test _convert_X on with pd.DataFrame inner type.""" - dummy = BaseSeriesEstimator(axis=1) + dummy = MockAnomalyDetector() dummy.set_tags(**{"X_inner_type": "pd.DataFrame"}) # test univariate X = UNIVARIATE[input_type] @@ -227,7 +230,7 @@ def test_convert_X_dataframe_inner(input_type): def test_convert_X_invalid(): """Test _convert_X for invalid inputs.""" - dummy = BaseSeriesEstimator(axis=1) + dummy = MockAnomalyDetector() with pytest.raises(ValueError, match="Input axis should be 0 or 1, saw 2"): dummy._convert_X(UNIVARIATE["np.ndarray"], axis=2) @@ -243,7 +246,7 @@ def test_convert_X_invalid(): @pytest.mark.parametrize("inner_type", VALID_INNER_TYPES) def test_preprocess_series(input_type, inner_type): """Test _preprocess_series for different input and inner types.""" - dummy = BaseSeriesEstimator(axis=1) + dummy = MockAnomalyDetector() dummy.set_tags(**{"X_inner_type": inner_type}) inner_name = inner_type.split(".")[1] # test univariate @@ -287,7 +290,7 @@ def test_preprocess_series(input_type, inner_type): def test_axis(): """Test axis property.""" - dummy = BaseSeriesEstimator(axis=1) + dummy = MockAnomalyDetector() dummy.set_tags(**{"capability:multivariate": True}) X = MULTIVARIATE["np.ndarray"] diff --git a/aeon/base/tests/test_compose.py 
b/aeon/base/tests/test_compose.py index 55ba965e72..0b6d9b75a0 100644 --- a/aeon/base/tests/test_compose.py +++ b/aeon/base/tests/test_compose.py @@ -74,7 +74,6 @@ def test_get_fitted_params(): expected = { "classes_", "ensemble_", - "fit_time_", "metadata_", "n_classes_", "weights_", @@ -90,13 +89,11 @@ def test_get_fitted_params(): { "clf1", "clf1__classes_", - "clf1__fit_time_", "clf1__foo_", "clf1__metadata_", "clf1__n_classes_", "clf2", "clf2__classes_", - "clf2__fit_time_", "clf2__foo_", "clf2__metadata_", "clf2__n_classes_", diff --git a/aeon/classification/base.py b/aeon/classification/base.py index ae050e6860..03cbc356d6 100644 --- a/aeon/classification/base.py +++ b/aeon/classification/base.py @@ -18,12 +18,9 @@ class name: BaseClassifier fitted state inspection - check_is_fitted() """ -__all__ = [ - "BaseClassifier", -] __maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"] +__all__ = ["BaseClassifier"] -import time from abc import abstractmethod from typing import final @@ -34,7 +31,6 @@ class name: BaseClassifier from aeon.base import BaseCollectionEstimator from aeon.base._base import _clone_estimator -from aeon.utils.validation._dependencies import _check_estimator_deps from aeon.utils.validation.collection import get_n_cases from aeon.utils.validation.labels import check_classification_y @@ -65,13 +61,14 @@ class BaseClassifier(BaseCollectionEstimator): "capability:contractable": False, } + @abstractmethod def __init__(self): self.classes_ = [] # classes seen in y, unique labels self.n_classes_ = -1 # number of unique classes in y self._class_dictionary = {} self._estimator_type = "classifier" + super().__init__() - _check_estimator_deps(self) @final def fit(self, X, y) -> BaseCollectionEstimator: @@ -111,13 +108,11 @@ def fit(self, X, y) -> BaseCollectionEstimator: Changes state by creating a fitted model that updates attributes ending in "_" and sets is_fitted flag to True. 
""" - start = int(round(time.time() * 1000)) X, y, single_class = self._fit_setup(X, y) if not single_class: self._fit(X, y) - self.fit_time_ = int(round(time.time() * 1000)) - start # this should happen last self.is_fitted = True return self diff --git a/aeon/classification/deep_learning/base.py b/aeon/classification/deep_learning/base.py index ebee67af7a..2ed56bc0bc 100644 --- a/aeon/classification/deep_learning/base.py +++ b/aeon/classification/deep_learning/base.py @@ -47,6 +47,7 @@ class BaseDeepClassifier(BaseClassifier): "python_dependencies": "tensorflow", } + @abstractmethod def __init__( self, batch_size=40, diff --git a/aeon/classification/early_classification/base.py b/aeon/classification/early_classification/base.py index 5819c53308..e058449ba0 100644 --- a/aeon/classification/early_classification/base.py +++ b/aeon/classification/early_classification/base.py @@ -21,12 +21,9 @@ class name: BaseEarlyClassifier streaming decision info - state_info attribute """ -__all__ = [ - "BaseEarlyClassifier", -] -__maintainer__ = [] +__maintainer__ = ["MatthewMiddlehurst"] +__all__ = ["BaseEarlyClassifier"] -import time from abc import abstractmethod import numpy as np @@ -49,8 +46,6 @@ class BaseEarlyClassifier(BaseCollectionEstimator): Class labels, possibly strings. n_classes_ : int Number of classes (length of classes_). - fit_time_ : int - Time (in milliseconds) for fit to run. _class_dictionary : dict dictionary mapping classes_ onto integers 0...n_classes_-1. 
_n_jobs : int, default=1 @@ -63,12 +58,11 @@ class BaseEarlyClassifier(BaseCollectionEstimator): "fit_is_empty": False, } + @abstractmethod def __init__(self): self.classes_ = [] self.n_classes_ = 0 - self.fit_time_ = 0 self._class_dictionary = {} - self._n_jobs = 1 """ An array containing the state info for each decision in X from update and @@ -114,11 +108,9 @@ def fit(self, X, y): self.reset() # All of this can move up to BaseCollection - start = int(round(time.time() * 1000)) X = self._preprocess_collection(X) y = BaseClassifier._check_y(self, y, self.metadata_["n_cases"]) self._fit(X, y) - self.fit_time_ = int(round(time.time() * 1000)) - start # this should happen last self.is_fitted = True return self diff --git a/aeon/classification/shapelet_based/_rdst.py b/aeon/classification/shapelet_based/_rdst.py index 250617d141..cfdbbeb571 100644 --- a/aeon/classification/shapelet_based/_rdst.py +++ b/aeon/classification/shapelet_based/_rdst.py @@ -92,8 +92,6 @@ class RDSTClassifier(BaseClassifier): The unique class labels in the training set. n_classes_ : int The number of unique classes in the training set. - fit_time_ : int - The time (in milliseconds) for ``fit`` to run. transformed_data_ : list of shape (n_estimators) of ndarray The transformed training dataset for all classifiers. Only saved when ``save_transformed_data`` is `True`. diff --git a/aeon/classification/shapelet_based/_stc.py b/aeon/classification/shapelet_based/_stc.py index c0ac9df290..9bd80a8cc2 100644 --- a/aeon/classification/shapelet_based/_stc.py +++ b/aeon/classification/shapelet_based/_stc.py @@ -77,8 +77,6 @@ class ShapeletTransformClassifier(BaseClassifier): The unique class labels in the training set. n_classes_ : int The number of unique classes in the training set. - fit_time_ : int - The time (in milliseconds) for ``fit`` to run. n_cases_ : int The number of train cases in the training set. 
n_channels_ : int diff --git a/aeon/classification/tests/test_base.py b/aeon/classification/tests/test_base.py index 353e1eaa7f..e59baa1bf4 100644 --- a/aeon/classification/tests/test_base.py +++ b/aeon/classification/tests/test_base.py @@ -139,7 +139,7 @@ def test_unequal_length_input(data): # Unable to handle unequal length series dummy = MockClassifier() - with pytest.raises(ValueError, match=r"cannot handle unequal length series"): + with pytest.raises(ValueError, match=r"has unequal length series, but"): dummy.fit(X, y) # Able to handle unequal length series @@ -170,7 +170,7 @@ def test_multivariate_equal_length_input(data): # Unable to handle multivariate series dummy = MockClassifier() - with pytest.raises(ValueError, match=r"cannot handle multivariate series"): + with pytest.raises(ValueError, match=r"has multivariate series, but"): dummy.fit(X, y) # Able to handle multivariate series diff --git a/aeon/clustering/base.py b/aeon/clustering/base.py index 39f216933a..6c8b4344ae 100644 --- a/aeon/clustering/base.py +++ b/aeon/clustering/base.py @@ -3,14 +3,12 @@ __maintainer__ = [] __all__ = ["BaseClusterer"] -import time from abc import abstractmethod from typing import final import numpy as np from aeon.base import BaseCollectionEstimator -from aeon.utils.validation._dependencies import _check_estimator_deps class BaseClusterer(BaseCollectionEstimator): @@ -26,13 +24,13 @@ class BaseClusterer(BaseCollectionEstimator): "fit_is_empty": False, } + @abstractmethod def __init__(self): # required for compatibility with some sklearn interfaces e.g. # CalibratedClassifierCV self._estimator_type = "clusterer" super().__init__() - _check_estimator_deps(self) @final def fit(self, X, y=None) -> BaseCollectionEstimator: @@ -56,10 +54,8 @@ def fit(self, X, y=None) -> BaseCollectionEstimator: Fitted estimator. 
""" self.reset() - _start_time = int(round(time.time() * 1000)) X = self._preprocess_collection(X) self._fit(X) - self.fit_time_ = int(round(time.time() * 1000)) - _start_time self.is_fitted = True return self diff --git a/aeon/clustering/deep_learning/base.py b/aeon/clustering/deep_learning/base.py index cedac33546..4a05e8c662 100644 --- a/aeon/clustering/deep_learning/base.py +++ b/aeon/clustering/deep_learning/base.py @@ -37,6 +37,7 @@ class BaseDeepClusterer(BaseClusterer): "python_dependencies": "tensorflow", } + @abstractmethod def __init__( self, estimator=None, diff --git a/aeon/clustering/tests/test_base.py b/aeon/clustering/tests/test_base.py index 8a8fb81b2d..ee765a2f7a 100644 --- a/aeon/clustering/tests/test_base.py +++ b/aeon/clustering/tests/test_base.py @@ -43,6 +43,9 @@ def test_correct_input(): class _TestClusterer(BaseClusterer): """Clusterer for testing base class fit/predict/predict_proba.""" + def __init__(self): + super().__init__() + def _fit(self, X, y=None): """Fit dummy.""" return self diff --git a/aeon/networks/base.py b/aeon/networks/base.py index ec86878dd7..e517894a49 100644 --- a/aeon/networks/base.py +++ b/aeon/networks/base.py @@ -13,17 +13,18 @@ class BaseDeepLearningNetwork(ABC): """Abstract base class for deep learning networks.""" - def __init__(self, soft_dependencies="tensorflow", python_version="<3.13"): - _check_soft_dependencies(soft_dependencies) - _check_python_version(python_version) - super().__init__() - _config = { "python_dependencies": ["tensorflow"], "python_version": "<3.13", "structure": "encoder", } + @abstractmethod + def __init__(self, soft_dependencies="tensorflow", python_version="<3.13"): + _check_soft_dependencies(soft_dependencies) + _check_python_version(python_version) + super().__init__() + @abstractmethod def build_network(self, input_shape, **kwargs): """Construct a network and return its input and output layers. 
diff --git a/aeon/regression/base.py b/aeon/regression/base.py index 7f861bfd36..7f9521b5ee 100644 --- a/aeon/regression/base.py +++ b/aeon/regression/base.py @@ -18,11 +18,8 @@ class name: BaseRegressor """ __maintainer__ = [] -__all__ = [ - "BaseRegressor", -] +__all__ = ["BaseRegressor"] -import time from abc import abstractmethod from typing import final @@ -45,16 +42,6 @@ class BaseRegressor(BaseCollectionEstimator): Attributes ---------- - fit_time_ : int - Time (in milliseconds) for fit to run. - _n_jobs : int - Number of threads to use in fit as determined by n_jobs. - - fit_time_ : int - Time (in milliseconds) for ``fit`` to run. - _n_jobs : int - Number of threads to use in estimator methods such as ``fit`` and ``predict``. - Determined by the ``n_jobs`` parameter if present. _estimator_type : string The type of estimator. Required by some ``sklearn`` tools, set to "regressor". """ @@ -65,11 +52,8 @@ class BaseRegressor(BaseCollectionEstimator): "capability:contractable": False, } + @abstractmethod def __init__(self): - # reserved attributes written to in fit - self.fit_time_ = -1 - self._n_jobs = 1 - # required for compatibility with some sklearn interfaces self._estimator_type = "regressor" @@ -113,12 +97,10 @@ def fit(self, X, y) -> BaseCollectionEstimator: Changes state by creating a fitted model that updates attributes ending in "_" and sets is_fitted flag to True. 
""" - start = int(round(time.time() * 1000)) X, y = self._fit_setup(X, y) self._fit(X, y) - self.fit_time_ = int(round(time.time() * 1000)) - start # this should happen last self.is_fitted = True return self diff --git a/aeon/regression/deep_learning/base.py b/aeon/regression/deep_learning/base.py index ff1ed2a40f..b48e3b2792 100644 --- a/aeon/regression/deep_learning/base.py +++ b/aeon/regression/deep_learning/base.py @@ -40,6 +40,7 @@ class BaseDeepRegressor(BaseRegressor): "python_dependencies": "tensorflow", } + @abstractmethod def __init__(self, batch_size=40, last_file_name="last_model"): self.batch_size = batch_size self.last_file_name = last_file_name diff --git a/aeon/regression/shapelet_based/_rdst.py b/aeon/regression/shapelet_based/_rdst.py index ed02775815..dcc77a3988 100644 --- a/aeon/regression/shapelet_based/_rdst.py +++ b/aeon/regression/shapelet_based/_rdst.py @@ -74,8 +74,6 @@ class RDSTRegressor(BaseRegressor): Attributes ---------- - fit_time_ : int - The time (in milliseconds) for ``fit`` to run. transformed_data_ : list of shape (n_estimators) of ndarray The transformed training dataset for all classifiers. Only saved when ``save_transformed_data`` is `True`. 
diff --git a/aeon/regression/tests/test_base.py b/aeon/regression/tests/test_base.py index d06e0bf710..30302de8c7 100644 --- a/aeon/regression/tests/test_base.py +++ b/aeon/regression/tests/test_base.py @@ -18,6 +18,9 @@ class _TestRegressor(BaseRegressor): """Dummy regressor for testing base class fit/predict.""" + def __init__(self): + super().__init__() + def _fit(self, X, y): """Fit dummy.""" return self @@ -37,6 +40,9 @@ class _DummyHandlesAllInput(BaseRegressor): "X_inner_type": ["np-list", "numpy3D"], } + def __init__(self): + super().__init__() + def _fit(self, X, y): """Fit dummy.""" return self @@ -56,6 +62,9 @@ class _TestHandlesAllInput(BaseRegressor): "X_inner_type": ["np-list", "numpy3D"], } + def __init__(self): + super().__init__() + def _fit(self, X, y): """Fit dummy.""" return self @@ -114,7 +123,7 @@ def test_unequal_length_input(data): dummy = _TestRegressor() X = UNEQUAL_LENGTH_UNIVARIATE_REGRESSION[data]["train"][0] y = np.random.random(size=10) - with pytest.raises(ValueError, match=r"cannot handle unequal length series"): + with pytest.raises(ValueError, match=r"has unequal length series, but"): dummy.fit(X, y) dummy = _TestHandlesAllInput() _assert_fit_predict(dummy, X, y) diff --git a/aeon/segmentation/_eagglo.py b/aeon/segmentation/_eagglo.py index 67605b7761..74a585ed4b 100644 --- a/aeon/segmentation/_eagglo.py +++ b/aeon/segmentation/_eagglo.py @@ -80,7 +80,7 @@ class EAggloSegmenter(BaseSegmenter): """ _tags = { - "X_inner_type": "pd.DataFrame", # One of VALID_INNER_TYPES + "X_inner_type": "pd.DataFrame", "capability:multivariate": True, "fit_is_empty": False, "returns_dense": False, diff --git a/aeon/segmentation/base.py b/aeon/segmentation/base.py index 6fbcc93100..7a746fd6a3 100644 --- a/aeon/segmentation/base.py +++ b/aeon/segmentation/base.py @@ -10,7 +10,7 @@ import pandas as pd from aeon.base import BaseSeriesEstimator -from aeon.base._base_series import VALID_INPUT_TYPES +from aeon.base._base_series import 
VALID_SERIES_INPUT_TYPES class BaseSegmenter(BaseSeriesEstimator): @@ -64,12 +64,13 @@ class BaseSegmenter(BaseSeriesEstimator): """ _tags = { - "X_inner_type": "np.ndarray", # One of VALID_INNER_TYPES + "X_inner_type": "np.ndarray", # One of VALID_SERIES_INNER_TYPES "fit_is_empty": True, "requires_y": False, "returns_dense": True, } + @abstractmethod def __init__(self, axis, n_segments=2): self.n_segments = n_segments @@ -86,9 +87,9 @@ def fit(self, X, y=None, axis=1): Parameters ---------- - X : One of ``VALID_INPUT_TYPES`` + X : One of ``VALID_SERIES_INPUT_TYPES`` Input time series to fit a segmenter. - y : One of ``VALID_INPUT_TYPES`` or None, default None + y : One of ``VALID_SERIES_INPUT_TYPES`` or None, default None Training time series, a labeled 1D series same length as X for supervised segmentation. axis : int, default = None @@ -128,7 +129,7 @@ def predict(self, X, axis=1): Parameters ---------- - X : One of ``VALID_INPUT_TYPES`` + X : One of ``VALID_SERIES_INPUT_TYPES`` Input time series axis : int, default = None Axis along which to segment if passed a multivariate series (2D input) @@ -168,15 +169,15 @@ def _predict(self, X) -> np.ndarray: """Create and return a segmentation of X.""" ... - def _check_y(self, y: VALID_INPUT_TYPES): + def _check_y(self, y: VALID_SERIES_INPUT_TYPES): """Check y specific to segmentation. 
y must be a univariate series """ - if type(y) not in VALID_INPUT_TYPES: + if type(y) not in VALID_SERIES_INPUT_TYPES: raise ValueError( f"Error in input type for y: it should be one of " - f"{VALID_INPUT_TYPES}, saw {type(y)}" + f"{VALID_SERIES_INPUT_TYPES}, saw {type(y)}" ) if isinstance(y, np.ndarray): # Check valid shape diff --git a/aeon/similarity_search/base.py b/aeon/similarity_search/base.py index 94febb93da..5b0ce8c555 100644 --- a/aeon/similarity_search/base.py +++ b/aeon/similarity_search/base.py @@ -60,6 +60,7 @@ class BaseSimilaritySearch(BaseCollectionEstimator): "X_inner_type": ["np-list", "numpy3D"], } + @abstractmethod def __init__( self, distance: str = "euclidean", diff --git a/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py b/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py index 41eda5a86b..0d94fbc34f 100644 --- a/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py +++ b/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py @@ -6,7 +6,7 @@ import pytest from aeon.base._base import _clone_estimator -from aeon.base._base_series import VALID_INNER_TYPES +from aeon.base._base_series import VALID_SERIES_INNER_TYPES from aeon.testing.data_generation import ( make_example_1d_numpy, make_example_2d_numpy_series, @@ -46,7 +46,7 @@ def check_anomaly_detector_overrides_and_tags(estimator_class): # Test valid tag for X_inner_type X_inner_type = estimator_class.get_class_tag(tag_name="X_inner_type") - assert X_inner_type in VALID_INNER_TYPES + assert X_inner_type in VALID_SERIES_INNER_TYPES # Must have at least one set to True multi = estimator_class.get_class_tag(tag_name="capability:multivariate") diff --git a/aeon/testing/estimator_checking/_yield_segmentation_checks.py b/aeon/testing/estimator_checking/_yield_segmentation_checks.py index 898f034f05..6c2c964f5a 100644 --- a/aeon/testing/estimator_checking/_yield_segmentation_checks.py +++ 
b/aeon/testing/estimator_checking/_yield_segmentation_checks.py @@ -6,7 +6,7 @@ import pytest from aeon.base._base import _clone_estimator -from aeon.base._base_series import VALID_INNER_TYPES +from aeon.base._base_series import VALID_SERIES_INNER_TYPES def _yield_segmentation_checks(estimator_class, estimator_instances, datatypes): @@ -33,7 +33,7 @@ def check_segmenter_base_functionality(estimator_class): assert not fit_is_empty == "_fit" not in estimator_class.__dict__ # Test valid tag for X_inner_type X_inner_type = estimator_class.get_class_tag(tag_name="X_inner_type") - assert X_inner_type in VALID_INNER_TYPES + assert X_inner_type in VALID_SERIES_INNER_TYPES # Must have at least one set to True multi = estimator_class.get_class_tag(tag_name="capability:multivariate") uni = estimator_class.get_class_tag(tag_name="capability:univariate") diff --git a/aeon/testing/mock_estimators/_mock_classifiers.py b/aeon/testing/mock_estimators/_mock_classifiers.py index da766b8e16..bcfcb8162e 100644 --- a/aeon/testing/mock_estimators/_mock_classifiers.py +++ b/aeon/testing/mock_estimators/_mock_classifiers.py @@ -12,6 +12,9 @@ class MockClassifier(BaseClassifier): """Mock classifier for testing fit/predict.""" + def __init__(self): + super().__init__() + def _fit(self, X, y): """Fit dummy.""" self.foo_ = "bar" diff --git a/aeon/transformations/base.py b/aeon/transformations/base.py index f6ccf5b889..7e4998a910 100644 --- a/aeon/transformations/base.py +++ b/aeon/transformations/base.py @@ -19,7 +19,10 @@ class BaseTransformer(BaseAeonEstimator): "removes_missing_values": False, } + @abstractmethod def __init__(self): + self._estimator_type = "transformer" + super().__init__() @abstractmethod diff --git a/aeon/transformations/collection/_reduce.py b/aeon/transformations/collection/_reduce.py index ce8bf75d61..00831afe96 100644 --- a/aeon/transformations/collection/_reduce.py +++ b/aeon/transformations/collection/_reduce.py @@ -22,6 +22,9 @@ class 
Tabularizer(BaseCollectionTransformer): "capability:multivariate": True, } + def __init__(self): + super().__init__() + def _transform(self, X, y=None): """Transform collection into tabular dataframe. diff --git a/aeon/transformations/collection/base.py b/aeon/transformations/collection/base.py index 7b601126a7..e6c774fee9 100644 --- a/aeon/transformations/collection/base.py +++ b/aeon/transformations/collection/base.py @@ -44,9 +44,8 @@ class BaseCollectionTransformer(BaseCollectionEstimator, BaseTransformer): "removes_unequal_length": False, } + @abstractmethod def __init__(self): - self._estimator_type = "transformer" - super().__init__() @final diff --git a/aeon/transformations/collection/channel_selection/base.py b/aeon/transformations/collection/channel_selection/base.py index f77afd9764..e9459e3d25 100644 --- a/aeon/transformations/collection/channel_selection/base.py +++ b/aeon/transformations/collection/channel_selection/base.py @@ -6,6 +6,7 @@ __maintainer__ = ["TonyBagnall"] +from abc import abstractmethod from aeon.transformations.collection.base import BaseCollectionTransformer @@ -22,9 +23,8 @@ class BaseChannelSelector(BaseCollectionTransformer): List of channels selected in fit. 
""" - def __init__( - self, - ): + @abstractmethod + def __init__(self): self.channels_selected_ = [] super().__init__() diff --git a/aeon/transformations/collection/compose/_identity.py b/aeon/transformations/collection/compose/_identity.py index b9a7652bcc..20c6674b6b 100644 --- a/aeon/transformations/collection/compose/_identity.py +++ b/aeon/transformations/collection/compose/_identity.py @@ -16,6 +16,9 @@ class CollectionId(BaseCollectionTransformer): "capability:missing_values": True, } + def __init__(self): + super().__init__() + def _transform(self, X, y=None): return X diff --git a/aeon/transformations/collection/signature_based/_augmentations.py b/aeon/transformations/collection/signature_based/_augmentations.py index 0bd4525d53..850a40f709 100644 --- a/aeon/transformations/collection/signature_based/_augmentations.py +++ b/aeon/transformations/collection/signature_based/_augmentations.py @@ -73,6 +73,9 @@ class _AddTime(BaseCollectionTransformer): "fit_is_empty": True, # is fit empty and can be skipped? Yes = True } + def __init__(self): + super().__init__() + def _transform(self, X, y=None): data = np.swapaxes(X, 1, 2) # Batch and length dim @@ -98,6 +101,9 @@ class _InvisibilityReset(BaseCollectionTransformer): "fit_is_empty": True, # is fit empty and can be skipped? 
Yes = True } + def __init__(self): + super().__init__() + def _transform(self, X, y=None): X = np.swapaxes(X, 1, 2) @@ -138,6 +144,9 @@ class _LeadLag(BaseCollectionTransformer): "fit_is_empty": True, } + def __init__(self): + super().__init__() + def _transform(self, X, y=None): X = np.swapaxes(X, 1, 2) @@ -193,6 +202,9 @@ class _BasePoint(BaseCollectionTransformer): "capability:multivariate": True, } + def __init__(self): + super().__init__() + def _transform(self, X, y=None): X = np.swapaxes(X, 1, 2) zero_vec = np.zeros(shape=(X.shape[0], 1, X.shape[2])) diff --git a/aeon/transformations/series/base.py b/aeon/transformations/series/base.py index 0a4eb894e4..cdbd7e50c9 100644 --- a/aeon/transformations/series/base.py +++ b/aeon/transformations/series/base.py @@ -27,6 +27,7 @@ class BaseSeriesTransformer(BaseSeriesEstimator, BaseTransformer): "output_data_type": "Series", } + @abstractmethod def __init__(self, axis): super().__init__(axis=axis) @@ -294,9 +295,9 @@ def _postprocess_series(self, Xt, axis): Parameters ---------- - Xt: one of aeon.base._base_series.VALID_INPUT_TYPES + Xt: one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES A valid aeon time series data structure. See - aeon.base._base_series.VALID_INPUT_TYPES for aeon supported types. + aeon.base._base_series.VALID_SERIES_INPUT_TYPES for aeon supported types. Intended for algorithms which have another series as output. axis: int The axids of time in the series. @@ -309,7 +310,7 @@ def _postprocess_series(self, Xt, axis): Returns ------- - Xt: one of aeon.base._base_series.VALID_INPUT_TYPES + Xt: one of aeon.base._base_series.VALID_SERIES_INPUT_TYPES New time series input reshaped to match the original input. 
""" if axis is None: diff --git a/aeon/utils/_data_types.py b/aeon/utils/_data_types.py index 0ae6ce9311..0202679fb3 100644 --- a/aeon/utils/_data_types.py +++ b/aeon/utils/_data_types.py @@ -8,24 +8,23 @@ Checks of input data are handled in the `aeon.utils.validation` module, and conversion is handled in the `aeon.utils.conversion` module. - """ SERIES_DATA_TYPES = [ "pd.Series", # univariate time series of shape (n_timepoints) - "pd.DataFrame", # multivariate time series of shape (n_timepoints, n_channels) by - # default or (n_channels, n_timepoints) if set by axis ==1 - "np.ndarray", # uni/multivariate time series of shape (n_timepoints, n_channels) - # by default or (n_channels, n_timepoints) if set by axis ==1 + "pd.DataFrame", # uni/multivariate time series of shape (n_timepoints, + # n_channels) by default or (n_channels, n_timepoints) if set by axis == 1 + "np.ndarray", # uni/multivariate time series of shape (n_timepoints, + # n_channels) by default or (n_channels, n_timepoints) if set by axis ==1 ] COLLECTIONS_DATA_TYPES = [ "numpy3D", # 3D np.ndarray of format (n_cases, n_channels, n_timepoints) - "np-list", # python list of 2D numpy array of length [n_cases], + "np-list", # python list of 2D np.ndarray of length [n_cases], # each of shape (n_channels, n_timepoints_i) - "df-list", # python list of 2D pd.DataFrames of length [n_cases], each a of - # shape (n_channels, n_timepoints_i) + "df-list", # python list of 2D pd.DataFrames of length [n_cases], each + # of shape (n_channels, n_timepoints_i) "numpy2D", # 2D np.ndarray of shape (n_cases, n_timepoints) "pd-wide", # 2D pd.DataFrame of shape (n_cases, n_timepoints) "pd-multiindex", # pd.DataFrame with multi-index, diff --git a/aeon/utils/conversion/_convert_collection.py b/aeon/utils/conversion/_convert_collection.py index 9c3a3eb33c..e41ed0c8a2 100644 --- a/aeon/utils/conversion/_convert_collection.py +++ b/aeon/utils/conversion/_convert_collection.py @@ -440,7 +440,7 @@ def 
resolve_unequal_length_inner_type(inner_types: Sequence[str]) -> str: return "pd-multiindex" raise ValueError( f"Error, no valid inner types for unequal series in {inner_types} " - f"must be np-list or pd-multiindex" + f"must be np-list, df-list or pd-multiindex" ) diff --git a/examples/base/series_estimator.ipynb b/examples/base/series_estimator.ipynb index ecfab1e302..57e719cd78 100644 --- a/examples/base/series_estimator.ipynb +++ b/examples/base/series_estimator.ipynb @@ -38,11 +38,12 @@ "import numpy as np\n", "import pandas as pd\n", "\n", - "from aeon.base import BaseSeriesEstimator\n", + "from aeon.testing.mock_estimators._mock_anomaly_detectors import MockAnomalyDetector\n", "\n", - "# We use the abstract base class for example purposes, regular classes will not\n", - "# have a class axis parameter.\n", - "bs = BaseSeriesEstimator(axis=0)" + "# We use a mock anomaly detector for example purposes, It has a default axis of 1,\n", + "# but we will change it to 0 for these examples\n", + "bs = MockAnomalyDetector()\n", + "bs.axis = 0" ], "outputs": [], "execution_count": 14