diff --git a/.github/utilities/run_examples.sh b/.github/utilities/run_examples.sh index 3eeac963a3..fd7376c05b 100755 --- a/.github/utilities/run_examples.sh +++ b/.github/utilities/run_examples.sh @@ -9,6 +9,7 @@ excluded=() if [ "$1" = true ]; then excluded+=( "examples/datasets/load_data_from_web.ipynb" + "examples/benchmarking/published_results.ipynb" "examples/benchmarking/reference_results.ipynb" "examples/benchmarking/bakeoff_results.ipynb" "examples/benchmarking/regression.ipynb" @@ -21,6 +22,7 @@ if [ "$1" = true ]; then "examples/classification/interval_based.ipynb" "examples/classification/shapelet_based.ipynb" "examples/classification/convolution_based.ipynb" + "examples/similarity_search/code_speed.ipynb" ) fi diff --git a/.github/workflows/periodic_tests.yml b/.github/workflows/periodic_tests.yml index 8c5eeca564..ac81989b27 100644 --- a/.github/workflows/periodic_tests.yml +++ b/.github/workflows/periodic_tests.yml @@ -83,6 +83,31 @@ jobs: # Save cache with the current date (ENV set in numba_cache action) key: numba-run-notebook-examples-${{ runner.os }}-3.10-${{ env.CURRENT_DATE }} + test-core-imports: + runs-on: ubuntu-22.04 + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install aeon and dependencies + uses: nick-fields/retry@v3 + with: + timeout_minutes: 30 + max_attempts: 3 + command: python -m pip install . 
+ + - name: Show dependencies + run: python -m pip list + + - name: Run import test + run: python aeon/testing/tests/test_core_imports.py + test-no-soft-deps: runs-on: ubuntu-22.04 diff --git a/.github/workflows/pr_core_dep_import.yml b/.github/workflows/pr_core_dep_import.yml new file mode 100644 index 0000000000..19fb56e294 --- /dev/null +++ b/.github/workflows/pr_core_dep_import.yml @@ -0,0 +1,43 @@ +name: PR module imports + +on: + push: + branches: + - main + pull_request: + branches: + - main + paths: + - "aeon/**" + - ".github/workflows/**" + - "pyproject.toml" + +concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.ref }} + cancel-in-progress: true + +jobs: + test-core-imports: + runs-on: ubuntu-22.04 + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install aeon and dependencies + uses: nick-fields/retry@v3 + with: + timeout_minutes: 30 + max_attempts: 3 + command: python -m pip install . 
+ + - name: Show dependencies + run: python -m pip list + + - name: Run import test + run: python aeon/testing/tests/test_core_imports.py diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml index 0a5ed4c673..abbb0f596f 100644 --- a/.github/workflows/pr_pytest.yml +++ b/.github/workflows/pr_pytest.yml @@ -48,7 +48,7 @@ jobs: run: python -m pip list - name: Run tests - run: python -m pytest -n logical -k 'not TestAll' + run: python -m pytest -n logical pytest: runs-on: ${{ matrix.os }} diff --git a/.readthedocs.yml b/.readthedocs.yml index c031c0d25b..1b016c167c 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -11,7 +11,7 @@ python: - docs build: - os: ubuntu-20.04 + os: ubuntu-22.04 tools: python: "3.10" diff --git a/README.md b/README.md index f51a79291a..0763b2f5de 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,7 @@ The following modules are still considered experimental, and the [deprecation po does not apply: - `anomaly_detection` +- `forecasting` - `segmentation` - `similarity_search` - `visualisation` diff --git a/aeon/base/tests/test_base_collection.py b/aeon/base/tests/test_base_collection.py index 97e2232c66..fff1f75f38 100644 --- a/aeon/base/tests/test_base_collection.py +++ b/aeon/base/tests/test_base_collection.py @@ -14,7 +14,7 @@ UNEQUAL_LENGTH_MULTIVARIATE_CLASSIFICATION, UNEQUAL_LENGTH_UNIVARIATE_CLASSIFICATION, ) -from aeon.utils import COLLECTIONS_DATA_TYPES +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES from aeon.utils.validation import get_type diff --git a/aeon/classification/distance_based/_proximity_tree.py b/aeon/classification/distance_based/_proximity_tree.py index e3db90864d..9af2edfe84 100644 --- a/aeon/classification/distance_based/_proximity_tree.py +++ b/aeon/classification/distance_based/_proximity_tree.py @@ -234,7 +234,7 @@ def _get_best_splitter(self, X, y): dist = distance( X[j], splitter[0][labels[k]], - metric=measure, + measure=measure, **splitter[1][measure], ) if dist < 
min_dist: @@ -320,7 +320,7 @@ def _build_tree(self, X, y, depth, node_id, parent_target_value=None): dist = distance( X[i], splitter[0][labels[j]], - metric=measure, + measure=measure, **splitter[1][measure], ) if dist < min_dist: @@ -404,7 +404,7 @@ def _classify(self, treenode, x): dist = distance( x, treenode.splitter[0][branches[i]], - metric=measure, + measure=measure, **treenode.splitter[1][measure], ) if dist < min_dist: diff --git a/aeon/classification/distance_based/_time_series_neighbors.py b/aeon/classification/distance_based/_time_series_neighbors.py index a24459d182..efb7473e65 100644 --- a/aeon/classification/distance_based/_time_series_neighbors.py +++ b/aeon/classification/distance_based/_time_series_neighbors.py @@ -111,7 +111,7 @@ def _fit(self, X, y): y : array-like, shape = (n_cases) The class labels. """ - self.metric_ = get_distance_function(metric=self.distance) + self.metric_ = get_distance_function(measure=self.distance) self.X_ = X self.classes_, self.y_ = np.unique(y, return_inverse=True) return self diff --git a/aeon/classification/tests/test_base.py b/aeon/classification/tests/test_base.py index e59baa1bf4..49782cab85 100644 --- a/aeon/classification/tests/test_base.py +++ b/aeon/classification/tests/test_base.py @@ -15,7 +15,7 @@ EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION, UNEQUAL_LENGTH_UNIVARIATE_CLASSIFICATION, ) -from aeon.utils import COLLECTIONS_DATA_TYPES +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES __maintainer__ = [] diff --git a/aeon/clustering/_clara.py b/aeon/clustering/_clara.py index 66da2e9920..edc334f6c9 100644 --- a/aeon/clustering/_clara.py +++ b/aeon/clustering/_clara.py @@ -42,7 +42,7 @@ class TimeSeriesCLARA(BaseClusterer): If a np.ndarray provided it must be of shape (n_clusters,) and contain the indexes of the time series to use as centroids. distance : str or Callable, default='msm' - Distance metric to compute similarity between time series. 
A list of valid + Distance measure to compute similarity between time series. A list of valid strings for metrics can be found in the documentation for :func:`aeon.distances.get_distance_function`. If a callable is passed it must be a function that takes two 2d numpy arrays as input and returns a float. @@ -73,7 +73,7 @@ class TimeSeriesCLARA(BaseClusterer): If `None`, the random number generator is the `RandomState` instance used by `np.random`. distance_params : dict, default=None - Dictionary containing kwargs for the distance metric being used. + Dictionary containing kwargs for the distance measure being used. Attributes ---------- @@ -189,7 +189,7 @@ def _fit(self, X: np.ndarray, y=None): curr_centers = pam.cluster_centers_ if isinstance(pam.distance, str): pairwise_matrix = pairwise_distance( - X, curr_centers, metric=self.distance, **pam._distance_params + X, curr_centers, measure=self.distance, **pam._distance_params ) else: pairwise_matrix = pairwise_distance( diff --git a/aeon/clustering/_clarans.py b/aeon/clustering/_clarans.py index 71ca1a9284..c13d177dfa 100644 --- a/aeon/clustering/_clarans.py +++ b/aeon/clustering/_clarans.py @@ -43,8 +43,8 @@ class TimeSeriesCLARANS(TimeSeriesKMedoids): If a np.ndarray provided it must be of shape (n_clusters,) and contain the indexes of the time series to use as centroids. distance : str or Callable, default='msm' - Distance metric to compute similarity between time series. A list of valid - strings for metrics can be found in the documentation for + Distance measure to compute similarity between time series. A list of valid + strings for measures can be found in the documentation for :func:`aeon.distances.get_distance_function`. If a callable is passed it must be a function that takes two 2d numpy arrays as input and returns a float. 
max_neighbours : int, default=None, @@ -62,7 +62,7 @@ class TimeSeriesCLARANS(TimeSeriesKMedoids): random_state : int or np.random.RandomState instance or None, default=None Determines random number generation for centroid initialization. distance_params : dict, default=None - Dictionary containing kwargs for the distance metric being used. + Dictionary containing kwargs for the distance measure being used. Attributes ---------- diff --git a/aeon/clustering/_elastic_som.py b/aeon/clustering/_elastic_som.py index 36a8769b13..e4edec42ab 100644 --- a/aeon/clustering/_elastic_som.py +++ b/aeon/clustering/_elastic_som.py @@ -44,8 +44,8 @@ class ElasticSOM(BaseClusterer): n_clusters : int, default=8 The number of clusters to form as well as the number of centroids to generate. distance : str or Callable, default='dtw' - Distance metric to compute similarity between time series. A list of valid - strings for metrics can be found in the documentation for + Distance measure to compute similarity between time series. A list of valid + strings for measures can be found in the documentation for :func:`aeon.distances.get_distance_function`. If a callable is passed it must be a function that takes two 2d numpy arrays as input and returns a float. 
init : str or np.ndarray, default='random' @@ -224,7 +224,7 @@ def _find_bmu(self, x, weights): pairwise_matrix = pairwise_distance( x, weights, - metric=self.distance, + measure=self.distance, **self._distance_params, ) return pairwise_matrix.argmin(axis=1) @@ -366,7 +366,7 @@ def _kmeans_plus_plus_center_initializer(self, X: np.ndarray): for _ in range(1, self.n_clusters): pw_dist = pairwise_distance( - X, X[indexes], metric=self.distance, **self._distance_params + X, X[indexes], measure=self.distance, **self._distance_params ) min_distances = pw_dist.min(axis=1) probabilities = min_distances / min_distances.sum() diff --git a/aeon/clustering/_k_means.py b/aeon/clustering/_k_means.py index e68d09e1b8..7c1e9a07d2 100644 --- a/aeon/clustering/_k_means.py +++ b/aeon/clustering/_k_means.py @@ -54,8 +54,8 @@ class TimeSeriesKMeans(BaseClusterer): n_timepoints) and contains the time series to use as centroids. distance : str or Callable, default='msm' - Distance metric to compute similarity between time series. A list of valid - strings for metrics can be found in the documentation for + Distance measure to compute similarity between time series. A list of valid + strings for measures can be found in the documentation for :func:`aeon.distances.get_distance_function`. If a callable is passed it must be a function that takes two 2d numpy arrays as input and returns a float. 
n_init : int, default=10 @@ -236,7 +236,7 @@ def _fit_one_init(self, X: np.ndarray) -> tuple: prev_labels = None for i in range(self.max_iter): curr_pw = pairwise_distance( - X, cluster_centres, metric=self.distance, **self._distance_params + X, cluster_centres, measure=self.distance, **self._distance_params ) curr_labels = curr_pw.argmin(axis=1) curr_inertia = curr_pw.min(axis=1).sum() @@ -273,13 +273,13 @@ def _fit_one_init(self, X: np.ndarray) -> tuple: def _predict(self, X: np.ndarray, y=None) -> np.ndarray: if isinstance(self.distance, str): pairwise_matrix = pairwise_distance( - X, self.cluster_centers_, metric=self.distance, **self._distance_params + X, self.cluster_centers_, measure=self.distance, **self._distance_params ) else: pairwise_matrix = pairwise_distance( X, self.cluster_centers_, - metric=self.distance, + measure=self.distance, **self._distance_params, ) return pairwise_matrix.argmin(axis=1) @@ -346,7 +346,7 @@ def _kmeans_plus_plus_center_initializer(self, X: np.ndarray): for _ in range(1, self.n_clusters): pw_dist = pairwise_distance( - X, X[indexes], metric=self.distance, **self._distance_params + X, X[indexes], measure=self.distance, **self._distance_params ) min_distances = pw_dist.min(axis=1) probabilities = min_distances / min_distances.sum() @@ -381,7 +381,7 @@ def _handle_empty_cluster( index_furthest_from_centre = curr_pw.min(axis=1).argmax() cluster_centres[current_empty_cluster_index] = X[index_furthest_from_centre] curr_pw = pairwise_distance( - X, cluster_centres, metric=self.distance, **self._distance_params + X, cluster_centres, measure=self.distance, **self._distance_params ) curr_labels = curr_pw.argmin(axis=1) curr_inertia = curr_pw.min(axis=1).sum() diff --git a/aeon/clustering/_k_medoids.py b/aeon/clustering/_k_medoids.py index a54220cec2..6a00f2a46e 100644 --- a/aeon/clustering/_k_medoids.py +++ b/aeon/clustering/_k_medoids.py @@ -56,8 +56,8 @@ class TimeSeriesKMedoids(BaseClusterer): If a np.ndarray provided it must be of 
shape (n_clusters,) and contain the indexes of the time series to use as centroids. distance : str or Callable, default='msm' - Distance metric to compute similarity between time series. A list of valid - strings for metrics can be found in the documentation for + Distance measure to compute similarity between time series. A list of valid + strings for measures can be found in the documentation for :func:`aeon.distances.get_distance_function`. If a callable is passed it must be a function that takes two 2d numpy arrays as input and returns a float. method : str, default='pam' @@ -88,7 +88,7 @@ class TimeSeriesKMedoids(BaseClusterer): If `None`, the random number generator is the `RandomState` instance used by `np.random`. distance_params: dict, default=None - Dictionary containing kwargs for the distance metric being used. + Dictionary containing kwargs for the distance measure being used. Attributes ---------- @@ -211,7 +211,7 @@ def _fit(self, X: np.ndarray, y=None): def _predict(self, X: np.ndarray, y=None) -> np.ndarray: if isinstance(self.distance, str): pairwise_matrix = pairwise_distance( - X, self.cluster_centers_, metric=self.distance, **self._distance_params + X, self.cluster_centers_, measure=self.distance, **self._distance_params ) else: pairwise_matrix = pairwise_distance( @@ -456,7 +456,7 @@ def _check_params(self, X: np.ndarray) -> None: f"n_clusters ({self.n_clusters}) cannot be larger than " f"n_cases ({X.shape[0]})" ) - self._distance_callable = get_distance_function(metric=self.distance) + self._distance_callable = get_distance_function(measure=self.distance) self._distance_cache = np.full((X.shape[0], X.shape[0]), np.inf) if self.method == "alternate": @@ -486,7 +486,7 @@ def _kmedoids_plus_plus_center_initializer(self, X: np.ndarray): for _ in range(1, self.n_clusters): pw_dist = pairwise_distance( - X, X[indexes], metric=self.distance, **self._distance_params + X, X[indexes], measure=self.distance, **self._distance_params ) min_distances = 
pw_dist.min(axis=1) probabilities = min_distances / min_distances.sum() diff --git a/aeon/clustering/averaging/_ba_petitjean.py b/aeon/clustering/averaging/_ba_petitjean.py index 3d456bfa69..6b3765f77e 100644 --- a/aeon/clustering/averaging/_ba_petitjean.py +++ b/aeon/clustering/averaging/_ba_petitjean.py @@ -55,7 +55,7 @@ def petitjean_barycenter_average( random_state: int or None, default=None Random state to use for the barycenter averaging. **kwargs - Keyword arguments to pass to the distance metric. + Keyword arguments to pass to the distance measure. Returns ------- diff --git a/aeon/clustering/averaging/_ba_subgradient.py b/aeon/clustering/averaging/_ba_subgradient.py index 12410ba6d9..369ad6bded 100644 --- a/aeon/clustering/averaging/_ba_subgradient.py +++ b/aeon/clustering/averaging/_ba_subgradient.py @@ -70,7 +70,7 @@ def subgradient_barycenter_average( random_state: int or None, default=None Random state to use for the barycenter averaging. **kwargs - Keyword arguments to pass to the distance metric. + Keyword arguments to pass to the distance measure. Returns ------- diff --git a/aeon/clustering/averaging/_ba_utils.py b/aeon/clustering/averaging/_ba_utils.py index 4b449fb0a8..496f1f6e74 100644 --- a/aeon/clustering/averaging/_ba_utils.py +++ b/aeon/clustering/averaging/_ba_utils.py @@ -31,7 +31,7 @@ def _medoids( return X if precomputed_pairwise_distance is None: - precomputed_pairwise_distance = pairwise_distance(X, metric=distance, **kwargs) + precomputed_pairwise_distance = pairwise_distance(X, measure=distance, **kwargs) x_size = X.shape[0] distance_matrix = np.zeros((x_size, x_size)) @@ -155,6 +155,6 @@ def _get_alignment_path( elif distance == "adtw": return adtw_alignment_path(ts, center, window=window, warp_penalty=warp_penalty) else: - # When numba version > 0.57 add more informative error with what metric + # When numba version > 0.57 add more informative error with what measure # was passed. 
raise ValueError("Distance parameter invalid") diff --git a/aeon/clustering/averaging/_barycenter_averaging.py b/aeon/clustering/averaging/_barycenter_averaging.py index 78f2583c59..48397f8f84 100644 --- a/aeon/clustering/averaging/_barycenter_averaging.py +++ b/aeon/clustering/averaging/_barycenter_averaging.py @@ -84,7 +84,7 @@ def elastic_barycenter_average( random_state: int or None, default=None Random state to use for the barycenter averaging. **kwargs - Keyword arguments to pass to the distance metric. + Keyword arguments to pass to the distance measure. Returns ------- diff --git a/aeon/distances/_distance.py b/aeon/distances/_distance.py index 6b3c9d91a3..0004884184 100644 --- a/aeon/distances/_distance.py +++ b/aeon/distances/_distance.py @@ -118,7 +118,7 @@ class DistanceKwargs(TypedDict, total=False): def distance( x: np.ndarray, y: np.ndarray, - metric: Union[str, DistanceFunction], + measure: Union[str, DistanceFunction], **kwargs: Unpack[DistanceKwargs], ) -> float: """Compute the distance between two time series. @@ -131,13 +131,13 @@ def distance( y : np.ndarray Second time series, either univariate, shape ``(n_timepoints,)``, or multivariate, shape ``(n_channels, n_timepoints)``. - metric : str or Callable - The distance metric to use. - A list of valid distance metrics can be found in the documentation for + measure : str or Callable + The distance measure to use. + A list of valid distance measures can be found in the documentation for :func:`aeon.distances.get_distance_function` or by calling the function :func:`aeon.distances.get_distance_function_names`. kwargs : Any - Arguments for metric. Refer to each metrics documentation for a list of + Arguments for measure. Refer to each measure documentation for a list of possible arguments. Returns @@ -149,7 +149,7 @@ def distance( ------ ValueError If x and y are not 1D, or 2D arrays. - If metric is not a valid string or callable. + If measure is not a valid string or callable. 
Examples -------- @@ -157,21 +157,21 @@ def distance( >>> from aeon.distances import distance >>> x = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) >>> y = np.array([[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]]) - >>> distance(x, y, metric="dtw") + >>> distance(x, y, measure="dtw") 768.0 """ - if metric in DISTANCES_DICT: - return DISTANCES_DICT[metric]["distance"](x, y, **kwargs) - elif isinstance(metric, Callable): - return metric(x, y, **kwargs) + if measure in DISTANCES_DICT: + return DISTANCES_DICT[measure]["distance"](x, y, **kwargs) + elif isinstance(measure, Callable): + return measure(x, y, **kwargs) else: - raise ValueError("Metric must be one of the supported strings or a callable") + raise ValueError("Measure must be one of the supported strings or a callable") def pairwise_distance( x: np.ndarray, y: Optional[np.ndarray] = None, - metric: Union[str, DistanceFunction, None] = None, + measure: Union[str, DistanceFunction, None] = None, symmetric: bool = True, **kwargs: Unpack[DistanceKwargs], ) -> np.ndarray: @@ -185,20 +185,20 @@ def pairwise_distance( y : np.ndarray or None, default=None A single series or a collection of time series of shape ``(m_timepoints,)`` or ``(m_cases, m_timepoints)`` or ``(m_cases, m_channels, m_timepoints)`` - metric : str or Callable - The distance metric to use. - A list of valid distance metrics can be found in the documentation for + measure : str or Callable + The distance measure to use. + A list of valid distance measures can be found in the documentation for :func:`aeon.distances.get_distance_function` or by calling the function :func:`aeon.distances.get_distance_function_names`. symmetric : bool, default=True - If True and a function is provided as the "metric" paramter, then it will + If True and a function is provided as the "measure" parameter, then it will compute a symmetric distance matrix where d(x, y) = d(y, x). Only the lower triangle is calculated, and the upper triangle is ignored. 
If False and a - function is provided as the "metric" parameter, then it will compute an + function is provided as the "measure" parameter, then it will compute an asymmetric distance matrix, and the entire matrix (including both upper and lower triangles) is returned. kwargs : Any - Extra arguments for metric. Refer to each metric documentation for a list of + Extra arguments for measure. Refer to each measure documentation for a list of possible arguments. Returns @@ -211,7 +211,7 @@ def pairwise_distance( ValueError If X is not 2D or 3D array when only passing X. If X and y are not 1D, 2D or 3D arrays when passing both X and y. - If metric is not a valid string or callable. + If measure is not a valid string or callable. Examples -------- @@ -219,7 +219,7 @@ def pairwise_distance( >>> from aeon.distances import pairwise_distance >>> # Distance between each time series in a collection of time series >>> X = np.array([[[1, 2, 3]],[[4, 5, 6]], [[7, 8, 9]]]) - >>> pairwise_distance(X, metric='dtw') + >>> pairwise_distance(X, measure='dtw') array([[ 0., 26., 108.], [ 26., 0., 26.], [108., 26., 0.]]) @@ -227,26 +227,26 @@ def pairwise_distance( >>> # Distance between two collections of time series >>> X = np.array([[[1, 2, 3]],[[4, 5, 6]], [[7, 8, 9]]]) >>> y = np.array([[[11, 12, 13]],[[14, 15, 16]], [[17, 18, 19]]]) - >>> pairwise_distance(X, y, metric='dtw') + >>> pairwise_distance(X, y, measure='dtw') array([[300., 507., 768.], [147., 300., 507.], [ 48., 147., 300.]]) >>> X = np.array([[[1, 2, 3]],[[4, 5, 6]], [[7, 8, 9]]]) >>> y_univariate = np.array([11, 12, 13]) - >>> pairwise_distance(X, y_univariate, metric='dtw') + >>> pairwise_distance(X, y_univariate, measure='dtw') array([[300.], [147.], [ 48.]]) """ - if metric in PAIRWISE_DISTANCE: - return DISTANCES_DICT[metric]["pairwise_distance"](x, y, **kwargs) - elif isinstance(metric, Callable): + if measure in PAIRWISE_DISTANCE: + return DISTANCES_DICT[measure]["pairwise_distance"](x, y, **kwargs) + elif 
isinstance(measure, Callable): if y is None and not symmetric: - return _custom_func_pairwise(x, x, metric, **kwargs) - return _custom_func_pairwise(x, y, metric, **kwargs) + return _custom_func_pairwise(x, x, measure, **kwargs) + return _custom_func_pairwise(x, y, measure, **kwargs) else: - raise ValueError("Metric must be one of the supported strings or a callable") + raise ValueError("Measure must be one of the supported strings or a callable") def _custom_func_pairwise( @@ -302,7 +302,7 @@ def _custom_from_multiple_to_multiple_distance( def alignment_path( x: np.ndarray, y: np.ndarray, - metric: Union[str, DistanceFunction, None] = None, + measure: Union[str, DistanceFunction, None] = None, **kwargs: Unpack[DistanceKwargs], ) -> tuple[list[tuple[int, int]], float]: """Compute the alignment path and distance between two time series. @@ -313,13 +313,13 @@ def alignment_path( First time series. y : np.ndarray, of shape (m_channels, m_timepoints) or (m_timepoints,) Second time series. - metric : str or Callable - The distance metric to use. - A list of valid distance metrics can be found in the documentation for + measure : str or Callable + The distance measure to use. + A list of valid distance measures can be found in the documentation for :func:`aeon.distances.get_distance_function` or by calling the function :func:`aeon.distances.get_distance_function_names`. kwargs : any - Arguments for metric. Refer to each metrics documentation for a list of + Arguments for measure. Refer to each measure documentation for a list of possible arguments. Returns @@ -335,7 +335,7 @@ def alignment_path( ------ ValueError If x and y are not 1D, or 2D arrays. - If metric is not one of the supported strings or a callable. + If measure is not one of the supported strings or a callable. 
Examples -------- @@ -343,21 +343,21 @@ def alignment_path( >>> from aeon.distances import alignment_path >>> x = np.array([[1, 2, 3, 6]]) >>> y = np.array([[1, 2, 3, 4]]) - >>> alignment_path(x, y, metric='dtw') + >>> alignment_path(x, y, measure='dtw') ([(0, 0), (1, 1), (2, 2), (3, 3)], 4.0) """ - if metric in ALIGNMENT_PATH: - return DISTANCES_DICT[metric]["alignment_path"](x, y, **kwargs) - elif isinstance(metric, Callable): - return metric(x, y, **kwargs) + if measure in ALIGNMENT_PATH: + return DISTANCES_DICT[measure]["alignment_path"](x, y, **kwargs) + elif isinstance(measure, Callable): + return measure(x, y, **kwargs) else: - raise ValueError("Metric must be one of the supported strings") + raise ValueError("Measure must be one of the supported strings") def cost_matrix( x: np.ndarray, y: np.ndarray, - metric: Union[str, DistanceFunction, None] = None, + measure: Union[str, DistanceFunction, None] = None, **kwargs: Unpack[DistanceKwargs], ) -> np.ndarray: """Compute the alignment path and distance between two time series. @@ -368,13 +368,13 @@ def cost_matrix( First time series. y : np.ndarray, of shape (m_channels, m_timepoints) or (m_timepoints,) Second time series. - metric : str or Callable - The distance metric to use. - A list of valid distance metrics can be found in the documentation for + measure : str or Callable + The distance measure to use. + A list of valid distance measures can be found in the documentation for :func:`aeon.distances.get_distance_function` or by calling the function :func:`aeon.distances.get_distance_function_names`. kwargs : Any - Arguments for metric. Refer to each metrics documentation for a list of + Arguments for measure. Refer to each measures documentation for a list of possible arguments. Returns @@ -386,7 +386,7 @@ def cost_matrix( ------ ValueError If x and y are not 1D, or 2D arrays. - If metric is not one of the supported strings or a callable. + If measure is not one of the supported strings or a callable. 
Examples -------- @@ -394,7 +394,7 @@ def cost_matrix( >>> from aeon.distances import cost_matrix >>> x = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) >>> y = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) - >>> cost_matrix(x, y, metric="dtw") + >>> cost_matrix(x, y, measure="dtw") array([[ 0., 1., 5., 14., 30., 55., 91., 140., 204., 285.], [ 1., 0., 1., 5., 14., 30., 55., 91., 140., 204.], [ 5., 1., 0., 1., 5., 14., 30., 55., 91., 140.], @@ -406,12 +406,12 @@ def cost_matrix( [204., 140., 91., 55., 30., 14., 5., 1., 0., 1.], [285., 204., 140., 91., 55., 30., 14., 5., 1., 0.]]) """ - if metric in COST_MATRIX: - return DISTANCES_DICT[metric]["cost_matrix"](x, y, **kwargs) - elif isinstance(metric, Callable): - return metric(x, y, **kwargs) + if measure in COST_MATRIX: + return DISTANCES_DICT[measure]["cost_matrix"](x, y, **kwargs) + elif isinstance(measure, Callable): + return measure(x, y, **kwargs) else: - raise ValueError("Metric must be one of the supported strings") + raise ValueError("Measure must be one of the supported strings") def get_distance_function_names() -> list[str]: @@ -440,11 +440,11 @@ def get_distance_function_names() -> list[str]: return sorted(DISTANCES_DICT.keys()) -def get_distance_function(metric: Union[str, DistanceFunction]) -> DistanceFunction: - """Get the distance function for a given metric string or callable. +def get_distance_function(measure: Union[str, DistanceFunction]) -> DistanceFunction: + """Get the distance function for a given measure string or callable. =============== ======================================== - metric Distance Function + measure Distance Function =============== ======================================== 'dtw' distances.dtw_distance 'shape_dtw' distances.shape_dtw_distance @@ -468,8 +468,8 @@ def get_distance_function(metric: Union[str, DistanceFunction]) -> DistanceFunct Parameters ---------- - metric : str or Callable - The distance metric to use. + measure : str or Callable + The distance measure to use. 
If string given then it will be resolved to a alignment path function. If a callable is given, the value must be a function that accepts two numpy arrays and **kwargs returns a float. @@ -477,12 +477,12 @@ def get_distance_function(metric: Union[str, DistanceFunction]) -> DistanceFunct Returns ------- Callable[[np.ndarray, np.ndarray, Any], float] - The distance function for the given metric. + The distance function for the given measure. Raises ------ ValueError - If metric is not one of the supported strings or a callable. + If measure is not one of the supported strings or a callable. Examples -------- @@ -494,16 +494,16 @@ def get_distance_function(metric: Union[str, DistanceFunction]) -> DistanceFunct >>> dtw_dist_func(x, y, window=0.2) 874.0 """ - return _resolve_key_from_distance(metric, "distance") + return _resolve_key_from_distance(measure, "distance") def get_pairwise_distance_function( - metric: Union[str, PairwiseFunction] + measure: Union[str, PairwiseFunction] ) -> PairwiseFunction: - """Get the pairwise distance function for a given metric string or callable. + """Get the pairwise distance function for a given measure string or callable. =============== ======================================== - metric Distance Function + measure Distance Function =============== ======================================== 'dtw' distances.dtw_pairwise_distance 'shape_dtw' distances.shape_dtw_pairwise_distance @@ -527,8 +527,8 @@ def get_pairwise_distance_function( Parameters ---------- - metric : str or Callable - The metric string to resolve to a alignment path function. + measure : str or Callable + The measure string to resolve to a pairwise distance function. If string given then it will be resolved to a alignment path function. 
If a callable is given, the value must be a function that accepts two numpy arrays and **kwargs returns a np.ndarray that is the pairwise distance @@ -537,12 +537,12 @@ def get_pairwise_distance_function( Returns ------- Callable[[np.ndarray, np.ndarray, Any], np.ndarray] - The pairwise distance function for the given metric. + The pairwise distance function for the given measure. Raises ------ ValueError - If metric is not one of the supported strings or a callable. + If measure is not one of the supported strings or a callable. Examples -------- @@ -556,14 +556,14 @@ def get_pairwise_distance_function( [147., 300., 507.], [ 48., 147., 300.]]) """ - return _resolve_key_from_distance(metric, "pairwise_distance") + return _resolve_key_from_distance(measure, "pairwise_distance") -def get_alignment_path_function(metric: str) -> AlignmentPathFunction: - """Get the alignment path function for a given metric string or callable. +def get_alignment_path_function(measure: str) -> AlignmentPathFunction: + """Get the alignment path function for a given measure string or callable. =============== ======================================== - metric Distance Function + measure Distance Function =============== ======================================== 'dtw' distances.dtw_alignment_path 'shape_dtw' distances.shape_dtw_alignment_path @@ -581,19 +581,19 @@ def get_alignment_path_function(metric: str) -> AlignmentPathFunction: Parameters ---------- - metric : str or Callable - The metric string to resolve to an alignment path function. + measure : str or Callable + The measure string to resolve to an alignment path function. Returns ------- Callable[[np.ndarray, np.ndarray, Any], Tuple[List[Tuple[int, int]], float]] - The alignment path function for the given metric. + The alignment path function for the given measure. Raises ------ ValueError - If metric is not one of the supported strings or a callable. - If the metric doesn't have an alignment path function. 
+ If measure is not one of the supported strings or a callable. + If the measure doesn't have an alignment path function. Examples -------- @@ -605,14 +605,14 @@ def get_alignment_path_function(metric: str) -> AlignmentPathFunction: >>> dtw_alignment_path_func(x, y, window=0.2) ([(0, 0), (1, 1), (2, 2), (3, 3), (4, 4)], 500.0) """ - return _resolve_key_from_distance(metric, "alignment_path") + return _resolve_key_from_distance(measure, "alignment_path") -def get_cost_matrix_function(metric: str) -> CostMatrixFunction: - """Get the cost matrix function for a given metric string or callable. +def get_cost_matrix_function(measure: str) -> CostMatrixFunction: + """Get the cost matrix function for a given measure string or callable. =============== ======================================== - metric Distance Function + measure Distance Function =============== ======================================== 'dtw' distances.dtw_cost_matrix 'shape_dtw' distances.shape_dtw_cost_matrix @@ -630,19 +630,19 @@ def get_cost_matrix_function(metric: str) -> CostMatrixFunction: Parameters ---------- - metric : str or Callable - The metric string to resolve to a cost matrix function. + measure : str or Callable + The measure string to resolve to a cost matrix function. Returns ------- Callable[[np.ndarray, np.ndarray, Any], np.ndarray] - The cost matrix function for the given metric. + The cost matrix function for the given measure. Raises ------ ValueError - If metric is not one of the supported strings or a callable. - If the metric doesn't have a cost matrix function. + If measure is not one of the supported strings or a callable. + If the measure doesn't have a cost matrix function. 
Examples -------- @@ -658,20 +658,20 @@ def get_cost_matrix_function(metric: str) -> CostMatrixFunction: [ inf, inf, 343., 400., 521.], [ inf, inf, inf, 424., 500.]]) """ - return _resolve_key_from_distance(metric, "cost_matrix") + return _resolve_key_from_distance(measure, "cost_matrix") -def _resolve_key_from_distance(metric: Union[str, Callable], key: str) -> Any: - if isinstance(metric, Callable): - return metric - if metric == "mpdist": +def _resolve_key_from_distance(measure: Union[str, Callable], key: str) -> Any: + if isinstance(measure, Callable): + return measure + if measure == "mpdist": return mp_distance - dist = DISTANCES_DICT.get(metric) + dist = DISTANCES_DICT.get(measure) if dist is None: - raise ValueError(f"Unknown metric {metric}") + raise ValueError(f"Unknown measure {measure}") dist_callable = dist.get(key) if dist_callable is None: - raise ValueError(f"Metric {metric} does not have a {key} function") + raise ValueError(f"Measure {measure} does not have a {key} function") return dist_callable diff --git a/aeon/distances/elastic/tests/test_alignment_path.py b/aeon/distances/elastic/tests/test_alignment_path.py index 6c2fa5ebfc..5fd1132783 100644 --- a/aeon/distances/elastic/tests/test_alignment_path.py +++ b/aeon/distances/elastic/tests/test_alignment_path.py @@ -32,7 +32,7 @@ def _validate_alignment_path_result( assert isinstance(alignment_path_result, tuple) assert isinstance(alignment_path_result[0], list) assert isinstance(alignment_path_result[1], float) - assert compute_alignment_path(x, y, metric=name) == alignment_path_result + assert compute_alignment_path(x, y, measure=name) == alignment_path_result # Test a callable being passed assert callable_alignment_path == alignment_path_result diff --git a/aeon/distances/elastic/tests/test_cost_matrix.py b/aeon/distances/elastic/tests/test_cost_matrix.py index 1971923ebf..46f4c0d47a 100644 --- a/aeon/distances/elastic/tests/test_cost_matrix.py +++ 
b/aeon/distances/elastic/tests/test_cost_matrix.py @@ -30,8 +30,8 @@ def _validate_cost_matrix_result( ---------- x (np.ndarray): The first input array. y (np.ndarray): The second input array. - name: The name of the distance metric. - distance: The distance metric function. + name: The name of the distance measure. + distance: The distance measure function. cost_matrix: The cost matrix function. """ original_x = x.copy() @@ -40,7 +40,7 @@ def _validate_cost_matrix_result( cost_matrix_callable_result = DISTANCES_DICT[name]["cost_matrix"](x, y) assert isinstance(cost_matrix_result, np.ndarray) - assert_almost_equal(cost_matrix_result, compute_cost_matrix(x, y, metric=name)) + assert_almost_equal(cost_matrix_result, compute_cost_matrix(x, y, measure=name)) assert_almost_equal(cost_matrix_callable_result, cost_matrix_result) if name == "ddtw" or name == "wddtw": assert cost_matrix_result.shape == (x.shape[-1] - 2, y.shape[-1] - 2) diff --git a/aeon/distances/tests/test_distances.py b/aeon/distances/tests/test_distances.py index a361d47ae0..efcbd1ee2e 100644 --- a/aeon/distances/tests/test_distances.py +++ b/aeon/distances/tests/test_distances.py @@ -37,7 +37,7 @@ def _validate_distance_result( ---------- x (np.ndarray): First array. y (np.ndarray): Second array. - name (str): Name of the distance metric. + name (str): Name of the distance measure. distance (callable): Distance function. expected_result (float): Expected distance result. 
check_xy_permuted: (bool): recursively call with swapped series @@ -50,8 +50,8 @@ def _validate_distance_result( dist_result = distance(x, y) assert isinstance(dist_result, float) assert_almost_equal(dist_result, expected_result) - assert_almost_equal(dist_result, compute_distance(x, y, metric=name)) - assert_almost_equal(dist_result, compute_distance(x, y, metric=distance)) + assert_almost_equal(dist_result, compute_distance(x, y, measure=name)) + assert_almost_equal(dist_result, compute_distance(x, y, measure=distance)) dist_result_to_self = distance(x, x) assert isinstance(dist_result_to_self, float) @@ -160,10 +160,10 @@ def test_get_distance_function_names(): def test_resolve_key_from_distance(): """Test _resolve_key_from_distance.""" - with pytest.raises(ValueError, match="Unknown metric"): - _resolve_key_from_distance(metric="FOO", key="cost_matrix") + with pytest.raises(ValueError, match="Unknown measure"): + _resolve_key_from_distance(measure="FOO", key="cost_matrix") with pytest.raises(ValueError): - _resolve_key_from_distance(metric="dtw", key="FOO") + _resolve_key_from_distance(measure="dtw", key="FOO") def foo(x, y): return 0 @@ -176,17 +176,23 @@ def test_incorrect_inputs(): x = np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) y = np.array([[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]]) with pytest.raises( - ValueError, match="Metric must be one of the supported strings or a " "callable" + ValueError, + match="Measure must be one of the supported strings or a " "callable", ): - compute_distance(x, y, metric="FOO") + compute_distance(x, y, measure="FOO") with pytest.raises( - ValueError, match="Metric must be one of the supported strings or a " "callable" + ValueError, + match="Measure must be one of the supported strings or a " "callable", ): - pairwise_distance(x, y, metric="FOO") - with pytest.raises(ValueError, match="Metric must be one of the supported strings"): - alignment_path(x, y, metric="FOO") - with pytest.raises(ValueError, match="Metric must be 
one of the supported strings"): - cost_matrix(x, y, metric="FOO") + pairwise_distance(x, y, measure="FOO") + with pytest.raises( + ValueError, match="Measure must be one of the supported strings" + ): + alignment_path(x, y, measure="FOO") + with pytest.raises( + ValueError, match="Measure must be one of the supported strings" + ): + cost_matrix(x, y, measure="FOO") x = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]) with pytest.raises(ValueError, match="dist_func must be a callable"): diff --git a/aeon/distances/tests/test_numba_distance_parameters.py b/aeon/distances/tests/test_numba_distance_parameters.py index 4353b2ae8f..5b0bc385d0 100644 --- a/aeon/distances/tests/test_numba_distance_parameters.py +++ b/aeon/distances/tests/test_numba_distance_parameters.py @@ -93,7 +93,7 @@ def _test_distance_params( del param_dict["g"] results = [ distance_func(x, y, **param_dict.copy()), - distance(x, y, metric=distance_str, **param_dict.copy()), + distance(x, y, measure=distance_str, **param_dict.copy()), ] if distance_str in _expected_distance_results_params: diff --git a/aeon/distances/tests/test_pairwise.py b/aeon/distances/tests/test_pairwise.py index 6aaab3690b..152913aee0 100644 --- a/aeon/distances/tests/test_pairwise.py +++ b/aeon/distances/tests/test_pairwise.py @@ -52,7 +52,7 @@ def _validate_pairwise_result( Parameters ---------- x: Input np.ndarray. - name: Name of the distance metric. + name: Name of the distance measure. distance: Distance function. pairwise_distance: Pairwise distance function. 
""" @@ -64,11 +64,11 @@ def _validate_pairwise_result( assert isinstance(pairwise_result, np.ndarray) assert pairwise_result.shape == expected_size assert_almost_equal( - pairwise_result, compute_pairwise_distance(x, metric=name, symmetric=symmetric) + pairwise_result, compute_pairwise_distance(x, measure=name, symmetric=symmetric) ) assert_almost_equal( pairwise_result, - compute_pairwise_distance(x, metric=distance, symmetric=symmetric), + compute_pairwise_distance(x, measure=distance, symmetric=symmetric), ) if isinstance(x, np.ndarray): @@ -100,7 +100,7 @@ def _validate_multiple_to_multiple_result( ---------- x: Input array. y: Input array. - name: Name of the distance metric. + name: Name of the distance measure. distance: Distance function. multiple_to_multiple_distance: Mul-to-Mul distance function. check_xy_permuted: recursively call with swapped series @@ -123,11 +123,11 @@ def _validate_multiple_to_multiple_result( assert multiple_to_multiple_result.shape == expected_size assert_almost_equal( - multiple_to_multiple_result, compute_pairwise_distance(x, y, metric=name) + multiple_to_multiple_result, compute_pairwise_distance(x, y, measure=name) ) assert_almost_equal( multiple_to_multiple_result, - compute_pairwise_distance(x, y, metric=distance), + compute_pairwise_distance(x, y, measure=distance), ) if isinstance(x, np.ndarray) and isinstance(y, np.ndarray): @@ -168,7 +168,7 @@ def _validate_single_to_multiple_result( ---------- x: Input array. y: Input array. - name: Name of the distance metric. + name: Name of the distance measure. distance: Distance function. single_to_multiple_distance: Single to multiple distance function. 
run_inverse: Boolean that reruns the test with x and y swapped in position @@ -198,11 +198,11 @@ def _validate_single_to_multiple_result( assert single_to_multiple_result.shape[1] == expected_size assert_almost_equal( single_to_multiple_result, - compute_pairwise_distance(x, y, metric=name, symmetric=symmetric), + compute_pairwise_distance(x, y, measure=name, symmetric=symmetric), ) assert_almost_equal( single_to_multiple_result, - compute_pairwise_distance(x, y, metric=distance, symmetric=symmetric), + compute_pairwise_distance(x, y, measure=distance, symmetric=symmetric), ) if len(x_shape) < len(y_shape): diff --git a/aeon/pipeline/__init__.py b/aeon/pipeline/__init__.py index 732dae0187..a0b15f190f 100644 --- a/aeon/pipeline/__init__.py +++ b/aeon/pipeline/__init__.py @@ -1,6 +1,9 @@ """Pipeline maker utility.""" -__all__ = ["make_pipeline", "sklearn_to_aeon"] +__all__ = [ + "make_pipeline", + "sklearn_to_aeon", +] from aeon.pipeline._make_pipeline import make_pipeline from aeon.pipeline._sklearn_to_aeon import sklearn_to_aeon diff --git a/aeon/regression/compose/tests/test_ensemble.py b/aeon/regression/compose/tests/test_ensemble.py index 447afda327..4d5b288de7 100644 --- a/aeon/regression/compose/tests/test_ensemble.py +++ b/aeon/regression/compose/tests/test_ensemble.py @@ -14,7 +14,7 @@ make_example_3d_numpy, make_example_3d_numpy_list, ) -from aeon.testing.mock_estimators import MockHandlesAllInput, MockRegressor +from aeon.testing.mock_estimators import MockRegressor, MockRegressorFullTags mixed_ensemble = [ DummyRegressor(), @@ -114,7 +114,7 @@ def test_unequal_tag_inference(): n_cases=10, min_n_timepoints=8, max_n_timepoints=12, regression_target=True ) - r1 = MockHandlesAllInput() + r1 = MockRegressorFullTags() r2 = MockRegressor() assert r1.get_tag("capability:unequal_length") @@ -144,7 +144,7 @@ def test_missing_tag_inference(): X, y = make_example_3d_numpy(n_cases=10, n_timepoints=12, regression_target=True) X[5, 0, 4] = np.nan - r1 = 
MockHandlesAllInput() + r1 = MockRegressorFullTags() r2 = MockRegressor() assert r1.get_tag("capability:missing_values") @@ -175,7 +175,7 @@ def test_multivariate_tag_inference(): n_cases=10, n_channels=2, n_timepoints=12, regression_target=True ) - r1 = MockHandlesAllInput() + r1 = MockRegressorFullTags() r2 = MockRegressor() assert r1.get_tag("capability:multivariate") diff --git a/aeon/regression/distance_based/_time_series_neighbors.py b/aeon/regression/distance_based/_time_series_neighbors.py index 29056de961..d56120ea92 100644 --- a/aeon/regression/distance_based/_time_series_neighbors.py +++ b/aeon/regression/distance_based/_time_series_neighbors.py @@ -111,7 +111,7 @@ def _fit(self, X, y): y : array-like, shape = (n_cases) The output value. """ - self.metric_ = get_distance_function(metric=self.distance) + self.metric_ = get_distance_function(measure=self.distance) self.X_ = X self.y_ = y return self diff --git a/aeon/regression/tests/test_base.py b/aeon/regression/tests/test_base.py index 30302de8c7..d04469db8b 100644 --- a/aeon/regression/tests/test_base.py +++ b/aeon/regression/tests/test_base.py @@ -12,7 +12,7 @@ EQUAL_LENGTH_UNIVARIATE_REGRESSION, UNEQUAL_LENGTH_UNIVARIATE_REGRESSION, ) -from aeon.utils import COLLECTIONS_DATA_TYPES +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES class _TestRegressor(BaseRegressor): diff --git a/aeon/segmentation/tests/test_base.py b/aeon/segmentation/tests/test_base.py index 013e4ecdad..870fbdb7e5 100644 --- a/aeon/segmentation/tests/test_base.py +++ b/aeon/segmentation/tests/test_base.py @@ -5,7 +5,7 @@ import pytest from aeon.segmentation.base import BaseSegmenter -from aeon.testing.mock_estimators import MockSegmenter, SupervisedMockSegmenter +from aeon.testing.mock_estimators import MockSegmenter, MockSegmenterRequiresY def test_fit_predict_correct(): @@ -25,7 +25,7 @@ def test_fit_predict_correct(): assert res.is_fitted res = seg.fit_predict(x_correct) assert isinstance(res, np.ndarray) - seg = 
SupervisedMockSegmenter() + seg = MockSegmenterRequiresY() res = seg.fit(x_correct, y=x_correct) assert res.is_fitted with pytest.raises( diff --git a/aeon/testing/estimator_checking/_estimator_checking.py b/aeon/testing/estimator_checking/_estimator_checking.py index c07815ea89..97eed5a0e9 100644 --- a/aeon/testing/estimator_checking/_estimator_checking.py +++ b/aeon/testing/estimator_checking/_estimator_checking.py @@ -193,6 +193,7 @@ class is passed. {'check_get_params(estimator=MockClassifier())': 'PASSED'} """ # check if estimator has soft dependencies installed + _check_soft_dependencies("pytest") _check_estimator_deps(estimator) checks = [] diff --git a/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py b/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py index 0d94fbc34f..5f2f05aaa9 100644 --- a/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py +++ b/aeon/testing/estimator_checking/_yield_anomaly_detection_checks.py @@ -3,7 +3,6 @@ from functools import partial import numpy as np -import pytest from aeon.base._base import _clone_estimator from aeon.base._base_series import VALID_SERIES_INNER_TYPES @@ -64,6 +63,8 @@ def check_anomaly_detector_overrides_and_tags(estimator_class): def check_anomaly_detector_univariate(estimator): """Test the anomaly detector on univariate data.""" + import pytest + estimator = _clone_estimator(estimator) if estimator.get_tag(tag_name="capability:univariate"): @@ -78,6 +79,8 @@ def check_anomaly_detector_univariate(estimator): def check_anomaly_detector_multivariate(estimator): """Test the anomaly detector on multivariate data.""" + import pytest + estimator = _clone_estimator(estimator) if estimator.get_tag(tag_name="capability:multivariate"): diff --git a/aeon/testing/estimator_checking/_yield_classification_checks.py b/aeon/testing/estimator_checking/_yield_classification_checks.py index 00b82705de..09f15877be 100644 --- 
a/aeon/testing/estimator_checking/_yield_classification_checks.py +++ b/aeon/testing/estimator_checking/_yield_classification_checks.py @@ -24,7 +24,7 @@ _assert_predict_probabilities, _get_tag, ) -from aeon.utils import COLLECTIONS_DATA_TYPES +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES from aeon.utils.validation import get_n_cases diff --git a/aeon/testing/estimator_checking/_yield_estimator_checks.py b/aeon/testing/estimator_checking/_yield_estimator_checks.py index b5fb60c1d3..70f714d4d9 100644 --- a/aeon/testing/estimator_checking/_yield_estimator_checks.py +++ b/aeon/testing/estimator_checking/_yield_estimator_checks.py @@ -9,7 +9,6 @@ import joblib import numpy as np -import pytest from numpy.testing import assert_array_almost_equal from sklearn.exceptions import NotFittedError @@ -596,6 +595,8 @@ def check_fit_updates_state_and_cloning(estimator, datatype): def check_raises_not_fitted_error(estimator, datatype): """Check exception raised for non-fit method calls to unfitted estimators.""" + import pytest + estimator = _clone_estimator(estimator) for method in NON_STATE_CHANGING_METHODS: diff --git a/aeon/testing/estimator_checking/_yield_regression_checks.py b/aeon/testing/estimator_checking/_yield_regression_checks.py index bf6d2cb568..52933e81f7 100644 --- a/aeon/testing/estimator_checking/_yield_regression_checks.py +++ b/aeon/testing/estimator_checking/_yield_regression_checks.py @@ -20,7 +20,7 @@ ) from aeon.testing.testing_data import FULL_TEST_DATA_DICT from aeon.testing.utils.estimator_checks import _assert_predict_labels, _get_tag -from aeon.utils import COLLECTIONS_DATA_TYPES +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES def _yield_regression_checks(estimator_class, estimator_instances, datatypes): diff --git a/aeon/testing/estimator_checking/_yield_segmentation_checks.py b/aeon/testing/estimator_checking/_yield_segmentation_checks.py index 6c2c964f5a..054ca56bd4 100644 --- 
a/aeon/testing/estimator_checking/_yield_segmentation_checks.py +++ b/aeon/testing/estimator_checking/_yield_segmentation_checks.py @@ -3,7 +3,6 @@ from functools import partial import numpy as np -import pytest from aeon.base._base import _clone_estimator from aeon.base._base_series import VALID_SERIES_INNER_TYPES @@ -42,6 +41,8 @@ def check_segmenter_base_functionality(estimator_class): def check_segmenter_instance(estimator): """Test segmenters.""" + import pytest + estimator = _clone_estimator(estimator) def _assert_output(output, dense, length): diff --git a/aeon/testing/estimator_checking/_yield_soft_dependency_checks.py b/aeon/testing/estimator_checking/_yield_soft_dependency_checks.py index 83c6f96b83..ed5c9605b1 100644 --- a/aeon/testing/estimator_checking/_yield_soft_dependency_checks.py +++ b/aeon/testing/estimator_checking/_yield_soft_dependency_checks.py @@ -6,8 +6,6 @@ from functools import partial -import pytest - from aeon.utils.validation._dependencies import ( _check_python_version, _check_soft_dependencies, @@ -23,6 +21,8 @@ def _yield_soft_dependency_checks(estimator_class, estimator_instances, datatype def check_python_version_softdep(estimator_class): """Test that estimators raise error if python version is wrong.""" + import pytest + # if dependencies are incompatible skip softdeps = estimator_class.get_class_tag("python_dependencies", None) if softdeps is not None and not _check_soft_dependencies(softdeps, severity="none"): @@ -46,6 +46,8 @@ def check_python_version_softdep(estimator_class): def check_python_dependency_softdep(estimator_class): """Test that estimators raise error if required soft dependencies are missing.""" + import pytest + # if python version is incompatible skip if not _check_python_version(estimator_class, severity="none"): return diff --git a/aeon/testing/estimator_checking/_yield_transformation_checks.py b/aeon/testing/estimator_checking/_yield_transformation_checks.py index 507c8c1e08..4a8c51f795 100644 --- 
a/aeon/testing/estimator_checking/_yield_transformation_checks.py +++ b/aeon/testing/estimator_checking/_yield_transformation_checks.py @@ -20,7 +20,7 @@ from aeon.testing.utils.estimator_checks import _run_estimator_method from aeon.transformations.collection.channel_selection.base import BaseChannelSelector from aeon.transformations.series import BaseSeriesTransformer -from aeon.utils import COLLECTIONS_DATA_TYPES +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES def _yield_transformation_checks(estimator_class, estimator_instances, datatypes): diff --git a/aeon/testing/mock_estimators/__init__.py b/aeon/testing/mock_estimators/__init__.py index 32d947cb7d..219fc3e987 100644 --- a/aeon/testing/mock_estimators/__init__.py +++ b/aeon/testing/mock_estimators/__init__.py @@ -1,26 +1,46 @@ """Mock estimators for testing and debugging.""" __all__ = [ - "make_mock_estimator", + # anomaly detection + "MockAnomalyDetector", + "MockAnomalyDetectorRequiresFit", + "MockAnomalyDetectorRequiresY", + # classification "MockClassifier", "MockClassifierPredictProba", "MockClassifierFullTags", "MockClassifierParams", + "MockClassifierComposite", + # clustering "MockCluster", "MockDeepClusterer", - "MockSegmenter", - "SupervisedMockSegmenter", - "MockHandlesAllInput", + # collection transformation + "MockCollectionTransformer", + # forecasting + "MockForecaster", + # regression "MockRegressor", + "MockRegressorFullTags", + # segmentation + "MockSegmenter", + "MockSegmenterRequiresY", + # series transformation + "MockSeriesTransformer", + "MockUnivariateSeriesTransformer", "MockMultivariateSeriesTransformer", "MockSeriesTransformerNoFit", - "MockUnivariateSeriesTransformer", - "MockCollectionTransformer", - "MockSeriesTransformer", + # similarity search + "MockSimilaritySearch", ] +from aeon.testing.mock_estimators._mock_anomaly_detectors import ( + MockAnomalyDetector, + MockAnomalyDetectorRequiresFit, + MockAnomalyDetectorRequiresY, +) from 
aeon.testing.mock_estimators._mock_classifiers import ( MockClassifier, + MockClassifierComposite, MockClassifierFullTags, MockClassifierParams, MockClassifierPredictProba, @@ -29,13 +49,14 @@ from aeon.testing.mock_estimators._mock_collection_transformers import ( MockCollectionTransformer, ) +from aeon.testing.mock_estimators._mock_forecasters import MockForecaster from aeon.testing.mock_estimators._mock_regressors import ( - MockHandlesAllInput, MockRegressor, + MockRegressorFullTags, ) from aeon.testing.mock_estimators._mock_segmenters import ( MockSegmenter, - SupervisedMockSegmenter, + MockSegmenterRequiresY, ) from aeon.testing.mock_estimators._mock_series_transformers import ( MockMultivariateSeriesTransformer, @@ -43,3 +64,4 @@ MockSeriesTransformerNoFit, MockUnivariateSeriesTransformer, ) +from aeon.testing.mock_estimators._mock_similarity_search import MockSimilaritySearch diff --git a/aeon/testing/mock_estimators/_mock_anomaly_detectors.py b/aeon/testing/mock_estimators/_mock_anomaly_detectors.py index 78ddfe76f1..4ec14d35fa 100644 --- a/aeon/testing/mock_estimators/_mock_anomaly_detectors.py +++ b/aeon/testing/mock_estimators/_mock_anomaly_detectors.py @@ -1,4 +1,4 @@ -"""Mock anomaly detectors for testing.""" +"""Mock anomaly detectors useful for testing and debugging.""" __maintainer__ = ["MatthewMiddlehurst"] __all__ = [ diff --git a/aeon/testing/mock_estimators/_mock_classifiers.py b/aeon/testing/mock_estimators/_mock_classifiers.py index bcfcb8162e..1bf9357d60 100644 --- a/aeon/testing/mock_estimators/_mock_classifiers.py +++ b/aeon/testing/mock_estimators/_mock_classifiers.py @@ -1,7 +1,13 @@ -"""Mock classifiers useful for testing and debugging. - -Used in tests for the classifier base class. 
-""" +"""Mock classifiers useful for testing and debugging.""" + +__maintainer__ = ["MatthewMiddlehurst"] +__all__ = [ + "MockClassifier", + "MockClassifierPredictProba", + "MockClassifierFullTags", + "MockClassifierParams", + "MockClassifierComposite", +] import numpy as np diff --git a/aeon/testing/mock_estimators/_mock_clusterers.py b/aeon/testing/mock_estimators/_mock_clusterers.py index b920b83c30..53b1014290 100644 --- a/aeon/testing/mock_estimators/_mock_clusterers.py +++ b/aeon/testing/mock_estimators/_mock_clusterers.py @@ -1,3 +1,11 @@ +"""Mock clusterers useful for testing and debugging.""" + +__maintainer__ = [] +__all__ = [ + "MockCluster", + "MockDeepClusterer", +] + import numpy as np from aeon.clustering.base import BaseClusterer diff --git a/aeon/testing/mock_estimators/_mock_collection_transformers.py b/aeon/testing/mock_estimators/_mock_collection_transformers.py index bc59069283..7feb8d46a9 100644 --- a/aeon/testing/mock_estimators/_mock_collection_transformers.py +++ b/aeon/testing/mock_estimators/_mock_collection_transformers.py @@ -1,4 +1,9 @@ -"""Mock collection transformers.""" +"""Mock collection transformers useful for testing and debugging.""" + +__maintainer__ = [] +__all__ = [ + "MockCollectionTransformer", +] from aeon.transformations.collection import BaseCollectionTransformer diff --git a/aeon/testing/mock_estimators/_mock_forecasters.py b/aeon/testing/mock_estimators/_mock_forecasters.py index f5bb86d249..8eb2ba2635 100644 --- a/aeon/testing/mock_estimators/_mock_forecasters.py +++ b/aeon/testing/mock_estimators/_mock_forecasters.py @@ -1,7 +1,10 @@ -"""Mock forecasters useful for testing and debugging. +"""Mock forecasters useful for testing and debugging.""" + +__maintainer__ = ["TonyBagnall"] +__all__ = [ + "MockForecaster", +] -Used in tests for the forecasting base class. 
-""" from aeon.forecasting.base import BaseForecaster diff --git a/aeon/testing/mock_estimators/_mock_regressors.py b/aeon/testing/mock_estimators/_mock_regressors.py index 355534abac..5258019aab 100644 --- a/aeon/testing/mock_estimators/_mock_regressors.py +++ b/aeon/testing/mock_estimators/_mock_regressors.py @@ -1,3 +1,11 @@ +"""Mock regressors useful for testing and debugging.""" + +__maintainer__ = ["MatthewMiddlehurst"] +__all__ = [ + "MockRegressor", + "MockRegressorFullTags", +] + from sklearn.utils import check_random_state from aeon.regression.base import BaseRegressor @@ -20,7 +28,7 @@ def _predict(self, X): return rng.random(size=(len(X))) -class MockHandlesAllInput(BaseRegressor): +class MockRegressorFullTags(BaseRegressor): """Dummy regressor for testing base class fit/predict/predict_proba.""" _tags = { diff --git a/aeon/testing/mock_estimators/_mock_segmenters.py b/aeon/testing/mock_estimators/_mock_segmenters.py index 82ff6a81f6..45b7a524aa 100644 --- a/aeon/testing/mock_estimators/_mock_segmenters.py +++ b/aeon/testing/mock_estimators/_mock_segmenters.py @@ -1,4 +1,10 @@ -"""Mock segmenters for testing.""" +"""Mock segmenters useful for testing and debugging.""" + +__maintainer__ = [] +__all__ = [ + "MockSegmenter", + "MockSegmenterRequiresY", +] import numpy as np @@ -42,7 +48,7 @@ def _get_test_params(cls, parameter_set="default"): return {} -class SupervisedMockSegmenter(MockSegmenter): +class MockSegmenterRequiresY(MockSegmenter): """Mock segmenter for testing.""" _tags = { diff --git a/aeon/testing/mock_estimators/_mock_series_transformers.py b/aeon/testing/mock_estimators/_mock_series_transformers.py index 937f4a6325..66d62ef687 100644 --- a/aeon/testing/mock_estimators/_mock_series_transformers.py +++ b/aeon/testing/mock_estimators/_mock_series_transformers.py @@ -1,4 +1,12 @@ -"""Mock series transformers.""" +"""Mock series transformers useful for testing and debugging.""" + +__maintainer__ = [] +__all__ = [ + "MockSeriesTransformer", + 
"MockUnivariateSeriesTransformer", + "MockMultivariateSeriesTransformer", + "MockSeriesTransformerNoFit", +] import numpy as np diff --git a/aeon/testing/mock_estimators/_mock_similarity_search.py b/aeon/testing/mock_estimators/_mock_similarity_search.py index 8542b81a1b..55c9c435c7 100644 --- a/aeon/testing/mock_estimators/_mock_similarity_search.py +++ b/aeon/testing/mock_estimators/_mock_similarity_search.py @@ -1,12 +1,14 @@ -"""Mock similarity search useful for testing and debugging. +"""Mock similarity searchers useful for testing and debugging.""" -Used in tests for the query search base class. -""" +__maintainer__ = ["baraline"] +__all__ = [ + "MockSimilaritySearch", +] from aeon.similarity_search.base import BaseSimilaritySearch -class MocksimilaritySearch(BaseSimilaritySearch): +class MockSimilaritySearch(BaseSimilaritySearch): """Mock similarity search for testing base class predict.""" def _fit(self, X, y=None): diff --git a/aeon/testing/tests/__init__.py b/aeon/testing/tests/__init__.py index 69d1ff2d83..aaeacff24d 100644 --- a/aeon/testing/tests/__init__.py +++ b/aeon/testing/tests/__init__.py @@ -1 +1,13 @@ -"""Tests for the aeon package and testing module utiltiies.""" +"""Tests for the aeon package and testing module utilities.""" + +import pkgutil + +import aeon + +# collect all modules +ALL_AEON_MODULES = pkgutil.walk_packages(aeon.__path__, aeon.__name__ + ".") +ALL_AEON_MODULES = [x[1] for x in ALL_AEON_MODULES] + +ALL_AEON_MODULES_NO_TESTS = [ + x for x in ALL_AEON_MODULES if not any(part == "tests" for part in x.split(".")) +] diff --git a/aeon/testing/tests/test_core_imports.py b/aeon/testing/tests/test_core_imports.py new file mode 100644 index 0000000000..f13740c08e --- /dev/null +++ b/aeon/testing/tests/test_core_imports.py @@ -0,0 +1,26 @@ +"""Tests that non-core dependencies are handled correctly in modules.""" + +import re +from importlib import import_module + +from aeon.testing.tests import ALL_AEON_MODULES_NO_TESTS + +if __name__ == 
"__main__": + """Test imports in aeon modules with core dependencies only. + + Imports all modules and catch exceptions due to missing dependencies. + """ + for module in ALL_AEON_MODULES_NO_TESTS: + try: + import_module(module) + except ModuleNotFoundError as e: # pragma: no cover + dependency = "unknown" + match = re.search(r"\'(.+?)\'", str(e)) + if match: + dependency = match.group(1) + + raise ModuleNotFoundError( + f"The module: {module} should not require any non-core dependencies, " + f"but tried importing: '{dependency}'. Make sure non-core dependencies " + f"are properly isolated outside of tests/ directories." + ) from e diff --git a/aeon/testing/tests/test_softdeps.py b/aeon/testing/tests/test_softdeps.py index 830d0e80f0..2271d21497 100644 --- a/aeon/testing/tests/test_softdeps.py +++ b/aeon/testing/tests/test_softdeps.py @@ -1,23 +1,14 @@ """Tests that soft dependencies are handled correctly in modules.""" -__maintainer__ = [] - -import pkgutil import re from importlib import import_module import pytest -import aeon from aeon.testing.testing_config import PR_TESTING +from aeon.testing.tests import ALL_AEON_MODULES, ALL_AEON_MODULES_NO_TESTS -# collect all modules -modules = pkgutil.walk_packages(aeon.__path__, aeon.__name__ + ".") -modules = [x[1] for x in modules] - -if PR_TESTING: # pragma: no cover - # exclude test modules - modules = [x for x in modules if not any(part == "tests" for part in x.split("."))] +modules = ALL_AEON_MODULES_NO_TESTS if PR_TESTING else ALL_AEON_MODULES def test_module_crawl(): diff --git a/aeon/testing/tests/test_testing_data.py b/aeon/testing/tests/test_testing_data.py index 505cb474a8..f9afe264dd 100644 --- a/aeon/testing/tests/test_testing_data.py +++ b/aeon/testing/tests/test_testing_data.py @@ -20,7 +20,7 @@ UNEQUAL_LENGTH_UNIVARIATE_REGRESSION, UNEQUAL_LENGTH_UNIVARIATE_SIMILARITY_SEARCH, ) -from aeon.utils import COLLECTIONS_DATA_TYPES +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES from 
aeon.utils.validation import ( has_missing, is_collection, diff --git a/aeon/testing/utils/deep_equals.py b/aeon/testing/utils/deep_equals.py index 86c6c5cd96..81f6d91534 100644 --- a/aeon/testing/utils/deep_equals.py +++ b/aeon/testing/utils/deep_equals.py @@ -1,6 +1,6 @@ """Testing utility to compare equality in value for nested objects.""" -__maintainer__ = [] +__maintainer__ = ["MatthewMiddlehurst"] __all__ = ["deep_equals"] from inspect import isclass diff --git a/aeon/testing/utils/output_supression.py b/aeon/testing/utils/output_suppression.py similarity index 100% rename from aeon/testing/utils/output_supression.py rename to aeon/testing/utils/output_suppression.py diff --git a/aeon/testing/utils/tests/test_output_supression.py b/aeon/testing/utils/tests/test_output_supression.py index 8e8b0a4862..56f7b18ec5 100644 --- a/aeon/testing/utils/tests/test_output_supression.py +++ b/aeon/testing/utils/tests/test_output_supression.py @@ -2,7 +2,7 @@ import sys -from aeon.testing.utils.output_supression import suppress_output +from aeon.testing.utils.output_suppression import suppress_output @suppress_output() diff --git a/aeon/transformations/collection/_hog1d.py b/aeon/transformations/collection/_hog1d.py index 3deddf5931..a3cc18d8aa 100644 --- a/aeon/transformations/collection/_hog1d.py +++ b/aeon/transformations/collection/_hog1d.py @@ -6,7 +6,7 @@ import numpy as np from aeon.transformations.collection.base import BaseCollectionTransformer -from aeon.utils import split_series +from aeon.utils.split import split_series class HOG1DTransformer(BaseCollectionTransformer): diff --git a/aeon/transformations/collection/_slope.py b/aeon/transformations/collection/_slope.py index 7a11cbcdb6..cf9d860478 100644 --- a/aeon/transformations/collection/_slope.py +++ b/aeon/transformations/collection/_slope.py @@ -8,7 +8,7 @@ import numpy as np from aeon.transformations.collection.base import BaseCollectionTransformer -from aeon.utils import split_series +from 
aeon.utils.split import split_series class SlopeTransformer(BaseCollectionTransformer): diff --git a/aeon/transformations/collection/channel_selection/_elbow_class.py b/aeon/transformations/collection/channel_selection/_elbow_class.py index d32b7fbb1e..be0f8102de 100644 --- a/aeon/transformations/collection/channel_selection/_elbow_class.py +++ b/aeon/transformations/collection/channel_selection/_elbow_class.py @@ -90,7 +90,7 @@ class values {len(class_vals)} must be of same length." lambda row: aeon_distance( row[: row.shape[0] // 2], row[row.shape[0] // 2 :], - metric="dtw", + measure="dtw", ), axis=1, arr=np.concatenate((cls1_ch, cls2_ch), axis=1), diff --git a/aeon/transformations/collection/compose/_identity.py b/aeon/transformations/collection/compose/_identity.py index 20c6674b6b..a359255242 100644 --- a/aeon/transformations/collection/compose/_identity.py +++ b/aeon/transformations/collection/compose/_identity.py @@ -1,7 +1,7 @@ """Identity transformer.""" from aeon.transformations.collection import BaseCollectionTransformer -from aeon.utils import COLLECTIONS_DATA_TYPES +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES class CollectionId(BaseCollectionTransformer): diff --git a/aeon/transformations/series/tests/test_warping.py b/aeon/transformations/series/tests/test_warping.py index ca14ab65a3..f707d0e198 100644 --- a/aeon/transformations/series/tests/test_warping.py +++ b/aeon/transformations/series/tests/test_warping.py @@ -16,7 +16,7 @@ def test_warping_path_transformer(distance): x = make_example_2d_numpy_series(n_timepoints=20, n_channels=2) y = make_example_2d_numpy_series(n_timepoints=20, n_channels=2) - alignment_path_function = get_alignment_path_function(metric=distance) + alignment_path_function = get_alignment_path_function(measure=distance) warping_path = alignment_path_function(x, y)[0] diff --git a/aeon/utils/__init__.py b/aeon/utils/__init__.py index 8f16b7102d..e198bb676e 100644 --- a/aeon/utils/__init__.py +++ 
b/aeon/utils/__init__.py @@ -1,20 +1,7 @@ """Utility functionality.""" __all__ = [ - "split_series", - "ALL_TIME_SERIES_TYPES", - "COLLECTIONS_DATA_TYPES", - "SERIES_DATA_TYPES", - "HIERARCHICAL_DATA_TYPES", - # github debug util - "show_versions", + "show_versions", # github debug util ] -from aeon.utils._data_types import ( - ALL_TIME_SERIES_TYPES, - COLLECTIONS_DATA_TYPES, - HIERARCHICAL_DATA_TYPES, - SERIES_DATA_TYPES, -) -from aeon.utils._split import split_series from aeon.utils.show_versions import show_versions diff --git a/aeon/utils/conversion/_convert_collection.py b/aeon/utils/conversion/_convert_collection.py index e41ed0c8a2..0e3e28f1af 100644 --- a/aeon/utils/conversion/_convert_collection.py +++ b/aeon/utils/conversion/_convert_collection.py @@ -22,7 +22,7 @@ import pandas as pd from numba.typed import List as NumbaList -from aeon.utils._data_types import COLLECTIONS_DATA_TYPES +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES from aeon.utils.validation.collection import _equal_length, get_type diff --git a/aeon/utils/conversion/tests/test_convert_collection.py b/aeon/utils/conversion/tests/test_convert_collection.py index e9940aa673..3776dc7f4f 100644 --- a/aeon/utils/conversion/tests/test_convert_collection.py +++ b/aeon/utils/conversion/tests/test_convert_collection.py @@ -8,7 +8,6 @@ EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION, UNEQUAL_LENGTH_UNIVARIATE_CLASSIFICATION, ) -from aeon.utils import COLLECTIONS_DATA_TYPES from aeon.utils.conversion._convert_collection import ( _from_numpy2d_to_df_list, _from_numpy2d_to_np_list, @@ -24,6 +23,7 @@ resolve_equal_length_inner_type, resolve_unequal_length_inner_type, ) +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES from aeon.utils.validation.collection import ( _equal_length, get_n_cases, diff --git a/aeon/utils/_data_types.py b/aeon/utils/data_types.py similarity index 100% rename from aeon/utils/_data_types.py rename to aeon/utils/data_types.py diff --git 
a/aeon/utils/networks/weight_norm.py b/aeon/utils/networks/weight_norm.py index 1a613f9b64..459cfd7104 100644 --- a/aeon/utils/networks/weight_norm.py +++ b/aeon/utils/networks/weight_norm.py @@ -5,7 +5,7 @@ if _check_soft_dependencies(["tensorflow"], severity="none"): import tensorflow as tf - class WeightNormalization(tf.keras.layers.Wrapper): + class _WeightNormalization(tf.keras.layers.Wrapper): """Apply weight normalization to a Keras layer.""" def __init__(self, layer, **kwargs): diff --git a/aeon/utils/sklearn.py b/aeon/utils/sklearn.py index 1f7d45a6fc..8ee26cf311 100644 --- a/aeon/utils/sklearn.py +++ b/aeon/utils/sklearn.py @@ -1,5 +1,15 @@ """Sklearn related typing and inheritance checking utility.""" +__maintainer__ = [] +__all__ = [ + "is_sklearn_estimator", + "sklearn_estimator_identifier", + "is_sklearn_transformer", + "is_sklearn_classifier", + "is_sklearn_regressor", + "is_sklearn_clusterer", +] + from inspect import isclass from sklearn.base import ( @@ -12,8 +22,6 @@ from sklearn.model_selection import GridSearchCV, RandomizedSearchCV from sklearn.pipeline import Pipeline -__maintainer__ = [] - from aeon.base import BaseAeonEstimator diff --git a/aeon/utils/_split.py b/aeon/utils/split.py similarity index 100% rename from aeon/utils/_split.py rename to aeon/utils/split.py diff --git a/aeon/utils/tags/_tags.py b/aeon/utils/tags/_tags.py index 554584115e..e1bacdd5ad 100644 --- a/aeon/utils/tags/_tags.py +++ b/aeon/utils/tags/_tags.py @@ -17,7 +17,7 @@ class : identifier for the base class of objects this tag applies to __maintainer__ = ["MatthewMiddlehurst"] __all__ = ["ESTIMATOR_TAGS"] -from aeon.utils import COLLECTIONS_DATA_TYPES, SERIES_DATA_TYPES +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES, SERIES_DATA_TYPES ESTIMATOR_TAGS = { # all estimators diff --git a/aeon/utils/tests/test_show_versions.py b/aeon/utils/tests/test_show_versions.py index 866692a7d6..b47810ed98 100644 --- a/aeon/utils/tests/test_show_versions.py +++ 
b/aeon/utils/tests/test_show_versions.py @@ -1,6 +1,6 @@ """Test the show versions function.""" -from aeon.testing.utils.output_supression import suppress_output +from aeon.testing.utils.output_suppression import suppress_output from aeon.utils.show_versions import show_versions diff --git a/aeon/utils/tests/test_split.py b/aeon/utils/tests/test_split.py index 3c4f8df751..4e655f3aa2 100644 --- a/aeon/utils/tests/test_split.py +++ b/aeon/utils/tests/test_split.py @@ -3,7 +3,7 @@ import numpy as np import pytest -from aeon.utils import split_series +from aeon.utils.split import split_series X = np.arange(10) testdata = [ diff --git a/aeon/utils/tests/test_weighted_metrics.py b/aeon/utils/tests/test_weighted_metrics.py deleted file mode 100644 index c8a0b09135..0000000000 --- a/aeon/utils/tests/test_weighted_metrics.py +++ /dev/null @@ -1,25 +0,0 @@ -"""Test weighted metric.""" - -import numpy as np -import pytest - -from aeon.utils.weighted_metrics import weighted_geometric_mean - - -def test_weighted_geometric_mean(): - """Test weighted_geometric_mean.""" - y = np.array([1.0, 2.0, 3.0]) - w = np.array([0.1, 0.8, 0.1]) - w2 = np.array([[0.1, 0.8, 0.1]]).T - res = weighted_geometric_mean(y, w) - assert round(res, 5) == 1.94328 - res2 = weighted_geometric_mean(y, w, axis=0) - assert res == res2 - y2 = np.array([[1.0, 2.0, 3.0]]).T - with pytest.raises(ValueError, match="do not match"): - weighted_geometric_mean(y2, w, axis=1) - weighted_geometric_mean(y2, w2, axis=1) - with pytest.raises( - ValueError, match="Input data and weights have inconsistent shapes" - ): - weighted_geometric_mean(y, w2) diff --git a/aeon/utils/tests/test_weightnorm.py b/aeon/utils/tests/test_weightnorm.py index 43b20293d5..0642530b2c 100644 --- a/aeon/utils/tests/test_weightnorm.py +++ b/aeon/utils/tests/test_weightnorm.py @@ -16,11 +16,11 @@ def test_weight_norm(): import numpy as np import tensorflow as tf - from aeon.utils.networks.weight_norm import WeightNormalization + from 
aeon.utils.networks.weight_norm import _WeightNormalization X = np.random.random((10, 10, 5)) _input = tf.keras.layers.Input((10, 5)) - l1 = WeightNormalization( + l1 = _WeightNormalization( tf.keras.layers.Conv1D(filters=5, kernel_size=1, dilation_rate=4) )(_input) model = tf.keras.models.Model(inputs=_input, outputs=l1) @@ -42,7 +42,7 @@ def test_weight_norm(): model_path = "test_weight_norm_model.h5" model.save(model_path) loaded_model = tf.keras.models.load_model( - model_path, custom_objects={"WeightNormalization": WeightNormalization} + model_path, custom_objects={"_WeightNormalization": _WeightNormalization} ) assert loaded_model is not None loaded_output = loaded_model.predict(X) diff --git a/aeon/utils/validation/__init__.py b/aeon/utils/validation/__init__.py index 14a16853ad..7e86a79a13 100644 --- a/aeon/utils/validation/__init__.py +++ b/aeon/utils/validation/__init__.py @@ -12,7 +12,6 @@ "check_window_length", "get_n_cases", "get_type", - "equal_length", "is_equal_length", "has_missing", "is_univariate", diff --git a/aeon/utils/validation/tests/test_collection.py b/aeon/utils/validation/tests/test_collection.py index b1c27e4a64..4c53572b32 100644 --- a/aeon/utils/validation/tests/test_collection.py +++ b/aeon/utils/validation/tests/test_collection.py @@ -13,7 +13,7 @@ make_example_3d_numpy_list, ) from aeon.testing.testing_data import EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION -from aeon.utils import COLLECTIONS_DATA_TYPES +from aeon.utils.data_types import COLLECTIONS_DATA_TYPES from aeon.utils.validation.collection import ( _is_numpy_list_multivariate, _is_pd_wide, diff --git a/aeon/utils/weighted_metrics.py b/aeon/utils/weighted_metrics.py deleted file mode 100644 index 84ec005d2f..0000000000 --- a/aeon/utils/weighted_metrics.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Statistical functionality used throughout aeon.""" - -import numpy as np -from sklearn.utils.validation import check_consistent_length - -__maintainer__ = [] -__all__ = [ - 
"weighted_geometric_mean", -] - - -def weighted_geometric_mean(y, weights, axis=None): - """Calculate weighted version of geometric mean. - - Parameters - ---------- - y : np.ndarray - Values to take the weighted geometric mean of. - weights: np.ndarray - Weights for each value in `array`. Must be same shape as `array` or - of shape `(array.shape[0],)` if axis=0 or `(array.shape[1], ) if axis=1. - axis : int - The axis of `y` to apply the weights to. - - Returns - ------- - geometric_mean : float - Weighted geometric mean - """ - if weights.ndim == 1: - if axis == 0: - check_consistent_length(y, weights) - elif axis == 1: - if y.shape[1] != len(weights): - raise ValueError( - f"Input features ({y.shape[1]}) do not match " - f"number of `weights` ({len(weights)})." - ) - weight_sums = np.sum(weights) - else: - if y.shape != weights.shape: - raise ValueError("Input data and weights have inconsistent shapes.") - weight_sums = np.sum(weights, axis=axis) - return np.exp(np.sum(weights * np.log(y), axis=axis) / weight_sums) diff --git a/docs/api_reference.md b/docs/api_reference.md index 34a580ba74..d0c6335ca7 100644 --- a/docs/api_reference.md +++ b/docs/api_reference.md @@ -19,6 +19,7 @@ api_reference/clustering api_reference/data_format api_reference/datasets api_reference/distances +api_reference/forecasting api_reference/networks api_reference/regression api_reference/segmentation diff --git a/docs/api_reference/forecasting.md b/docs/api_reference/forecasting.md new file mode 100644 index 0000000000..131fb8be86 --- /dev/null +++ b/docs/api_reference/forecasting.md @@ -0,0 +1,14 @@ +# Forecasting + +```{eval-rst} +.. currentmodule:: aeon.forecasting + +.. 
autosummary:: + :toctree: auto_generated/ + :template: class.rst + + DummyForecaster + BaseForecaster + RegressionForecaster + ETSForecaster +``` diff --git a/docs/api_reference/networks.rst b/docs/api_reference/networks.rst index 17493a7b6d..3eaee2c370 100644 --- a/docs/api_reference/networks.rst +++ b/docs/api_reference/networks.rst @@ -25,3 +25,7 @@ Deep learning networks LITENetwork AEBiGRUNetwork DisjointCNNNetwork + DCNNNetwork + AEDCNNNetwork + AEAttentionBiGRUNetwork + AEDRNNNetwork diff --git a/docs/api_reference/utils.rst b/docs/api_reference/utils.rst index adaacdf0f2..40dea9f67c 100644 --- a/docs/api_reference/utils.rst +++ b/docs/api_reference/utils.rst @@ -5,18 +5,13 @@ Utility functions ``aeon`` has a number of modules dedicated to utilities: -* :mod:`aeon.pipeline`, which contains generics for pipeline construction. +* :mod:`aeon.pipeline`, which contains functions for pipeline construction. +* :mod:`aeon.testing`, which contains functions for estimator testing and data generation. * :mod:`aeon.utils`, which contains generic utility functions. -Pipeline construction ---------------------- - -:mod:`aeon.pipeline` - -.. automodule:: aeon.pipeline - :no-members: - :no-inherited-members: +Pipeline +-------- .. currentmodule:: aeon.pipeline @@ -26,3 +21,274 @@ Pipeline construction make_pipeline sklearn_to_aeon + +Testing +------- + +Data Generation +^^^^^^^^^^^^^^^ + +.. currentmodule:: aeon.testing.data_generation + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + make_example_3d_numpy + make_example_2d_numpy_collection + make_example_3d_numpy_list + make_example_2d_numpy_list + make_example_dataframe_list + make_example_2d_dataframe_collection + make_example_multi_index_dataframe + make_example_1d_numpy + make_example_2d_numpy_series + make_example_pandas_series + make_example_dataframe_series + +Estimator Checking +^^^^^^^^^^^^^^^^^^ + +.. currentmodule:: aeon.testing.estimator_checking + +.. 
autosummary:: + :toctree: auto_generated/ + :template: function.rst + + check_estimator + parametrize_with_checks + +Mock Estimators +^^^^^^^^^^^^^^^ + +.. currentmodule:: aeon.testing.mock_estimators + +.. autosummary:: + :toctree: auto_generated/ + :template: class.rst + + MockAnomalyDetector + MockAnomalyDetectorRequiresFit + MockAnomalyDetectorRequiresY + MockClassifier + MockClassifierPredictProba + MockClassifierFullTags + MockClassifierParams + MockClassifierComposite + MockCluster + MockDeepClusterer + MockCollectionTransformer + MockForecaster + MockRegressor + MockRegressorFullTags + MockSegmenter + MockSegmenterRequiresY + MockSeriesTransformer + MockUnivariateSeriesTransformer + MockMultivariateSeriesTransformer + MockSeriesTransformerNoFit + MockSimilaritySearch + +Utilities +^^^^^^^^^ + +.. currentmodule:: aeon.testing.utils.deep_equals + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + deep_equals + +.. currentmodule:: aeon.testing.utils.output_suppression + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + suppress_output + +Utils +----- + +Estimator Discovery & Tags +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. currentmodule:: aeon.utils.base + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + get_identifier + +.. currentmodule:: aeon.utils.discovery + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + all_estimators + +.. currentmodule:: aeon.utils.tags + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + check_valid_tags + all_tags_for_estimator + + +Data Conversion & Validation +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. currentmodule:: aeon.utils.conversion + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + resolve_equal_length_inner_type + resolve_unequal_length_inner_type + convert_collection + convert_series + +.. currentmodule:: aeon.utils.validation + +.. 
autosummary:: + :toctree: auto_generated/ + :template: function.rst + + is_int + is_float + is_timedelta + is_date_offset + is_timedelta_or_date_offset + check_n_jobs + check_window_length + get_n_cases + get_type + is_equal_length + has_missing + is_univariate + is_univariate_series + is_single_series + is_collection + is_tabular + is_hierarchical + +Numba +^^^^^ + +.. currentmodule:: aeon.utils.numba.general + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + unique_count + first_order_differences + first_order_differences_2d + first_order_differences_3d + z_normalise_series_with_mean + z_normalise_series + z_normalise_series_2d + z_normalise_series_3d + set_numba_random_seed + choice_log + get_subsequence + get_subsequence_with_mean_std + sliding_mean_std_one_series + combinations_1d + slope_derivative + slope_derivative_2d + slope_derivative_3d + generate_combinations + +.. currentmodule:: aeon.utils.numba.stats + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + mean + row_mean + count_mean_crossing + row_count_mean_crossing + count_above_mean + row_count_above_mean + quantile + row_quantile + median + row_median + quantile25 + row_quantile25 + quantile75 + row_quantile75 + std + std2 + row_std + numba_min + row_numba_min + numba_max + row_numba_max + slope + row_slope + iqr + row_iqr + ppv + row_ppv + fisher_score + prime_up_to + is_prime + +.. currentmodule:: aeon.utils.numba.wavelets + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + haar_transform + multilevel_haar_transform + +Other +^^^^^ + +.. currentmodule:: aeon.utils + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + show_versions + +.. currentmodule:: aeon.utils.sklearn + +.. 
autosummary:: + :toctree: auto_generated/ + :template: function.rst + + is_sklearn_estimator + sklearn_estimator_identifier + is_sklearn_transformer + is_sklearn_classifier + is_sklearn_regressor + is_sklearn_clusterer + +.. currentmodule:: aeon.utils.split + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + split_series + +.. currentmodule:: aeon.utils.windowing + +.. autosummary:: + :toctree: auto_generated/ + :template: function.rst + + sliding_windows + reverse_windowing diff --git a/docs/contributing.md b/docs/contributing.md index 009714f0f8..c07576fa6b 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -5,6 +5,13 @@ kinds of contributions, not just code. Improvements to docs, bug reports, and ta on communications or code of conduct responsibilities are all examples of valuable contributions beyond code which help make `aeon` a great package. +Please consider whether you will be able to tackle an issue or pull request before +assigning yourself to it. If the issue requires editing Python code, you should have +some experience with Python and be able to run tests. If the issue tackles the +specifics of a machine learning algorithm, some relevant knowledge of machine learning +will be required. While we want to encourage new contributors, a base level +of knowledge is required to make a meaningful contribution to certain issues. + In the following we will give a brief overview of how to contribute to `aeon`. Making contributions to open source projects takes a bit of proactivity and can be daunting at first, but members of the community are here to help and answer questions. If you get @@ -21,7 +28,9 @@ for creating a fork of `aeon`. to complete i.e. improving an algorithm, docstring or test. The [good first issue](https://github.com/aeon-toolkit/aeon/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22) list may be a good place to start. 4. 
Post on the issue which you want to work on, so that others know you are working on -it. To assign yourself an **Issue/Pull Request**, please post a comment in the issue +it. **First ensure that the issue is not already being worked on. Look if there are any +linked PRs and search the issue number in the pull requests list.** +To assign yourself an **Issue/Pull Request**, please post a comment in the issue including '@aeon-actions-bot', the username of people to assign and the word `assign`: For example: @@ -33,9 +42,11 @@ working on. A Core Developer may suggest a different issue if the one you chose complex or somebody is already working on it. 5. Create a [pull request (PR)](https://github.com/aeon-toolkit/aeon/compare) with your changes from your fork. For help, see the [GitHub documentation](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request-from-a-fork) -or ask in Slack. Follow the PR template and checklist. +or ask in Slack. Follow the PR template and checklist. Please make sure to include +an appropriate [title tag](contributing/issues.md). 6. A Core Developer will review your PR and may provide feedback, which you can then -address. If you are unsure about any feedback, please ask for clarification. +address. If you are unsure about any feedback, please ask for clarification. Please +be patient, as Core Developers are volunteers and may be busy with other tasks. 7. Once your PR is approved, it will be merged into the `aeon` repository. Thanks for making a contribution! Make sure you are included in the [list of contributors](contributors.md). @@ -58,7 +69,8 @@ recognise various types of contributions. Take a look at our past and current If you are a new contributor, make sure we add you to our list of contributors. All contributions are recorded in [.all-contributorsrc](https://github.com/aeon-toolkit/aeon/blob/main/.all-contributorsrc). 
Alternatively, you can use the [@all-contributors](https://allcontributors.org/docs/en/bot/usage) -bot to do this for you. A list of relevant tags can be found [here](https://allcontributors.org/docs/en/emoji-key). +bot to do this for you. If the contribution is related to a PR, please only create this +when the PR has merged. A list of relevant tags can be found [here](https://allcontributors.org/docs/en/emoji-key). ## Further Reading diff --git a/docs/developer_guide.md b/docs/developer_guide.md index 54bd15659f..b74a075c72 100644 --- a/docs/developer_guide.md +++ b/docs/developer_guide.md @@ -20,9 +20,6 @@ their [developer's guide](https://scikit-learn.org/stable/developers/index.html) :::{grid-item-card} :text-align: center -:::{grid-item-card} -:text-align: center - AEP's ^^^ diff --git a/docs/developer_guide/deprecation.md b/docs/developer_guide/deprecation.md index a4b294b86a..4b10d81cb2 100644 --- a/docs/developer_guide/deprecation.md +++ b/docs/developer_guide/deprecation.md @@ -20,7 +20,6 @@ Note that the deprecation policy does not necessarily apply to modules we class experimental. Currently experimental modules are: - `anomaly_detection` -- `benchmarking` - `forecasting` - `segmentation` - `similarity_search` diff --git a/docs/getting_started.md b/docs/getting_started.md index d5108ade79..36f18583cb 100644 --- a/docs/getting_started.md +++ b/docs/getting_started.md @@ -4,46 +4,50 @@ The following information is designed to get users up and running with `aeon` qu If installation is required, please see our [installation guide](installation) for installing `aeon`. -We assume basic familiarity with the [scikit-learn](https://scikit-learn.org/stable/index.html) -package. If you want help with scikit-learn you may want to view +We assume basic familiarity with the [`scikit-learn`](https://scikit-learn.org/stable/index.html) +package. 
If you want help with `scikit-learn` you may want to view [their getting started guides](https://scikit-learn.org/stable/getting_started.html). `aeon` is an open-source toolkit for learning from time series. It provides access to the very latest algorithms for time series machine learning, in addition to a range of classical techniques for the following learning tasks: -- **Classification**, where a collection of time series labelled with - a discrete value is used to train a model to predict unseen cases ([more details](examples/classification/classification.ipynb)). -- **Regression**, where a collection of time series labelled with - a continuous value is used to train a model to predict unseen cases ([more details](examples/regression/regression.ipynb)). -- **Clustering**, where a collection of time series without any - labels are used to train a model to label cases ([more details](examples/clustering/clustering.ipynb)). -- **Similarity search** where the goal is to evaluate the similarity -between a query time series and a collection of other longer time series ([more details](examples/similarity_search/similarity_search.ipynb)). -- **Anomaly detection** where the goal is to find values or areas of a - single time series that are not representative of the whole series. -- **Segmentation** where the goal is to split a single time series into - regions where the series are sofind areas of a time series that are not - representative of the whole series ([more details](examples/segmentation/segmentation.ipynb)). -- **Forecasting**, where the goal is to predict future values for a time - series (new module coming soon). +- [**Classification**](api_reference/classification), where a collection of time series + labelled with a discrete value is used to train a model to predict unseen cases + ([more details](examples/classification/classification.ipynb)). 
+- [**Regression**](api_reference/regression), where a collection of time series + labelled with a continuous value is used to train a model to predict unseen cases + ([more details](examples/regression/regression.ipynb)). +- [**Clustering**](api_reference/clustering), where a collection of time series without + any labels is used to train a model to label cases + ([more details](examples/clustering/clustering.ipynb)). +- [**Similarity search**](api_reference/similarity_search), where the goal is to evaluate + the similarity between a query time series and a collection of other longer time series + ([more details](examples/similarity_search/similarity_search.ipynb)). +- [**Anomaly detection**](api_reference/anomaly_detection), where the goal is to find + values or areas of a single time series that are not representative of the whole series. +- [**Forecasting**](api_reference/forecasting), where the goal is to predict future values + of a single time series + ([more details](examples/forecasting/forecasting.ipynb)). +- [**Segmentation**](api_reference/segmentation), where the goal is to split a single time + series into segments or regions that are dissimilar to each other, for example + the different activities in a motion trace + ([more details](examples/segmentation/segmentation.ipynb)). `aeon` also provides core modules that are used by the modules above: -- Transformations, where a either a single series or collection is +- [**Transformations**](api_reference/transformations), where either a single series or collection is transformed into a different representation or domain. ([more details](examples/transformations/transformations.ipynb)). -- Distances, which measure the dissimilarity between two time series or +- [**Distances**](api_reference/distances), which measure the dissimilarity between two time series or collections of series and include functions to align series ([more details](examples/distances/distances.ipynb)). 
-- Networks, provides core models for deep learning for all time series tasks ([more - details](examples/networks/deep_learning.ipynb)). +- [**Networks**](api_reference/networks), which provides core models for deep learning for all time series tasks + ([more details](examples/networks/deep_learning.ipynb)). -There are dedicated notebooks going into more detail for each of these modules -(linked above). This guide is meant to give you the briefest of -introductions to the main concepts and +There are dedicated notebooks going into more detail for each of these modules. This +guide is meant to give you the briefest of introductions to the main concepts and code for each task to get started. For more information on the variety of estimators available for each task, see the links above, the [API](api_reference) and -[examples](https://www.aeon-toolkit.org/en/latest/examples.html) -pages. +[examples](https://www.aeon-toolkit.org/en/latest/examples.html) pages. ## A Single Time Series @@ -51,8 +55,8 @@ A time series is a series of real valued data assumed to be ordered. A univariat time series has a single value at each time point. For example, the heartbeat ECG reading from a single sensor or the number of passengers using an airline per month would form a univariate series. Single time series are stored -by default in a numpy array (algorithms use numpy arrays internally whenever possible). -We can also handle `pd.Series` and `pd.DataFrame` objects, but these are simply +by default in a `np.ndarray` (which we try to use internally whenever possible). +We can also handle `pd.Series` and `pd.DataFrame` objects as inputs, but these may be converted to `np.ndarray` internally. The airline series is a classic example of a univariate series from the forecasting domain. The series is the monthly totals of international airline passengers, 1949 to 1960, in thousands. @@ -60,7 +64,7 @@ international airline passengers, 1949 to 1960, in thousands. 
```{code-block} python >>> from aeon.datasets import load_airline >>> y = load_airline() # load an example univariate series as an array ->>> y[:5] +>>> y[:5] # first five time points 606.0 508.0 461.0 @@ -69,17 +73,18 @@ international airline passengers, 1949 to 1960, in thousands. ``` A multivariate time series is made up of multiple series or channels, where each -observation is a vector of related recordings in the same time index. An examples +observation is a vector of related recordings in the same time index. An example would be a motion trace from a smartwatch with at least three dimensions (X,Y,Z co-ordinates), or multiple financial statistics recorded over time. Single -multivariate series input typically -follows the shape `(n_channels, n_timepoints)` when stored in numpy arrays -(sometimes called wide format). +multivariate series input typically follows the shape `(n_channels, n_timepoints)` by +default. Algorithms may have an `axis` parameter to change this, where `axis=1` assumes +the default shape and is the default setting, and `axis=0` assumes the shape +`(n_timepoints, n_channels)`. ```{code-block} python >>> from aeon.datasets import load_uschange >>> data = load_uschange() # load an example multivariate series ->>> data[:,:5] +>>> data[:,:5] # all channels, first five time points [[ 0.61598622 0.46037569 0.87679142 -0.27424514 1.89737076] [ 0.97226104 1.16908472 1.55327055 -0.25527238 1.98715363] [-2.45270031 -0.55152509 -0.35870786 -2.18545486 1.90973412] @@ -94,12 +99,25 @@ the US Change data loaded above has five channels and 187 time points. For more details on our provided datasets and on how to load data into aeon compatible data structures, see our [datasets](examples/datasets/datasets.ipynb) notebooks. -## Single series modules +## Single Series Modules + +Different `aeon` modules work with individual series or collections of series. 
+Estimators in the `anomaly detection`, `forecasting` and `segmentation` modules use +single series input (they inherit from `BaseSeriesEstimator`). The functions in +`distances` take two series as arguments. + +### Anomaly Detection + +Anomaly detection (AD) is the process of identifying observations that are significantly +different from the rest of the data. More details to follow soon, once we have +written the notebook. -Different `aeon` module work with individual series or collections of series. Estimators -in the `anomaly detection` and `segmentation` modules use single -series input (they inherit from `BaseSeriesEstimator`). The functions in `distances` -take two series as arguments. +```{code-block} python +>>> from aeon.datasets import load_airline +>>> from aeon.anomaly_detection import STOMP +>>> stomp = STOMP(window_size=200) +>>> scores = stomp.fit_predict(X) # Get the anomaly scores +``` ### Segmentation @@ -108,9 +126,8 @@ segments or regions that are dissimilar to each other. This could, for example, be the problem of splitting the motion trace from a smartwatch into different activities such as walking, running, and sitting. It is closely related to the field of change point detection, which is a term used more in the statistics -literature. Full information is available in the [segmentation notebooks](Segmentation.ipynb). +literature. -The `aeon` ```{code-block} python >>> from aeon.datasets import load_airline >>> from aeon.segmentation import ClaSPSegmenter @@ -122,6 +139,7 @@ The `aeon` ``` ### Distances + Distances between time series is a primitive operation in very many time series tasks. We have an extensive set of distance functions in the `aeon.distances` module, all optimised using numba. They all work with multivariate and unequal length series. @@ -129,40 +147,18 @@ all optimised using numba. 
They all work with multivariate and unequal length se ```{code-block} python >>> from aeon.datasets import load_japanese_vowels >>> from aeon.distances import dtw_distance ->>> data = load_japanese_vowels() # load an example multivariate series +>>> data = load_japanese_vowels() # load an example multivariate series collection >>> dtw_distance(data[0], data[1]) # calculate the dtw distance 14.416269807978 ``` -### Anomaly Detection - -Anomaly detection (AD) is the process of identifying observations that are significantly -different from the rest of the data. More details to follow soon, once we have -written the notebook. - -```{code-block} python ->>> from aeon.datasets import load_airline ->>> from aeon.anomaly_detection import STOMP ->>> stomp = STOMP(window_size=200) ->>> scores = est.fit_predict(X) # Get the anomaly scores -``` - - - - -### Forecasting - -A new module for time series forecasting (TSF) is coming soon, we are relaunching our -forecasting module. - - ## Collections of Time Series -The estimators in the `classification`, -`regression` and `clustering` modules learn from collections of time -series (they inherit from the class `BaseCollectionEstimator`). Collections of -time series will often be accompanied by an array of target variables for supervised -learning. The module `similarity_search` also works with collections of time series. +The default storage for collections of time series is a 3D `np.ndarray`. +If `n_timepoints` varies between cases, we store a collection in a `list` of +`np.ndarray` arrays, each with the same number of channels. We do not have the +capability to use collections of time series with varying numbers of channels. +We also assume series length is always the same for all channels of a single series. ```{code-block} python >>> from aeon.datasets import load_italy_power_demand @@ -179,28 +175,17 @@ learning. 
The module `similarity_search` also works with collections of time ser ['1' '1' '2' '2' '1'] ``` -We use the terms case and instance interchangably when referring to a single time series -contained in a collection. The size of a collection of time series is referred to as -`n_cases` in code. Collections have the shape ` -(n_cases, n_channels, n_timepoints)` if the series are equal length. We -recommend storing collections in 3D numpy arrays even if each time series is univariate (i.e. -`n_channels == 1`). Collection estimators will work with 2D input of shape `(n_cases, -n_timepoints)` as you would -expect from `scikit-learn`, but it is possible to confuse a collection of -univariate series of shape `(n_cases, n_timepoints)` with a single multivariate -series of shape `(n_channels, n_timepoints)`. This potential confusion is one reason -we make the distinction between series and collection estimators. - -If `n_timepoints` varies between cases, we store a collection in a `list` of 2D numpy -arrays, each with the same number of channels. We do not have the capability to use -collections of time series with varying numbers of channels. We also assume series -length is always the same for all channels of a single series. +We use the terms case and instance interchangeably when referring to a single time +series contained in a collection. The size of a time series collection is referred to as +`n_cases` in code. Collections have the shape `(n_cases, n_channels, n_timepoints)`. -Collection estimators closely follow the `scikit-learn` estimator interface, using -`fit`, `predict`, `transform`, `predict_proba`, `fit_predict` and `fit_transform` -where appropriate. They are also designed to work directly with `scikit-learn` -functionality for e.g. model evaluation, parameter searching and pipelines where -appropriate. +We recommend storing collections in a 3D `np.ndarray` even if each time series is +univariate (i.e. `n_channels == 1`). 
Collection estimators will work with 2D input of +shape `(n_cases, n_timepoints)` as you would expect from `scikit-learn`, but it is +possible to confuse a collection of univariate series of shape `(n_cases, n_timepoints)` +with a single multivariate series of shape `(n_channels, n_timepoints)`. This potential +confusion is one reason we make the distinction between series and collection +estimators. ```{code-block} python >>>from aeon.datasets import load_basic_motions, load_plaid, load_japanese_vowels @@ -220,20 +205,30 @@ appropriate. >>> X4[0].shape (12, 20) ``` + ## Collection based modules +The estimators in the `classification`, `regression` and `clustering` modules learn +from collections of time series (they inherit from the class +`BaseCollectionEstimator`). Collections of time series will often be accompanied by an +array of target variables for supervised learning. The module `similarity_search` also +works with collections of time series. + +Collection estimators closely follow the `scikit-learn` estimator interface, using +`fit`, `predict`, `transform`, `predict_proba`, `fit_predict` and `fit_transform` +where appropriate. They are also designed to work directly with `scikit-learn` +functionality for e.g. model evaluation, parameter searching and pipelines where +appropriate. + ### Classification Time series classification (TSC) involves training a model on a labelled collection of time series. The labels, referred to as `y` in code, should be a `numpy` array of -type `float`, `int` or `str`. Internally the labels are converted to `int` for use -in a training algorithm. +type `int` or `str`. The classification estimator interface should be familiar if you have worked with `scikit-learn`. In this example we fit a [KNeighborsTimeSeriesClassifier](classification.distance_based.KNeighborsTimeSeriesClassifier) -with dynamic time warping (dtw) on our example data. - - +with dynamic time warping (DTW) on our example data. 
```{code-block} python >>> import numpy as np @@ -254,9 +249,7 @@ KNeighborsTimeSeriesClassifier() Once the classifier has been fit using the training data and class labels, we can predict the labels for new cases. Like `scikit-learn`, `predict_proba` methods are available to predict class probabilities and a `score` method is present to -calculate accuracy on new data. Explore the wide range of -algorithms available in `aeon`, including the very latest state-of-the-art, in the -[classification notebooks](examples/classification/classification.ipynb). +calculate accuracy on new data. ### Regression @@ -270,12 +263,8 @@ Time series regression is a term commonly used in forecasting when used in conjunction with a sliding window. However, the term also includes "time series extrinsic regression" where the target variable is not future values but some external variable. - In the following example we use a [KNeighborsTimeSeriesRegressor](regression.distance_based.KNeighborsTimeSeriesRegressor) on an example time series regression problem called [Covid3Month](https://zenodo.org/record/3902690). -More info in our [regression notebook](examples/regression/regression.ipynb)). - - ```{code-block} python >>> from aeon.regression.distance_based import KNeighborsTimeSeriesRegressor @@ -298,9 +287,7 @@ KNeighborsTimeSeriesRegressor() Like classification and regression, time series clustering (TSCL) aims to follow the `scikit-learn` interface where possible. The same input data format is used as in the TSC and TSER modules. This example fits a [TimeSeriesKMeans](clustering._k_means.TimeSeriesKMeans) -clusterer on the -[ArrowHead](http://www.timeseriesclassification.com/description.php?Dataset=ArrowHead) -dataset. +clusterer on the [ArrowHead](http://www.timeseriesclassification.com/description.php?Dataset=ArrowHead) dataset. 
```{code-block} python >>> from aeon.clustering import TimeSeriesKMeans @@ -318,8 +305,7 @@ TimeSeriesKMeans(n_clusters=3) After calling `fit`, the `labels_` attribute contains the cluster labels for each time series. The `predict` method can be used to predict the cluster labels for -new data. See our clustering notebook for [more details](examples/clustering/clustering.ipynb). - +new data. ### Similarity Search @@ -335,9 +321,16 @@ to this format. This collection, asked for the fit method, is stored as a database. It will be used in the predict method, which expects a single 2D time series as input -(n_channels, query_length). This 2D time series will be used as a query to search for in the 3D database. +(n_channels, query_length). This 2D time series will be used as a query to search for in +the 3D database. -The result of the predict method will then depends on wheter you use the [QuerySearch](similarity_search.query_search.QuerySearch) and the [SeriesSearch](similarity_search.series_search.SeriesSearch) estimator. In [QuerySearch](similarity_search.query_search.QuerySearch), the 2D series is a subsequence for which we want to indentify the best (or worst !) matches in the 3D database. For [SeriesSearch](similarity_search.series_search.SeriesSearch), we require a `length` parmater, and we will search for the best matches of all subsequences of size `length` in the 2D series inside the 3D database. By default, these estimators will use the Euclidean (or squared Euclidean) distance, but more distance will be added in the future. +The result of the predict method will then depend on whether you use the [QuerySearch](similarity_search.query_search.QuerySearch) +or the [SeriesSearch](similarity_search.series_search.SeriesSearch) estimator. In [QuerySearch](similarity_search.query_search.QuerySearch), the 2D series is a subsequence +for which we want to identify the best (or worst!) matches in the 3D database. 
+For [SeriesSearch](similarity_search.series_search.SeriesSearch), we require a `length` parameter, and we will search for the best +matches of all subsequences of size `length` in the 2D series inside the 3D database. +By default, these estimators will use the Euclidean (or squared Euclidean) distance, +but more distances will be added in the future. ```{code-block} python >>> import numpy as np @@ -359,7 +352,7 @@ to the subsequence `X[id_sample, :, id_timestamps:id_timestamp + q.shape[0]]`. ## Transformers We split transformers into two categories: those that transform single time series - and those that transform a collection. +and those that transform a collection. ### Transformers for Single Time Series @@ -381,11 +374,10 @@ class to extract the autocorrelation terms of a time series. [0.96019465 0.89567531 0.83739477 0.7977347 0.78594315] ``` - ### Transformers for Collections of Time Series The `aeon.transformations.collections` module contains a range of transformers for -collections of time series. By default these do not allow for single series input, +collections of time series. These do not allow for single series input, treat 2D input types as a collection of univariate series, and have no restrictions on the datatype of output. @@ -437,7 +429,6 @@ series and process unequal length collections. 3.48004859 3.91447337 3.19663426 0. 0. 0. 
]]] ``` - ## Pipelines for aeon estimators Like `scikit-learn`, `aeon` provides pipeline classes which can be used to chain diff --git a/docs/index.md b/docs/index.md index e36cdfc8f9..974d181060 100644 --- a/docs/index.md +++ b/docs/index.md @@ -110,6 +110,25 @@ Anomaly Detection ::: +:::{grid-item-card} +:img-top: examples/forecasting/img/forecasting.png +:class-img-top: aeon-card-image +:text-align: center + +Get started with forecasting + ++++ + +```{button-ref} /examples/forecasting/forecasting.ipynb +:color: primary +:click-parent: +:expand: + +Forecasting +``` + +::: + :::{grid-item-card} :img-top: examples/segmentation/img/segmentation.png :class-img-top: aeon-card-image @@ -253,6 +272,7 @@ is relaxed, so it is suggested that you integrate these modules with care. The c experimental modules are: - `anomaly_detection` +- `forecasting` - `segmentation` - `similarity_search` - `visualisation` diff --git a/examples/distances/distances.ipynb b/examples/distances/distances.ipynb index fbd557ec47..6e441e5533 100644 --- a/examples/distances/distances.ipynb +++ b/examples/distances/distances.ipynb @@ -192,7 +192,7 @@ "\n", "d1 = euclidean_distance(first, second)\n", "d2 = euclidean_distance(first, third)\n", - "d3 = distance(second, third, metric=\"euclidean\")\n", + "d3 = distance(second, third, measure=\"euclidean\")\n", "print(d1, \",\", d2, \",\", d3)" ] }, @@ -568,7 +568,7 @@ "y = np.array([[2, 3, 4, 5, 6, 7]])\n", "p, d = dtw_alignment_path(x, y)\n", "print(\"path =\", p, \" distance = \", d)\n", - "p, d = alignment_path(x, y, metric=\"dtw\")\n", + "p, d = alignment_path(x, y, measure=\"dtw\")\n", "print(\"path =\", p, \" distance = \", d)" ] }, diff --git a/examples/forecasting/forecasting.ipynb b/examples/forecasting/forecasting.ipynb index e17b6667dc..0e0b4ac72f 100644 --- a/examples/forecasting/forecasting.ipynb +++ b/examples/forecasting/forecasting.ipynb @@ -109,7 +109,7 @@ { "cell_type": "code", "source": [ - "from aeon.utils import 
SERIES_DATA_TYPES\n", + "from aeon.utils.data_types import SERIES_DATA_TYPES\n", "\n", "print(\" Possible data structures for input to forecaster \", SERIES_DATA_TYPES)\n", "print(\"\\n Tags for BaseForecaster: \", BaseForecaster.get_class_tags())" diff --git a/examples/forecasting/img/forecasting.png b/examples/forecasting/img/forecasting.png new file mode 100644 index 0000000000..c9316dbe5a Binary files /dev/null and b/examples/forecasting/img/forecasting.png differ diff --git a/examples/networks/deep_learning.ipynb b/examples/networks/deep_learning.ipynb index 9e06feabbb..7cf092f7f3 100644 --- a/examples/networks/deep_learning.ipynb +++ b/examples/networks/deep_learning.ipynb @@ -64,8 +64,8 @@ "cell_type": "code", "metadata": { "ExecuteTime": { - "end_time": "2024-11-21T11:14:08.477299Z", - "start_time": "2024-11-21T11:14:08.433390Z" + "end_time": "2024-11-25T16:48:00.794715Z", + "start_time": "2024-11-25T16:48:00.780244Z" } }, "source": [ @@ -97,7 +97,7 @@ "from aeon.regression.deep_learning import InceptionTimeRegressor" ], "outputs": [], - "execution_count": 7 + "execution_count": 12 }, { "attachments": {}, @@ -116,17 +116,18 @@ "cell_type": "code", "metadata": { "ExecuteTime": { - "end_time": "2024-11-21T11:12:15.141664Z", - "start_time": "2024-11-21T11:12:02.084792Z" + "end_time": "2024-11-25T16:48:20.910216Z", + "start_time": "2024-11-25T16:48:02.721649Z" } }, "source": [ - "xtrain, ytrain = load_classification(name=\"ArrowHead\", split=\"train\")\n", - "xtest, ytest = load_classification(name=\"ArrowHead\", split=\"test\")\n", - "\n", - "inc = InceptionTimeClassifier(n_classifiers=5, use_custom_filters=False, n_epochs=3)\n", + "xtrain, ytrain = load_classification(name=\"GunPoint\", split=\"train\")\n", + "xtest, ytest = load_classification(name=\"GunPoint\", split=\"test\")\n", + "xtrain = xtrain[:10, :, :]\n", + "ytrain = ytrain[:10]\n", + "inc = InceptionTimeClassifier(n_classifiers=2, use_custom_filters=False, n_epochs=2)\n", "inc.fit(X=xtrain, 
y=ytrain)\n", - "ypred = inc.predict(X=xtest)\n", + "ypred = inc.predict(X=xtest[0:5, :, :])\n", "\n", "print(\"Predictions: \", ypred[0:5])\n", "print(\"Ground Truth: \", ytest[0:5])" @@ -136,17 +137,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 72ms/step\n", - "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 78ms/step\n", - "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 72ms/step\n", - "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 74ms/step\n", - "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 82ms/step\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 358ms/step\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 386ms/step\n", "Predictions: ['2' '2' '2' '2' '2']\n", - "Ground Truth: ['0' '0' '0' '0' '0']\n" + "Ground Truth: ['1' '2' '2' '1' '1']\n" ] } ], - "execution_count": 2 + "execution_count": 13 }, { "attachments": {}, @@ -170,17 +168,14 @@ "cell_type": "code", "metadata": { "ExecuteTime": { - "end_time": "2024-11-21T11:12:30.252190Z", - "start_time": "2024-11-21T11:12:15.222773Z" + "end_time": "2024-11-25T16:48:52.055955Z", + "start_time": "2024-11-25T16:48:31.172431Z" } }, "source": [ - "xtrain, ytrain = load_classification(name=\"ArrowHead\", split=\"train\")\n", - "xtest, ytest = load_classification(name=\"ArrowHead\", split=\"test\")\n", - "\n", - "inc = InceptionTimeClassifier(n_classifiers=5, use_custom_filters=True, n_epochs=3)\n", + "inc = InceptionTimeClassifier(n_classifiers=2, use_custom_filters=True, n_epochs=2)\n", "inc.fit(X=xtrain, y=ytrain)\n", - "ypred = inc.predict(X=xtest)\n", + "ypred = 
inc.predict(X=xtest[0:5, :, :])\n", "\n", "print(\"Predictions: \", ypred[0:5])\n", "print(\"Ground Truth: \", ytest[0:5])" @@ -190,17 +185,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 94ms/step\n", - "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 91ms/step\n", - "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 94ms/step\n", - "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 97ms/step\n", - "\u001B[1m3/3\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 96ms/step\n", - "Predictions: ['0' '0' '0' '0' '0']\n", - "Ground Truth: ['0' '0' '0' '0' '0']\n" + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 557ms/step\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1s/step\n", + "Predictions: ['2' '2' '2' '2' '2']\n", + "Ground Truth: ['1' '2' '2' '1' '1']\n" ] } ], - "execution_count": 3 + "execution_count": 14 }, { "cell_type": "markdown", @@ -217,37 +209,36 @@ "cell_type": "code", "metadata": { "ExecuteTime": { - "end_time": "2024-11-21T11:12:43.183566Z", - "start_time": "2024-11-21T11:12:30.257417Z" + "end_time": "2024-11-25T16:49:32.976474Z", + "start_time": "2024-11-25T16:49:14.568788Z" } }, "source": [ - "xtrain, ytrain = load_regression(name=\"Covid3Month\", split=\"train\")\n", - "xtest, ytest = load_regression(name=\"Covid3Month\", split=\"test\")\n", + "x_train, y_train = load_regression(name=\"Covid3Month\", split=\"train\")\n", + "x_test, y_test = load_regression(name=\"Covid3Month\", split=\"test\")\n", + "x_train = x_train[:10, :, :]\n", + "y_train = y_train[:10]\n", "\n", - "inc = InceptionTimeRegressor(n_regressors=5, 
n_epochs=1, use_custom_filters=False)\n", - "inc.fit(X=xtrain, y=ytrain)\n", - "ypred = inc.predict(X=xtest)\n", + "inc = InceptionTimeRegressor(n_regressors=2, n_epochs=1, use_custom_filters=False)\n", + "inc.fit(X=x_train, y=y_train)\n", + "ypred = inc.predict(X=x_test[0:5, :, :])\n", "\n", "print(\"Predictions: \", ypred[0:5])\n", - "print(\"Ground Truth: \", ytest[0:5])" + "print(\"Ground Truth: \", y_train[0:5])" ], "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 107ms/step\n", - "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 109ms/step\n", - "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 112ms/step\n", - "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 114ms/step\n", - "\u001B[1m1/1\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 108ms/step\n", - "Predictions: [-0.4258549 -0.0387525 -0.01732254 -0.60533425 -4.51287463]\n", - "Ground Truth: [0.0118838 0.00379507 0.08298755 0.04510921 0.12783075]\n" + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 374ms/step\n", + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 368ms/step\n", + "Predictions: [ -3.88514614 -0.3810918 -0.2344005 -5.31711912 -37.39011002]\n", + "Ground Truth: [0. 0.07758621 0. 0. 
0.15400309]\n" ] } ], - "execution_count": 4 + "execution_count": 15 }, { "attachments": {}, @@ -274,18 +265,15 @@ "cell_type": "code", "metadata": { "ExecuteTime": { - "end_time": "2024-11-21T11:14:19.577898Z", - "start_time": "2024-11-21T11:14:16.120345Z" + "end_time": "2024-11-25T16:49:45.025079Z", + "start_time": "2024-11-25T16:49:38.455222Z" } }, "source": [ - "xtrain, _ = load_classification(name=\"ArrowHead\", split=\"train\")\n", - "xtest, ytest = load_classification(name=\"ArrowHead\", split=\"test\")\n", - "\n", "aefcn = AEFCNClusterer(\n", " temporal_latent_space=False,\n", " estimator=KMeans(n_clusters=3),\n", - " n_epochs=10,\n", + " n_epochs=3,\n", ")\n", "\n", "aefcn.fit(X=xtrain)\n", @@ -298,14 +286,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "\u001B[1m2/2\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 27ms/step\n", - "\u001B[1m6/6\u001B[0m \u001B[32m━━━━━━━━━━━━━━━━━━━━\u001B[0m\u001B[37m\u001B[0m \u001B[1m0s\u001B[0m 8ms/step \n", - "Predictions: [2 0 2 2 2]\n", - "Ground Truth: ['0' '0' '0' '0' '0']\n" + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 99ms/step\n", + "\u001b[1m5/5\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 14ms/step\n", + "Predictions: [1 0 1 1 0]\n", + "Ground Truth: ['1' '2' '2' '1' '1']\n" ] } ], - "execution_count": 8 + "execution_count": 16 }, { "attachments": {}, @@ -332,42 +320,13 @@ }, { "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "11/11 [==============================] - 0s 29ms/step\n", - "11/11 [==============================] - 0s 29ms/step\n", - "['1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' 
'1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1']\n", - "['1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", - " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1']\n" - ] + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-25T16:49:51.216960Z", + "start_time": "2024-11-25T16:49:47.246635Z" } - ], + }, "source": [ - "xtrain, ytrain = load_classification(name=\"ArrowHead\", split=\"train\")\n", - "xtest, ytest = load_classification(name=\"ArrowHead\", split=\"test\")\n", - "\n", "fcn = FCNClassifier(\n", " save_best_model=True,\n", " save_last_model=True,\n", @@ -392,7 +351,36 @@ "\n", "os.remove(\"./best_fcn.keras\")\n", "os.remove(\"./last_fcn.keras\")" - ] + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m10/10\u001b[0m 
\u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 15ms/step\n", + "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 17ms/step\n", + "['1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1']\n", + "['1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1' '1'\n", + " '1' '1' '1' '1' '1' '1']\n" + ] + } + ], + "execution_count": 17 }, { "attachments": {}, @@ -409,24 +397,23 @@ }, { "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-25T16:50:17.048341Z", + "start_time": "2024-11-25T16:50:17.030069Z" + } + }, "source": [ "# define self-supervised space dimension\n", "\n", "n_dim = 16\n", - "\n", - "# load the data\n", - "\n", - "xtrain, ytrain 
= load_classification(name=\"ArrowHead\", split=\"train\")\n", - "xtest, ytest = load_classification(name=\"ArrowHead\", split=\"test\")\n", - "\n", "# Flip axis to be handled correctly in tensorflow\n", "\n", "xtrain = np.transpose(xtrain, axes=(0, 2, 1))\n", "xtest = np.transpose(xtest, axes=(0, 2, 1))" - ] + ], + "outputs": [], + "execution_count": 18 }, { "cell_type": "markdown", @@ -437,9 +424,12 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-25T16:50:22.513036Z", + "start_time": "2024-11-25T16:50:22.499412Z" + } + }, "source": [ "def triplet_loss_function(alpha):\n", " \"\"\"Create a triplet loss function for triplet-based training.\"\"\"\n", @@ -466,7 +456,9 @@ " return loss\n", "\n", " return temp" - ] + ], + "outputs": [], + "execution_count": 19 }, { "cell_type": "markdown", @@ -477,9 +469,12 @@ }, { "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-25T16:50:26.420369Z", + "start_time": "2024-11-25T16:50:26.207821Z" + } + }, "source": [ "# Define the triplets input layers\n", "\n", @@ -512,7 +507,9 @@ ")\n", "\n", "SSL_model.compile(loss=triplet_loss_function(alpha=1e-5))" - ] + ], + "outputs": [], + "execution_count": 20 }, { "cell_type": "markdown", @@ -523,9 +520,12 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-25T16:50:28.681579Z", + "start_time": "2024-11-25T16:50:28.664819Z" + } + }, "source": [ "def triplet_generation(x):\n", " \"\"\"Generate triplet samples (ref, pos, neg) for triplet loss training.\"\"\"\n", @@ -570,7 +570,9 @@ " neg[i] = w1 * nota + w2 * b2 + w2 * c2\n", "\n", " return ref, pos, neg" - ] + ], + "outputs": [], + "execution_count": 21 }, { "cell_type": "markdown", @@ -581,12 +583,17 @@ }, { "cell_type": "code", - "execution_count": 17, - 
"metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-25T16:50:30.987857Z", + "start_time": "2024-11-25T16:50:30.983090Z" + } + }, "source": [ "xtrain_ref, xtrain_pos, xtrain_neg = triplet_generation(x=xtrain)" - ] + ], + "outputs": [], + "execution_count": 22 }, { "cell_type": "markdown", @@ -597,9 +604,12 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-25T16:50:35.325071Z", + "start_time": "2024-11-25T16:50:35.314592Z" + } + }, "source": [ "reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(\n", " monitor=\"loss\", factor=0.5, patience=50, min_lr=0.0001\n", @@ -612,7 +622,9 @@ ")\n", "\n", "callbacks = [reduce_lr, model_checkpoint]" - ] + ], + "outputs": [], + "execution_count": 23 }, { "cell_type": "markdown", @@ -623,25 +635,17 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-25T16:50:53.529773Z", + "start_time": "2024-11-25T16:50:44.026512Z" } - ], + }, "source": [ "history = SSL_model.fit(\n", " [xtrain_ref, xtrain_pos, xtrain_neg],\n", " np.zeros(shape=len(xtrain)),\n", - " epochs=20,\n", + " epochs=4,\n", " callbacks=callbacks,\n", " verbose=False,\n", ")\n", @@ -650,7 +654,20 @@ "plt.plot(history.history[\"loss\"], lw=3, color=\"blue\", label=\"training loss\")\n", "plt.legend()\n", "plt.show()" - ] + ], + "outputs": [ + { + "data": { + "text/plain": [ + "
" + ], + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "execution_count": 24 }, { "cell_type": "markdown", @@ -661,27 +678,12 @@ }, { "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2/2 [==============================] - 1s 10ms/step\n" - ] - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-25T16:50:57.858973Z", + "start_time": "2024-11-25T16:50:56.487914Z" } - ], + }, "source": [ "plt.cla()\n", "plt.clf()\n", @@ -711,16 +713,41 @@ "\n", "plt.legend()\n", "plt.show()" - ] + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 931ms/step\n" + ] + }, + { + "data": { + "text/plain": [ + "
" + ], + "image/png": "" + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "execution_count": 25 }, { "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], + "metadata": { + "ExecuteTime": { + "end_time": "2024-11-25T16:51:00.107975Z", + "start_time": "2024-11-25T16:51:00.093491Z" + } + }, "source": [ "os.remove(\"./best_ssl_model.keras\")" - ] + ], + "outputs": [], + "execution_count": 26 }, { "attachments": {}, diff --git a/examples/transformations/rocket.ipynb b/examples/transformations/rocket.ipynb index 3e1ed7db1d..c992fc1952 100644 --- a/examples/transformations/rocket.ipynb +++ b/examples/transformations/rocket.ipynb @@ -32,41 +32,49 @@ }, { "cell_type": "code", - "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:46.441933Z", "iopub.status.busy": "2020-12-19T14:32:46.441213Z", "iopub.status.idle": "2020-12-19T14:32:46.443225Z", "shell.execute_reply": "2020-12-19T14:32:46.444014Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:32.515016Z", + "start_time": "2024-11-25T17:01:32.504509Z" } }, - "outputs": [], "source": [ "# !pip install --upgrade numba" - ] + ], + "outputs": [], + "execution_count": 33 }, { "cell_type": "code", - "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:46.448396Z", "iopub.status.busy": "2020-12-19T14:32:46.447602Z", "iopub.status.idle": "2020-12-19T14:32:51.904418Z", "shell.execute_reply": "2020-12-19T14:32:51.905034Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:33.167188Z", + "start_time": "2024-11-25T17:01:33.161609Z" } }, - "outputs": [], "source": [ "import numpy as np\n", "from sklearn.linear_model import RidgeClassifierCV\n", "from sklearn.pipeline import make_pipeline\n", "\n", - "from aeon.datasets import load_arrow_head # univariate dataset\n", "from aeon.datasets import load_basic_motions # multivariate dataset\n", + "from aeon.datasets import load_gunpoint # univariate 
dataset\n", "from aeon.transformations.collection.convolution_based import Rocket" - ] + ], + "outputs": [], + "execution_count": 34 }, { "cell_type": "markdown", @@ -83,19 +91,34 @@ }, { "cell_type": "code", - "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:51.908710Z", "iopub.status.busy": "2020-12-19T14:32:51.908101Z", "iopub.status.idle": "2020-12-19T14:32:51.918987Z", "shell.execute_reply": "2020-12-19T14:32:51.919508Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:34.603321Z", + "start_time": "2024-11-25T17:01:34.573759Z" } }, - "outputs": [], "source": [ - "X_train, y_train = load_arrow_head(split=\"train\")" - ] + "X_train, y_train = load_gunpoint(split=\"train\")\n", + "X_train = X_train[:5, :, :]\n", + "y_train = y_train[:5]\n", + "print(X_train.shape)" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(5, 1, 150)\n" + ] + } + ], + "execution_count": 35 }, { "cell_type": "markdown", @@ -106,21 +129,34 @@ }, { "cell_type": "code", - "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:51.923023Z", "iopub.status.busy": "2020-12-19T14:32:51.922451Z", "iopub.status.idle": "2020-12-19T14:32:52.164365Z", "shell.execute_reply": "2020-12-19T14:32:52.164864Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:35.852821Z", + "start_time": "2024-11-25T17:01:35.837832Z" } }, - "outputs": [], "source": [ - "rocket = Rocket() # by default, ROCKET uses 10,000 kernels\n", + "rocket = Rocket(n_kernels=100) # by default, ROCKET uses 10,000 kernels\n", "rocket.fit(X_train)\n", - "X_train_transform = rocket.transform(X_train)" - ] + "X_train_transform = rocket.transform(X_train)\n", + "print(X_train_transform.shape)" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(5, 200)\n" + ] + } + ], + "execution_count": 36 }, { "cell_type": "markdown", @@ -138,30 +174,448 @@ }, { "cell_type": "code", - 
"execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:52.168847Z", "iopub.status.busy": "2020-12-19T14:32:52.168155Z", "iopub.status.idle": "2020-12-19T14:32:52.284816Z", "shell.execute_reply": "2020-12-19T14:32:52.285506Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:38.060428Z", + "start_time": "2024-11-25T17:01:38.038775Z" } }, + "source": [ + "classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))\n", + "classifier.fit(X_train_transform, y_train)" + ], "outputs": [ { "data": { - "text/plain": "RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,\n 4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,\n 2.15443469e+02, 1.00000000e+03]))", - "text/html": "
RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,\n       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,\n       2.15443469e+02, 1.00000000e+03]))
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "text/plain": [ + "RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,\n", + " 4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,\n", + " 2.15443469e+02, 1.00000000e+03]))" + ], + "text/html": [ + "
RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,\n",
+       "       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,\n",
+       "       2.15443469e+02, 1.00000000e+03]))
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] }, - "execution_count": 5, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))\n", - "classifier.fit(X_train_transform, y_train)" - ] + "execution_count": 37 }, { "cell_type": "markdown", @@ -172,20 +626,24 @@ }, { "cell_type": "code", - "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:52.289448Z", "iopub.status.busy": "2020-12-19T14:32:52.288717Z", "iopub.status.idle": "2020-12-19T14:32:53.307829Z", "shell.execute_reply": "2020-12-19T14:32:53.308341Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:39.178929Z", + "start_time": "2024-11-25T17:01:39.136007Z" } }, - "outputs": [], "source": [ - "X_test, y_test = load_arrow_head(split=\"test\")\n", + "X_test, y_test = load_gunpoint(split=\"test\")\n", "X_test_transform = rocket.transform(X_test)" - ] + ], + "outputs": [], + "execution_count": 38 }, { "cell_type": "markdown", @@ -196,7 +654,6 @@ }, { "cell_type": "code", - "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:53.312125Z", @@ -204,21 +661,28 @@ "iopub.status.idle": "2020-12-19T14:32:53.409775Z", "shell.execute_reply": "2020-12-19T14:32:53.410342Z" }, - "scrolled": true + "scrolled": true, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:40.547350Z", + "start_time": "2024-11-25T17:01:40.533334Z" + } }, + "source": [ + "classifier.score(X_test_transform, y_test)" + ], "outputs": [ { "data": { - "text/plain": "0.7771428571428571" + "text/plain": [ + "0.64" + ] }, - "execution_count": 7, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "classifier.score(X_test_transform, y_test)" - ] + "execution_count": 39 }, { "cell_type": "markdown", @@ -235,19 +699,23 @@ }, { "cell_type": "code", - "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:53.413597Z", "iopub.status.busy": 
"2020-12-19T14:32:53.412786Z", "iopub.status.idle": "2020-12-19T14:32:53.775638Z", "shell.execute_reply": "2020-12-19T14:32:53.776690Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:41.782580Z", + "start_time": "2024-11-25T17:01:41.767897Z" } }, - "outputs": [], "source": [ "X_train, y_train = load_basic_motions(split=\"train\")" - ] + ], + "outputs": [], + "execution_count": 40 }, { "cell_type": "markdown", @@ -258,21 +726,37 @@ }, { "cell_type": "code", - "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:53.794896Z", "iopub.status.busy": "2020-12-19T14:32:53.794345Z", "iopub.status.idle": "2020-12-19T14:32:54.613570Z", "shell.execute_reply": "2020-12-19T14:32:54.614198Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:42.949980Z", + "start_time": "2024-11-25T17:01:42.918211Z" } }, - "outputs": [], "source": [ - "rocket = Rocket()\n", + "rocket = Rocket(n_kernels=100) # by default, ROCKET uses 10,000 kernels\n", "rocket.fit(X_train)\n", - "X_train_transform = rocket.transform(X_train)" - ] + "X_train_transform = rocket.transform(X_train)\n", + "X_train_transform.shape" + ], + "outputs": [ + { + "data": { + "text/plain": [ + "(40, 200)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 41 }, { "cell_type": "markdown", @@ -283,30 +767,448 @@ }, { "cell_type": "code", - "execution_count": 10, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:54.618359Z", "iopub.status.busy": "2020-12-19T14:32:54.617890Z", "iopub.status.idle": "2020-12-19T14:32:54.836560Z", "shell.execute_reply": "2020-12-19T14:32:54.837249Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:44.038154Z", + "start_time": "2024-11-25T17:01:44.002549Z" } }, + "source": [ + "classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))\n", + "classifier.fit(X_train_transform, y_train)" + ], "outputs": [ { "data": { - "text/plain": 
"RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,\n 4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,\n 2.15443469e+02, 1.00000000e+03]))", - "text/html": "
RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,\n       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,\n       2.15443469e+02, 1.00000000e+03]))
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "text/plain": [ + "RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,\n", + " 4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,\n", + " 2.15443469e+02, 1.00000000e+03]))" + ], + "text/html": [ + "
RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,\n",
+       "       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,\n",
+       "       2.15443469e+02, 1.00000000e+03]))
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] }, - "execution_count": 10, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))\n", - "classifier.fit(X_train_transform, y_train)" - ] + "execution_count": 42 }, { "cell_type": "markdown", @@ -317,20 +1219,24 @@ }, { "cell_type": "code", - "execution_count": 11, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:54.841004Z", "iopub.status.busy": "2020-12-19T14:32:54.840351Z", "iopub.status.idle": "2020-12-19T14:32:55.906455Z", "shell.execute_reply": "2020-12-19T14:32:55.907064Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:45.150937Z", + "start_time": "2024-11-25T17:01:45.106121Z" } }, - "outputs": [], "source": [ "X_test, y_test = load_basic_motions(split=\"test\")\n", "X_test_transform = rocket.transform(X_test)" - ] + ], + "outputs": [], + "execution_count": 43 }, { "cell_type": "markdown", @@ -341,7 +1247,6 @@ }, { "cell_type": "code", - "execution_count": 12, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:55.910253Z", @@ -349,21 +1254,28 @@ "iopub.status.idle": "2020-12-19T14:32:56.008364Z", "shell.execute_reply": "2020-12-19T14:32:56.008931Z" }, - "scrolled": true + "scrolled": true, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:46.229312Z", + "start_time": "2024-11-25T17:01:46.215072Z" + } }, + "source": [ + "classifier.score(X_test_transform, y_test)" + ], "outputs": [ { "data": { - "text/plain": "0.975" + "text/plain": [ + "0.975" + ] }, - "execution_count": 12, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "classifier.score(X_test_transform, y_test)" - ] + "execution_count": 44 }, { "cell_type": "markdown", @@ -380,21 +1292,25 @@ }, { "cell_type": "code", - "execution_count": 13, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:56.012465Z", "iopub.status.busy": "2020-12-19T14:32:56.011939Z", "iopub.status.idle": 
"2020-12-19T14:32:56.013801Z", "shell.execute_reply": "2020-12-19T14:32:56.014399Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:47.349648Z", + "start_time": "2024-11-25T17:01:47.345129Z" } }, - "outputs": [], "source": [ "rocket_pipeline = make_pipeline(\n", " Rocket(), RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))\n", ")" - ] + ], + "outputs": [], + "execution_count": 45 }, { "cell_type": "markdown", @@ -405,33 +1321,457 @@ }, { "cell_type": "code", - "execution_count": 14, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:56.017692Z", "iopub.status.busy": "2020-12-19T14:32:56.017166Z", "iopub.status.idle": "2020-12-19T14:32:56.420648Z", "shell.execute_reply": "2020-12-19T14:32:56.421247Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:49.740497Z", + "start_time": "2024-11-25T17:01:48.459632Z" } }, + "source": [ + "# it is necessary to pass y_train to the pipeline\n", + "# y_train is not used for the transform, but it is used by the classifier\n", + "rocket_pipeline.fit(X_train, y_train)" + ], "outputs": [ { "data": { - "text/plain": "Pipeline(steps=[('rocket', Rocket()),\n ('ridgeclassifiercv',\n RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,\n 4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,\n 2.15443469e+02, 1.00000000e+03])))])", - "text/html": "
Pipeline(steps=[('rocket', Rocket()),\n                ('ridgeclassifiercv',\n                 RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,\n       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,\n       2.15443469e+02, 1.00000000e+03])))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + "text/plain": [ + "Pipeline(steps=[('rocket', Rocket()),\n", + " ('ridgeclassifiercv',\n", + " RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,\n", + " 4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,\n", + " 2.15443469e+02, 1.00000000e+03])))])" + ], + "text/html": [ + "
Pipeline(steps=[('rocket', Rocket()),\n",
+       "                ('ridgeclassifiercv',\n",
+       "                 RidgeClassifierCV(alphas=array([1.00000000e-03, 4.64158883e-03, 2.15443469e-02, 1.00000000e-01,\n",
+       "       4.64158883e-01, 2.15443469e+00, 1.00000000e+01, 4.64158883e+01,\n",
+       "       2.15443469e+02, 1.00000000e+03])))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] }, - "execution_count": 14, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "X_train, y_train = load_arrow_head(split=\"train\")\n", - "\n", - "# it is necessary to pass y_train to the pipeline\n", - "# y_train is not used for the transform, but it is used by the classifier\n", - "rocket_pipeline.fit(X_train, y_train)" - ] + "execution_count": 46 }, { "cell_type": "markdown", @@ -442,30 +1782,41 @@ }, { "cell_type": "code", - "execution_count": 15, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:32:56.425026Z", "iopub.status.busy": "2020-12-19T14:32:56.424348Z", "iopub.status.idle": "2020-12-19T14:32:57.602704Z", "shell.execute_reply": "2020-12-19T14:32:57.603291Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T17:01:50.960464Z", + "start_time": "2024-11-25T17:01:49.763086Z" } }, + "source": [ + "rocket_pipeline.score(X_test, y_test)" + ], "outputs": [ { "data": { - "text/plain": "0.7885714285714286" + "text/plain": [ + "0.975" + ] }, - "execution_count": 15, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - "X_test, y_test = load_arrow_head(split=\"test\")\n", - "\n", - "rocket_pipeline.score(X_test, y_test)" - ] + "execution_count": 47 + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": "" } ], "metadata": { diff --git a/examples/transformations/tsfresh.ipynb b/examples/transformations/tsfresh.ipynb index da00e2f48e..d1d37d1761 100644 --- a/examples/transformations/tsfresh.ipynb +++ b/examples/transformations/tsfresh.ipynb @@ -17,39 +17,47 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:30:39.713903Z", "iopub.status.busy": "2020-12-19T14:30:39.713342Z", "iopub.status.idle": "2020-12-19T14:30:39.715128Z", "shell.execute_reply": "2020-12-19T14:30:39.715641Z" + }, + "ExecuteTime": { + "end_time": 
"2024-11-25T14:07:05.457198Z", + "start_time": "2024-11-25T14:07:05.449815Z" } }, - "outputs": [], "source": [ "# !pip install --upgrade tsfresh" - ] + ], + "outputs": [], + "execution_count": 1 }, { "cell_type": "code", - "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:30:39.719083Z", "iopub.status.busy": "2020-12-19T14:30:39.718586Z", "iopub.status.idle": "2020-12-19T14:30:40.743724Z", "shell.execute_reply": "2020-12-19T14:30:40.744213Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T14:07:07.829632Z", + "start_time": "2024-11-25T14:07:06.056664Z" } }, - "outputs": [], "source": [ "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.pipeline import make_pipeline\n", "\n", "from aeon.datasets import load_arrow_head, load_basic_motions\n", "from aeon.transformations.collection.feature_based import TSFresh, TSFreshRelevant" - ] + ], + "outputs": [], + "execution_count": 2 }, { "cell_type": "markdown", @@ -59,46 +67,43 @@ "\n", "We use the ArrowHead data from the [UCR TSC archive](https://timeseriesclassification.com).\n", "as an example dataset. See\n", - "[dataset notebook](https://github.com/aeon-toolkit/aeon/blob/main/examples/datasets\n", - "/provided_data.ipynb) for more details." + "[dataset notebook](https://github.com/aeon-toolkit/aeon/blob/main/examples/datasets/provided_data.ipynb) for more details. We only use the first few cases for examples to speed up the \n", + "notebook. 
" ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:30:40.748159Z", "iopub.status.busy": "2020-12-19T14:30:40.747656Z", "iopub.status.idle": "2020-12-19T14:30:40.795200Z", "shell.execute_reply": "2020-12-19T14:30:40.795889Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T14:07:09.120656Z", + "start_time": "2024-11-25T14:07:09.090118Z" } }, - "outputs": [], "source": [ - "X_train, y_train = load_arrow_head(split=\"train\")\n", - "X_test, y_test = load_arrow_head(split=\"test\")\n", + "X, y = load_arrow_head()\n", + "n_cases = 24\n", + "X_train = X[:n_cases, :, :]\n", + "y_train = y[:n_cases]\n", + "X_test = X[n_cases : 2 * n_cases, :, :]\n", + "y_test = y[n_cases : 2 * n_cases]\n", "print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "execution": { - "iopub.execute_input": "2020-12-19T14:30:40.808841Z", - "iopub.status.busy": "2020-12-19T14:30:40.808198Z", - "iopub.status.idle": "2020-12-19T14:30:40.816155Z", - "shell.execute_reply": "2020-12-19T14:30:40.816682Z" - }, - "jupyter": { - "outputs_hidden": false + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(24, 1, 251) (24,) (24, 1, 251) (24,)\n" + ] } - }, - "outputs": [], - "source": [ - "X_train[0]" - ] + ], + "execution_count": 3 }, { "cell_type": "markdown", @@ -114,22 +119,34 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:30:40.829452Z", "iopub.status.busy": "2020-12-19T14:30:40.828907Z", "iopub.status.idle": "2020-12-19T14:30:53.049755Z", "shell.execute_reply": "2020-12-19T14:30:53.050249Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T14:07:16.339473Z", + "start_time": "2024-11-25T14:07:11.573523Z" } }, - "outputs": [], "source": [ "t = TSFresh()\n", "Xt = t.fit_transform(X_train)\n", - "Xt.shape\n", - "Xt2 = 
t.transform(X_test)" - ] + "Xt2 = t.transform(X_test)\n", + "print(f\"Train shape = {Xt.shape} test shape = {Xt2.shape}\")" + ], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Train shape = (24, 777) test shape = (24, 777)\n" + ] + } + ], + "execution_count": 4 }, { "cell_type": "markdown", @@ -143,8 +160,6 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [], "source": [ "t = TSFreshRelevant()\n", "t.fit(X_train, y_train)\n", @@ -152,8 +167,25 @@ "Xt.shape" ], "metadata": { - "collapsed": false - } + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-11-25T14:07:32.455607Z", + "start_time": "2024-11-25T14:07:26.124172Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(24, 75)" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 5 }, { "cell_type": "markdown", @@ -166,16 +198,18 @@ }, { "cell_type": "code", - "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:30:53.062147Z", "iopub.status.busy": "2020-12-19T14:30:53.061631Z", "iopub.status.idle": "2020-12-19T14:31:09.307275Z", "shell.execute_reply": "2020-12-19T14:31:09.307781Z" + }, + "ExecuteTime": { + "end_time": "2024-11-25T14:07:41.090159Z", + "start_time": "2024-11-25T14:07:36.403997Z" } }, - "outputs": [], "source": [ "classifier = make_pipeline(\n", " TSFresh(default_fc_parameters=\"efficient\", show_warnings=False),\n", @@ -183,7 +217,20 @@ ")\n", "classifier.fit(X_train, y_train)\n", "classifier.score(X_test, y_test)" - ] + ], + "outputs": [ + { + "data": { + "text/plain": [ + "0.625" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 6 }, { "cell_type": "markdown", @@ -197,26 +244,12 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[7 1 4 4 0 4 4 5 2 1 5 7 4 4 5 0 0 6 6 0 6 4 5 6 
3 6 7 1 6 4 4 1 5 0 4 4 7\n", - " 6 6 2 1 0 0 4 6 5 4 6 4 6 6 0 4 6 1 1 4 1 4 1 4 0 1 4 1 5 4 7 4 7 6 4 6 1\n", - " 6 4 6 7 4 6 6 1 6 1 4 7 6 4 6 0 4 6 4 6 6 4 0 3 4 6 4 1 0 0 4 4 6 1 0 7 4\n", - " 6 0 4 4 0 1 6 6 0 2 0 6 0 3 6 5 7 6 4 4 3 6 6 6 1 7 4 6 6 4 4 6 6 0 4 6 4\n", - " 5 0 4 4 6 4 6 1 5 6 6 0 6 0 3 4 4 6 1 5 3 7 6 6 6 7 4]\n" - ] - } - ], "source": [ "from aeon.classification.feature_based import TSFreshClassifier\n", "from aeon.clustering.feature_based import TSFreshClusterer\n", "\n", - "cls = TSFreshClassifier()\n", - "clst = TSFreshClusterer()\n", + "cls = TSFreshClassifier(relevant_feature_extractor=False)\n", + "clst = TSFreshClusterer(n_clusters=2)\n", "\n", "cls.fit(X_train, y_train)\n", "cls.score(X_test, y_test)\n", @@ -225,8 +258,24 @@ "print(clst.predict(X_test))" ], "metadata": { - "collapsed": false - } + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-11-25T14:08:02.405107Z", + "start_time": "2024-11-25T14:07:50.878523Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['0' '1' '0' '1' '1' '2' '0' '1' '1' '0' '1' '1' '0' '2' '0' '0' '0' '2'\n", + " '2' '1' '0' '0' '0' '0']\n", + "[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]\n" + ] + } + ], + "execution_count": 7 }, { "cell_type": "markdown", @@ -242,29 +291,33 @@ }, { "cell_type": "code", - "execution_count": null, + "source": [ + "from aeon.classification.sklearn import RotationForestClassifier\n", + "\n", + "cls = TSFreshClassifier(estimator=RotationForestClassifier(n_estimators=5))\n", + "cls.fit(X_train, y_train)\n", + "cls.score(X_test, y_test)" + ], + "metadata": { + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-11-25T14:08:13.304452Z", + "start_time": "2024-11-25T14:08:06.677532Z" + } + }, "outputs": [ { "data": { - "text/plain": "0.5771428571428572" + "text/plain": [ + "0.5833333333333334" + ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], - "source": [ - 
"from aeon.classification.sklearn import RotationForestClassifier\n", - "\n", - "cls = TSFreshClassifier(\n", - " relevant_feature_extractor=False, estimator=RotationForestClassifier(n_estimators=5)\n", - ") #\n", - "cls.fit(X_train, y_train)\n", - "cls.score(X_test, y_test)" - ], - "metadata": { - "collapsed": false - } + "execution_count": 8 }, { "cell_type": "markdown", @@ -279,20 +332,6 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[1 0 1 1 1 1 1 0 2 0 0 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 0 1 1 1 1\n", - " 1 1 2 1 1 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1\n", - " 1 1 1 1 1 1 1 1 1 0 1 2 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1\n", - " 1 1 1 1 1 0 1 1 1 2 1 1 1 0 1 0 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n", - " 0 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0 0 1 1 1 1 1 1]\n" - ] - } - ], "source": [ "from sklearn.cluster import KMeans\n", "\n", @@ -301,8 +340,22 @@ "print(clst.predict(X_test))" ], "metadata": { - "collapsed": false - } + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-11-25T14:08:38.025066Z", + "start_time": "2024-11-25T14:08:33.300907Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 2 0 0 1]\n" + ] + } + ], + "execution_count": 9 }, { "cell_type": "markdown", @@ -316,39 +369,442 @@ }, { "cell_type": "code", - "execution_count": null, - "outputs": [ - { - "data": { - "text/plain": "TSFreshRegressor()", - "text/html": "
TSFreshRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "text/plain": "TSFreshRegressor()", - "text/html": "
TSFreshRegressor()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ "from aeon.regression.feature_based import TSFreshRegressor\n", "\n", - "reg = TSFreshRegressor()\n", + "reg = TSFreshRegressor(relevant_feature_extractor=False)\n", "from aeon.datasets import load_covid_3month\n", "\n", - "X, y = load_covid_3month()\n", + "X, y = load_covid_3month(split=\"train\")\n", "reg.fit(X, y)" ], "metadata": { - "collapsed": false - } + "collapsed": false, + "ExecuteTime": { + "end_time": "2024-11-25T14:09:11.745540Z", + "start_time": "2024-11-25T14:08:56.573376Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "TSFreshRegressor(relevant_feature_extractor=False)" + ], + "text/html": [ + "
TSFreshRegressor(relevant_feature_extractor=False)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 10 }, { "cell_type": "markdown", @@ -363,14 +819,13 @@ "source": [ "## TSFresh with multivariate time series data\n", "\n", - "All three estimators can be used with multivariate time series. The estimators\n", - "calculate the features on each channel independently then concatenate the results.\n", - "The full transform creates `777*n_channels` features." + "``TSFresh`` transformers and all three estimators can be used with multivariate time \n", + "series. The transform calculates the features on each channel independently then \n", + "concatenate the results. The full transform creates `777*n_channels` features." ] }, { "cell_type": "code", - "execution_count": null, "metadata": { "execution": { "iopub.execute_input": "2020-12-19T14:31:09.311742Z", @@ -378,8 +833,17 @@ "iopub.status.idle": "2020-12-19T14:31:09.380791Z", "shell.execute_reply": "2020-12-19T14:31:09.381304Z" }, - "scrolled": true + "scrolled": true, + "ExecuteTime": { + "end_time": "2024-11-25T14:11:57.583864Z", + "start_time": "2024-11-25T14:11:57.545946Z" + } }, + "source": [ + "X_train, y_train = load_basic_motions(split=\"train\")\n", + "X_test, y_test = load_basic_motions(split=\"test\")\n", + "print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)" + ], "outputs": [ { "name": "stdout", @@ -389,16 +853,10 @@ ] } ], - "source": [ - "X_train, y_train = load_basic_motions(split=\"train\")\n", - "X_test, y_test = load_basic_motions(split=\"test\")\n", - "print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)" - ] + "execution_count": 14 }, { "cell_type": "code", - "execution_count": null, - "outputs": [], "source": [ "tsfresh = TSFresh()\n", "X = tsfresh.fit_transform(X_train, y_train)\n", @@ -408,24 +866,32 @@ "collapsed": false, "pycharm": { "is_executing": true + }, + "ExecuteTime": { + "end_time": "2024-11-25T14:12:19.453228Z", + "start_time": 
"2024-11-25T14:11:58.795027Z" } - } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(40, 4662)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "execution_count": 15 }, { + "metadata": {}, "cell_type": "code", - "execution_count": null, "outputs": [], - "source": [ - "cls = TSFreshClassifier()\n", - "clst = TSFreshClusterer(estimator=KMeans(n_clusters=4))\n", - "cls.fit(X_train, y_train)\n", - "cls.score(X_test, y_test)\n", - "clst.fit(X_train)\n", - "print(cls.predict(X_test))" - ], - "metadata": { - "collapsed": false - } + "execution_count": null, + "source": "" } ], "metadata": {