Merge remote-tracking branch 'origin/main' into mm/docs2

# Conflicts:
#	examples/pydata/Amsterdam-2023/Lets_do_the_time_warp_again.ipynb
aeon-toolkit · Nov 26, 2024 · f563a0a · f563a0a
2 parents 7c40473 + f4d2daa
commit f563a0a
Show file tree

Hide file tree

Showing 90 changed files with 3,097 additions and 839 deletions.
diff --git a/.github/utilities/run_examples.sh b/.github/utilities/run_examples.sh
@@ -9,6 +9,7 @@ excluded=()
 if [ "$1" = true ]; then
   excluded+=(
     "examples/datasets/load_data_from_web.ipynb"
+    "examples/benchmarking/published_results.ipynb"
     "examples/benchmarking/reference_results.ipynb"
     "examples/benchmarking/bakeoff_results.ipynb"
     "examples/benchmarking/regression.ipynb"
@@ -21,6 +22,7 @@ if [ "$1" = true ]; then
     "examples/classification/interval_based.ipynb"
     "examples/classification/shapelet_based.ipynb"
     "examples/classification/convolution_based.ipynb"
+    "examples/similarity_search/code_speed.ipynb"
 
   )
 fi

diff --git a/.github/workflows/periodic_tests.yml b/.github/workflows/periodic_tests.yml
@@ -83,6 +83,31 @@ jobs:
           # Save cache with the current date (ENV set in numba_cache action)
           key: numba-run-notebook-examples-${{ runner.os }}-3.10-${{ env.CURRENT_DATE }}
 
+  test-core-imports:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python 3.10
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install aeon and dependencies
+        uses: nick-fields/retry@v3
+        with:
+          timeout_minutes: 30
+          max_attempts: 3
+          command: python -m pip install .
+
+      - name: Show dependencies
+        run: python -m pip list
+
+      - name: Run import test
+        run: python aeon/testing/tests/test_core_imports.py
+
   test-no-soft-deps:
     runs-on: ubuntu-22.04
 

diff --git a/.github/workflows/pr_core_dep_import.yml b/.github/workflows/pr_core_dep_import.yml
@@ -0,0 +1,43 @@
+name: PR module imports
+
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+    branches:
+      - main
+    paths:
+      - "aeon/**"
+      - ".github/workflows/**"
+      - "pyproject.toml"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  test-core-imports:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Setup Python 3.10
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.10"
+
+      - name: Install aeon and dependencies
+        uses: nick-fields/retry@v3
+        with:
+          timeout_minutes: 30
+          max_attempts: 3
+          command: python -m pip install .
+
+      - name: Show dependencies
+        run: python -m pip list
+
+      - name: Run import test
+        run: python aeon/testing/tests/test_core_imports.py
diff --git a/.github/workflows/pr_pytest.yml b/.github/workflows/pr_pytest.yml
@@ -48,7 +48,7 @@ jobs:
         run: python -m pip list
 
       - name: Run tests
-        run: python -m pytest -n logical -k 'not TestAll'
+        run: python -m pytest -n logical
 
   pytest:
     runs-on: ${{ matrix.os }}

diff --git a/.readthedocs.yml b/.readthedocs.yml
@@ -11,7 +11,7 @@ python:
         - docs
 
 build:
-  os: ubuntu-20.04
+  os: ubuntu-22.04
   tools:
     python: "3.10"
 

diff --git a/README.md b/README.md
@@ -22,6 +22,7 @@ The following modules are still considered experimental, and the [deprecation po
 does not apply:
 
 - `anomaly_detection`
+- `forecasting`
 - `segmentation`
 - `similarity_search`
 - `visualisation`

diff --git a/aeon/base/tests/test_base_collection.py b/aeon/base/tests/test_base_collection.py
@@ -14,7 +14,7 @@
     UNEQUAL_LENGTH_MULTIVARIATE_CLASSIFICATION,
     UNEQUAL_LENGTH_UNIVARIATE_CLASSIFICATION,
 )
-from aeon.utils import COLLECTIONS_DATA_TYPES
+from aeon.utils.data_types import COLLECTIONS_DATA_TYPES
 from aeon.utils.validation import get_type
 
 

diff --git a/aeon/classification/distance_based/_proximity_tree.py b/aeon/classification/distance_based/_proximity_tree.py
@@ -234,7 +234,7 @@ def _get_best_splitter(self, X, y):
                     dist = distance(
                         X[j],
                         splitter[0][labels[k]],
-                        metric=measure,
+                        measure=measure,
                         **splitter[1][measure],
                     )
                     if dist < min_dist:
@@ -320,7 +320,7 @@ def _build_tree(self, X, y, depth, node_id, parent_target_value=None):
                 dist = distance(
                     X[i],
                     splitter[0][labels[j]],
-                    metric=measure,
+                    measure=measure,
                     **splitter[1][measure],
                 )
                 if dist < min_dist:
@@ -404,7 +404,7 @@ def _classify(self, treenode, x):
                 dist = distance(
                     x,
                     treenode.splitter[0][branches[i]],
-                    metric=measure,
+                    measure=measure,
                     **treenode.splitter[1][measure],
                 )
                 if dist < min_dist:

diff --git a/aeon/classification/distance_based/_time_series_neighbors.py b/aeon/classification/distance_based/_time_series_neighbors.py
@@ -111,7 +111,7 @@ def _fit(self, X, y):
         y : array-like, shape = (n_cases)
             The class labels.
         """
-        self.metric_ = get_distance_function(metric=self.distance)
+        self.metric_ = get_distance_function(measure=self.distance)
         self.X_ = X
         self.classes_, self.y_ = np.unique(y, return_inverse=True)
         return self

diff --git a/aeon/classification/tests/test_base.py b/aeon/classification/tests/test_base.py
@@ -15,7 +15,7 @@
     EQUAL_LENGTH_UNIVARIATE_CLASSIFICATION,
     UNEQUAL_LENGTH_UNIVARIATE_CLASSIFICATION,
 )
-from aeon.utils import COLLECTIONS_DATA_TYPES
+from aeon.utils.data_types import COLLECTIONS_DATA_TYPES
 
 __maintainer__ = []
 

diff --git a/aeon/clustering/_clara.py b/aeon/clustering/_clara.py
@@ -42,7 +42,7 @@ class TimeSeriesCLARA(BaseClusterer):
         If a np.ndarray provided it must be of shape (n_clusters,) and contain
         the indexes of the time series to use as centroids.
     distance : str or Callable, default='msm'
-        Distance metric to compute similarity between time series. A list of valid
-        strings for metrics can be found in the documentation for
+        Distance measure to compute similarity between time series. A list of valid
+        strings for measures can be found in the documentation for
         :func:`aeon.distances.get_distance_function`. If a callable is passed it must be
         a function that takes two 2d numpy arrays as input and returns a float.
@@ -73,7 +73,7 @@ class TimeSeriesCLARA(BaseClusterer):
         If `None`, the random number generator is the `RandomState` instance used
         by `np.random`.
     distance_params : dict, default=None
-        Dictionary containing kwargs for the distance metric being used.
+        Dictionary containing kwargs for the distance measure being used.
 
     Attributes
     ----------
@@ -189,7 +189,7 @@ def _fit(self, X: np.ndarray, y=None):
             curr_centers = pam.cluster_centers_
             if isinstance(pam.distance, str):
                 pairwise_matrix = pairwise_distance(
-                    X, curr_centers, metric=self.distance, **pam._distance_params
+                    X, curr_centers, measure=self.distance, **pam._distance_params
                 )
             else:
                 pairwise_matrix = pairwise_distance(

diff --git a/aeon/clustering/_clarans.py b/aeon/clustering/_clarans.py
@@ -43,8 +43,8 @@ class TimeSeriesCLARANS(TimeSeriesKMedoids):
         If a np.ndarray provided it must be of shape (n_clusters,) and contain
         the indexes of the time series to use as centroids.
     distance : str or Callable, default='msm'
-        Distance metric to compute similarity between time series. A list of valid
-        strings for metrics can be found in the documentation for
+        Distance measure to compute similarity between time series. A list of valid
+        strings for measures can be found in the documentation for
         :func:`aeon.distances.get_distance_function`. If a callable is passed it must be
         a function that takes two 2d numpy arrays as input and returns a float.
     max_neighbours : int, default=None,
@@ -62,7 +62,7 @@ class TimeSeriesCLARANS(TimeSeriesKMedoids):
     random_state : int or np.random.RandomState instance or None, default=None
         Determines random number generation for centroid initialization.
     distance_params : dict, default=None
-        Dictionary containing kwargs for the distance metric being used.
+        Dictionary containing kwargs for the distance measure being used.
 
     Attributes
     ----------

diff --git a/aeon/clustering/_elastic_som.py b/aeon/clustering/_elastic_som.py
@@ -44,8 +44,8 @@ class ElasticSOM(BaseClusterer):
     n_clusters : int, default=8
         The number of clusters to form as well as the number of centroids to generate.
     distance : str or Callable, default='dtw'
-        Distance metric to compute similarity between time series. A list of valid
-        strings for metrics can be found in the documentation for
+        Distance measure to compute similarity between time series. A list of valid
+        strings for measures can be found in the documentation for
         :func:`aeon.distances.get_distance_function`. If a callable is passed it must be
         a function that takes two 2d numpy arrays as input and returns a float.
     init : str or np.ndarray, default='random'
@@ -224,7 +224,7 @@ def _find_bmu(self, x, weights):
         pairwise_matrix = pairwise_distance(
             x,
             weights,
-            metric=self.distance,
+            measure=self.distance,
             **self._distance_params,
         )
         return pairwise_matrix.argmin(axis=1)
@@ -366,7 +366,7 @@ def _kmeans_plus_plus_center_initializer(self, X: np.ndarray):
 
         for _ in range(1, self.n_clusters):
             pw_dist = pairwise_distance(
-                X, X[indexes], metric=self.distance, **self._distance_params
+                X, X[indexes], measure=self.distance, **self._distance_params
             )
             min_distances = pw_dist.min(axis=1)
             probabilities = min_distances / min_distances.sum()

diff --git a/aeon/clustering/_k_means.py b/aeon/clustering/_k_means.py
@@ -54,8 +54,8 @@ class TimeSeriesKMeans(BaseClusterer):
         n_timepoints)
         and contains the time series to use as centroids.
     distance : str or Callable, default='msm'
-        Distance metric to compute similarity between time series. A list of valid
-        strings for metrics can be found in the documentation for
+        Distance measure to compute similarity between time series. A list of valid
+        strings for measures can be found in the documentation for
         :func:`aeon.distances.get_distance_function`. If a callable is passed it must be
         a function that takes two 2d numpy arrays as input and returns a float.
     n_init : int, default=10
@@ -236,7 +236,7 @@ def _fit_one_init(self, X: np.ndarray) -> tuple:
         prev_labels = None
         for i in range(self.max_iter):
             curr_pw = pairwise_distance(
-                X, cluster_centres, metric=self.distance, **self._distance_params
+                X, cluster_centres, measure=self.distance, **self._distance_params
             )
             curr_labels = curr_pw.argmin(axis=1)
             curr_inertia = curr_pw.min(axis=1).sum()
@@ -273,13 +273,13 @@ def _fit_one_init(self, X: np.ndarray) -> tuple:
     def _predict(self, X: np.ndarray, y=None) -> np.ndarray:
         if isinstance(self.distance, str):
             pairwise_matrix = pairwise_distance(
-                X, self.cluster_centers_, metric=self.distance, **self._distance_params
+                X, self.cluster_centers_, measure=self.distance, **self._distance_params
             )
         else:
             pairwise_matrix = pairwise_distance(
                 X,
                 self.cluster_centers_,
-                metric=self.distance,
+                measure=self.distance,
                 **self._distance_params,
             )
         return pairwise_matrix.argmin(axis=1)
@@ -346,7 +346,7 @@ def _kmeans_plus_plus_center_initializer(self, X: np.ndarray):
 
         for _ in range(1, self.n_clusters):
             pw_dist = pairwise_distance(
-                X, X[indexes], metric=self.distance, **self._distance_params
+                X, X[indexes], measure=self.distance, **self._distance_params
             )
             min_distances = pw_dist.min(axis=1)
             probabilities = min_distances / min_distances.sum()
@@ -381,7 +381,7 @@ def _handle_empty_cluster(
             index_furthest_from_centre = curr_pw.min(axis=1).argmax()
             cluster_centres[current_empty_cluster_index] = X[index_furthest_from_centre]
             curr_pw = pairwise_distance(
-                X, cluster_centres, metric=self.distance, **self._distance_params
+                X, cluster_centres, measure=self.distance, **self._distance_params
             )
             curr_labels = curr_pw.argmin(axis=1)
             curr_inertia = curr_pw.min(axis=1).sum()

diff --git a/aeon/clustering/_k_medoids.py b/aeon/clustering/_k_medoids.py
@@ -56,8 +56,8 @@ class TimeSeriesKMedoids(BaseClusterer):
         If a np.ndarray provided it must be of shape (n_clusters,) and contain
         the indexes of the time series to use as centroids.
     distance : str or Callable, default='msm'
-        Distance metric to compute similarity between time series. A list of valid
-        strings for metrics can be found in the documentation for
+        Distance measure to compute similarity between time series. A list of valid
+        strings for measures can be found in the documentation for
         :func:`aeon.distances.get_distance_function`. If a callable is passed it must be
         a function that takes two 2d numpy arrays as input and returns a float.
     method : str, default='pam'
@@ -88,7 +88,7 @@ class TimeSeriesKMedoids(BaseClusterer):
         If `None`, the random number generator is the `RandomState` instance used
         by `np.random`.
     distance_params: dict, default=None
-        Dictionary containing kwargs for the distance metric being used.
+        Dictionary containing kwargs for the distance measure being used.
 
     Attributes
     ----------
@@ -211,7 +211,7 @@ def _fit(self, X: np.ndarray, y=None):
     def _predict(self, X: np.ndarray, y=None) -> np.ndarray:
         if isinstance(self.distance, str):
             pairwise_matrix = pairwise_distance(
-                X, self.cluster_centers_, metric=self.distance, **self._distance_params
+                X, self.cluster_centers_, measure=self.distance, **self._distance_params
             )
         else:
             pairwise_matrix = pairwise_distance(
@@ -456,7 +456,7 @@ def _check_params(self, X: np.ndarray) -> None:
                 f"n_clusters ({self.n_clusters}) cannot be larger than "
                 f"n_cases ({X.shape[0]})"
             )
-        self._distance_callable = get_distance_function(metric=self.distance)
+        self._distance_callable = get_distance_function(measure=self.distance)
         self._distance_cache = np.full((X.shape[0], X.shape[0]), np.inf)
 
         if self.method == "alternate":
@@ -486,7 +486,7 @@ def _kmedoids_plus_plus_center_initializer(self, X: np.ndarray):
 
         for _ in range(1, self.n_clusters):
             pw_dist = pairwise_distance(
-                X, X[indexes], metric=self.distance, **self._distance_params
+                X, X[indexes], measure=self.distance, **self._distance_params
             )
             min_distances = pw_dist.min(axis=1)
             probabilities = min_distances / min_distances.sum()

diff --git a/aeon/clustering/averaging/_ba_petitjean.py b/aeon/clustering/averaging/_ba_petitjean.py
@@ -55,7 +55,7 @@ def petitjean_barycenter_average(
     random_state: int or None, default=None
         Random state to use for the barycenter averaging.
     **kwargs
-        Keyword arguments to pass to the distance metric.
+        Keyword arguments to pass to the distance measure.
 
     Returns
     -------

diff --git a/aeon/clustering/averaging/_ba_subgradient.py b/aeon/clustering/averaging/_ba_subgradient.py
@@ -70,7 +70,7 @@ def subgradient_barycenter_average(
     random_state: int or None, default=None
         Random state to use for the barycenter averaging.
     **kwargs
-        Keyword arguments to pass to the distance metric.
+        Keyword arguments to pass to the distance measure.
 
     Returns
     -------

diff --git a/aeon/clustering/averaging/_ba_utils.py b/aeon/clustering/averaging/_ba_utils.py
@@ -31,7 +31,7 @@ def _medoids(
         return X
 
     if precomputed_pairwise_distance is None:
-        precomputed_pairwise_distance = pairwise_distance(X, metric=distance, **kwargs)
+        precomputed_pairwise_distance = pairwise_distance(X, measure=distance, **kwargs)
 
     x_size = X.shape[0]
     distance_matrix = np.zeros((x_size, x_size))
@@ -155,6 +155,6 @@ def _get_alignment_path(
     elif distance == "adtw":
         return adtw_alignment_path(ts, center, window=window, warp_penalty=warp_penalty)
     else:
-        # When numba version > 0.57 add more informative error with what metric
+        # When numba version > 0.57 add more informative error with what measure
         # was passed.
         raise ValueError("Distance parameter invalid")
diff --git a/aeon/clustering/averaging/_barycenter_averaging.py b/aeon/clustering/averaging/_barycenter_averaging.py
@@ -84,7 +84,7 @@ def elastic_barycenter_average(
     random_state: int or None, default=None
         Random state to use for the barycenter averaging.
     **kwargs
-        Keyword arguments to pass to the distance metric.
+        Keyword arguments to pass to the distance measure.
 
     Returns
     -------