aeon-toolkit · baraline · Jul 9, 2024 · Jun 22, 2024 · Jun 22, 2024 · Jun 25, 2024
diff --git a/CODEOWNERS b/CODEOWNERS
@@ -44,6 +44,8 @@ aeon/transformations/theta.py @GuzalBulatova
 
 aeon/utils/numba/ @baraline @MatthewMiddlehurst
 
+aeon/visualisation/ @baraline
+
 .github/ @aeon-toolkit/aeon-infrastructure-workgroup
 build_tools/ @aeon-toolkit/aeon-infrastructure-workgroup
 

@@ -60,6 +60,9 @@ class RDSTClassifier(BaseClassifier):
         If True, restrict the value of the shapelet dilation parameter to be prime
         values. This can greatly speed-up the algorithm for long time series and/or
         short shapelet length, possibly at the cost of some accuracy.
+    distance : str, default="manhattan"
+        Name of the distance function to be used. By default this is the
+        manhattan distance. Other distances from the aeon distance module can be used.
     estimator : BaseEstimator or None, default=None
         Base estimator for the ensemble, can be supplied a sklearn `BaseEstimator`. If
         `None` a default `RidgeClassifierCV` classifier is used with standard scalling.
@@ -134,6 +137,7 @@ def __init__(
         use_prime_dilations: bool = False,
         estimator=None,
         save_transformed_data: bool = False,
+        distance: str = "manhattan",
         n_jobs: int = 1,
         random_state: Union[int, Type[np.random.RandomState], None] = None,
     ) -> None:
@@ -143,7 +147,7 @@ def __init__(
         self.threshold_percentiles = threshold_percentiles
         self.alpha_similarity = alpha_similarity
         self.use_prime_dilations = use_prime_dilations
-
+        self.distance = distance
         self.estimator = estimator
         self.save_transformed_data = save_transformed_data
         self.random_state = random_state
@@ -184,6 +188,7 @@ def _fit(self, X, y):
             use_prime_dilations=self.use_prime_dilations,
             n_jobs=self.n_jobs,
             random_state=self.random_state,
+            distance=self.distance,
         )
         if self.estimator is None:
             self._estimator = make_pipeline(

@@ -14,7 +14,7 @@
 from numba.typed import List
 from sklearn.preprocessing import LabelEncoder
 
-from aeon.distances import manhattan_distance
+from aeon.distances import get_distance_function
 from aeon.transformations.collection import BaseCollectionTransformer
 from aeon.utils.numba.general import (
     AEON_NUMBA_STD_THRESHOLD,
@@ -83,6 +83,9 @@ class RandomDilatedShapeletTransform(BaseCollectionTransformer):
         If True, restrict the value of the shapelet dilation parameter to be prime
         values. This can greatly speed up the algorithm for long time series and/or
         short shapelet length, possibly at the cost of some accuracy.
+    distance : str, default="manhattan"
+        Name of the distance function to be used. By default this is the
+        manhattan distance. Other distances from the aeon distance module can be used.
     n_jobs : int, default=1
         The number of threads used for both `fit` and `transform`.
     random_state : int or None, default=None
@@ -153,6 +156,7 @@ def __init__(
         alpha_similarity=0.5,
         use_prime_dilations=False,
         random_state=None,
+        distance="manhattan",
         n_jobs=1,
     ):
         self.max_shapelets = max_shapelets
@@ -162,6 +166,7 @@ def __init__(
         self.alpha_similarity = alpha_similarity
         self.use_prime_dilations = use_prime_dilations
         self.random_state = random_state
+        self.distance = distance
         self.n_jobs = n_jobs
 
         super().__init__()
@@ -183,7 +188,8 @@ def _fit(self, X, y=None):
         self : RandomDilatedShapeletTransform
             This estimator.
         """
-        # Numba does not yet support new random numpy API with generator
+        self.distance_func = get_distance_function(self.distance)
+
         if isinstance(self.random_state, int):
             self._random_state = np.int32(self.random_state)
         else:
@@ -218,6 +224,7 @@ def _fit(self, X, y=None):
             self.alpha_similarity,
             self.use_prime_dilations,
             self._random_state,
+            self.distance_func,
         )
         if len(self.shapelets_[0]) == 0:
             raise RuntimeError(
@@ -259,7 +266,11 @@ def _transform(self, X, y=None):
                     "calling transform."
                 )
 
-        X_new = dilated_shapelet_transform(X, self.shapelets_)
+        X_new = dilated_shapelet_transform(
+            X,
+            self.shapelets_,
+            self.distance_func,
+        )
         if np.isinf(X_new).any() or np.isnan(X_new).any():
             warnings.warn(
                 "Some invalid values (inf or nan) where converted from to 0 during the"
@@ -482,6 +493,7 @@ def random_dilated_shapelet_extraction(
     alpha_similarity,
     use_prime_dilations,
     seed,
+    distance,
 ):
     """Randomly generate a set of shapelets given the input parameters.
 
@@ -518,6 +530,10 @@ def random_dilated_shapelet_extraction(
         short shapelet length, possibly at the cost of some accuracy.
     seed : int
         Seed for random number generation.
+    distance : CPUDispatcher
+        A Numba function used to compute the distance between two multidimensional
+        time series of shape (n_channels, length). Used as distance function between
+        shapelets and candidate subsequences.
 
     Returns
     -------
@@ -641,7 +657,7 @@ def random_dilated_shapelet_extraction(
                         X[id_test], length, dilation
                     )
                     X_subs = normalize_subsequences(X_subs, X_means, X_stds)
-                x_dist = compute_shapelet_dist_vector(X_subs, _val, length)
+                x_dist = compute_shapelet_dist_vector(X_subs, _val, length, distance)
 
                 lower_bound = np.percentile(x_dist, threshold_percentiles[0])
                 upper_bound = np.percentile(x_dist, threshold_percentiles[1])
@@ -669,7 +685,7 @@ def random_dilated_shapelet_extraction(
 
 
 @njit(fastmath=True, cache=True, parallel=True)
-def dilated_shapelet_transform(X, shapelets):
+def dilated_shapelet_transform(X, shapelets, distance):
     """Perform the shapelet transform with a set of shapelets and a set of time series.
 
     Parameters
@@ -692,6 +708,10 @@ def dilated_shapelet_transform(X, shapelets):
             Means of the shapelets
         - stds : array, shape (n_shapelets, n_channels)
             Standard deviation of the shapelets
+    distance : CPUDispatcher
+        A Numba function used to compute the distance between two multidimensional
+        time series of shape (n_channels, length).
+
 
     Returns
     -------
@@ -728,7 +748,7 @@ def dilated_shapelet_transform(X, shapelets):
             for i_shp in idx_no_norm:
                 X_new[i_x, (n_ft * i_shp) : (n_ft * i_shp + n_ft)] = (
                     compute_shapelet_features(
-                        X_subs, values[i_shp], length, threshold[i_shp]
+                        X_subs, values[i_shp], length, threshold[i_shp], distance
                     )
                 )
 
@@ -739,7 +759,7 @@ def dilated_shapelet_transform(X, shapelets):
                 for i_shp in idx_norm:
                     X_new[i_x, (n_ft * i_shp) : (n_ft * i_shp + n_ft)] = (
                         compute_shapelet_features(
-                            X_subs, values[i_shp], length, threshold[i_shp]
+                            X_subs, values[i_shp], length, threshold[i_shp], distance
                         )
                     )
     return X_new
@@ -808,7 +828,7 @@ def get_all_subsequences(X, length, dilation):
 
 
 @njit(fastmath=True, cache=True)
-def compute_shapelet_features(X_subs, values, length, threshold):
+def compute_shapelet_features(X_subs, values, length, threshold, distance):
     """Extract the features from a shapelet distance vector.
 
     Given a shapelet and a time series, extract three features from the resulting
@@ -826,10 +846,11 @@ def compute_shapelet_features(X_subs, values, length, threshold):
         The value array of the shapelet
     length : int
         Length of the shapelet
-    values : array, shape (n_channels, length)
-        The resulting subsequence
     threshold : float
         The threshold parameter of the shapelet
+    distance : CPUDispatcher
+        A Numba function used to compute the distance between two multidimensional
+        time series of shape (n_channels, length).
 
     Returns
     -------
@@ -843,7 +864,7 @@ def compute_shapelet_features(X_subs, values, length, threshold):
     n_subsequences = X_subs.shape[0]
 
     for i_sub in prange(n_subsequences):
-        _dist = manhattan_distance(X_subs[i_sub], values[:, :length])
+        _dist = distance(X_subs[i_sub], values[:, :length])
         if _dist < _min:
             _min = _dist
             _argmin = i_sub
@@ -854,7 +875,7 @@ def compute_shapelet_features(X_subs, values, length, threshold):
 
 
 @njit(fastmath=True, cache=True)
-def compute_shapelet_dist_vector(X_subs, values, length):
+def compute_shapelet_dist_vector(X_subs, values, length, distance):
     """Extract the features from a shapelet distance vector.
 
     Given a shapelet and a time series, extract three features from the resulting
@@ -872,20 +893,17 @@ def compute_shapelet_dist_vector(X_subs, values, length):
         The value array of the shapelet
     length : int
         Length of the shapelet
-    dilation : int
-        Dilation of the shapelet
-    values : array, shape (n_channels, length)
-        The resulting subsequence
-    threshold : float
-        The threshold parameter of the shapelet
+    distance : CPUDispatcher
+        A Numba function used to compute the distance between two multidimensional
+        time series of shape (n_channels, length).
 
     Returns
     -------
-    min, argmin, shapelet occurence
-        The three computed features as float dtypes
+    dist_vector : array, shape = (n_timestamps-(length-1)*dilation)
+        The distance vector between the shapelet and each candidate subsequence.
     """
     n_subsequences = X_subs.shape[0]
     dist_vector = np.zeros(n_subsequences)
     for i_sub in prange(n_subsequences):
-        dist_vector[i_sub] = manhattan_distance(X_subs[i_sub], values[:, :length])
+        dist_vector[i_sub] = distance(X_subs[i_sub], values[:, :length])
     return dist_vector
@@ -179,7 +179,6 @@ def _fit(self, X, y):
         # 2--calculate PACF and ACF for each TS chosen in each class
 
         for i, c in enumerate(classes):
-
             X_c = X_[y == c]
 
             cnt = np.min([self.nb_inst_per_class, X_c.shape[0]]).astype(int)
@@ -313,7 +312,7 @@ def _transform(self, X, y=None):
 
         Returns
         -------
-        X_transformed: np.ndarray shape (n_cases, n_timepoints),
+        X_transformed: np.ndarray shape (n_cases, n_kernels),
             The transformed data
         """
         X_ = np.reshape(X, (X.shape[0], X.shape[-1]))

@@ -144,7 +144,9 @@ def test_compute_shapelet_features(dtype):
     dilation = 1
     threshold = 0.01
     X_subs = get_all_subsequences(X, length, dilation)
-    _min, _argmin, SO = compute_shapelet_features(X_subs, values, length, threshold)
+    _min, _argmin, SO = compute_shapelet_features(
+        X_subs, values, length, threshold, manhattan_distance
+    )
 
     # On some occasion, float32 precision with fasmath retruns things like
     # 2.1835059227370834e-07 instead of 0
@@ -155,7 +157,9 @@ def test_compute_shapelet_features(dtype):
     dilation = 2
     threshold = 0.1
     X_subs = get_all_subsequences(X, length, dilation)
-    _min, _argmin, SO = compute_shapelet_features(X_subs, values, length, threshold)
+    _min, _argmin, SO = compute_shapelet_features(
+        X_subs, values, length, threshold, manhattan_distance
+    )
 
     assert_almost_equal(_min, 0.0, decimal=4)
     assert _argmin == 7.0
@@ -164,7 +168,9 @@ def test_compute_shapelet_features(dtype):
     dilation = 4
     threshold = 2
     X_subs = get_all_subsequences(X, length, dilation)
-    _min, _argmin, SO = compute_shapelet_features(X_subs, values, length, threshold)
+    _min, _argmin, SO = compute_shapelet_features(
+        X_subs, values, length, threshold, manhattan_distance
+    )
 
     assert_almost_equal(_min, 0.0, decimal=4)
     assert _argmin == 3.0
@@ -179,7 +185,9 @@ def test_compute_shapelet_dist_vector(dtype):
         for dilation in [1, 3, 5]:
             values = np.random.rand(3, length).astype(dtype)
             X_subs = get_all_subsequences(X, length, dilation)
-            d_vect = compute_shapelet_dist_vector(X_subs, values, length)
+            d_vect = compute_shapelet_dist_vector(
+                X_subs, values, length, manhattan_distance
+            )
             true_vect = np.zeros(X.shape[1] - (length - 1) * dilation)
             for i_sub in range(true_vect.shape[0]):
                 _idx = [i_sub + j * dilation for j in range(length)]

@@ -22,10 +22,18 @@
     "plot_series_with_profiles",
     "plot_cluster_algorithm",
     "plot_temporal_importance_curves",
+    "ShapeletVisualizer",
+    "ShapeletTransformerVisualizer",
+    "ShapeletClassifierVisualizer",
 ]
 
 from aeon.visualisation.estimator._clasp import plot_series_with_profiles
 from aeon.visualisation.estimator._clustering import plot_cluster_algorithm
+from aeon.visualisation.estimator._shapelets import (
+    ShapeletClassifierVisualizer,
+    ShapeletTransformerVisualizer,
+    ShapeletVisualizer,
+)
 from aeon.visualisation.estimator._temporal_importance_curves import (
     plot_temporal_importance_curves,
 )