14
14
from numba .typed import List
15
15
from sklearn .preprocessing import LabelEncoder
16
16
17
- from aeon .distances import manhattan_distance
17
+ from aeon .distances import get_distance_function
18
18
from aeon .transformations .collection import BaseCollectionTransformer
19
19
from aeon .utils .numba .general import (
20
20
AEON_NUMBA_STD_THRESHOLD ,
@@ -83,6 +83,9 @@ class RandomDilatedShapeletTransform(BaseCollectionTransformer):
83
83
If True, restrict the value of the shapelet dilation parameter to be prime
84
84
values. This can greatly speed up the algorithm for long time series and/or
85
85
short shapelet length, possibly at the cost of some accuracy.
86
+ distance : str, default="manhattan"
87
+ Name of the distance function to be used. By default this is the
88
+ manhattan distance. Other distances from the aeon distance module can be used.
86
89
n_jobs : int, default=1
87
90
The number of threads used for both `fit` and `transform`.
88
91
random_state : int or None, default=None
@@ -153,6 +156,7 @@ def __init__(
153
156
alpha_similarity = 0.5 ,
154
157
use_prime_dilations = False ,
155
158
random_state = None ,
159
+ distance = "manhattan" ,
156
160
n_jobs = 1 ,
157
161
):
158
162
self .max_shapelets = max_shapelets
@@ -162,6 +166,7 @@ def __init__(
162
166
self .alpha_similarity = alpha_similarity
163
167
self .use_prime_dilations = use_prime_dilations
164
168
self .random_state = random_state
169
+ self .distance = distance
165
170
self .n_jobs = n_jobs
166
171
167
172
super ().__init__ ()
@@ -183,7 +188,8 @@ def _fit(self, X, y=None):
183
188
self : RandomDilatedShapeletTransform
184
189
This estimator.
185
190
"""
186
- # Numba does not yet support new random numpy API with generator
191
+ self .distance_func = get_distance_function (self .distance )
192
+
187
193
if isinstance (self .random_state , int ):
188
194
self ._random_state = np .int32 (self .random_state )
189
195
else :
@@ -218,6 +224,7 @@ def _fit(self, X, y=None):
218
224
self .alpha_similarity ,
219
225
self .use_prime_dilations ,
220
226
self ._random_state ,
227
+ self .distance_func ,
221
228
)
222
229
if len (self .shapelets_ [0 ]) == 0 :
223
230
raise RuntimeError (
@@ -259,7 +266,11 @@ def _transform(self, X, y=None):
259
266
"calling transform."
260
267
)
261
268
262
- X_new = dilated_shapelet_transform (X , self .shapelets_ )
269
+ X_new = dilated_shapelet_transform (
270
+ X ,
271
+ self .shapelets_ ,
272
+ self .distance_func ,
273
+ )
263
274
if np .isinf (X_new ).any () or np .isnan (X_new ).any ():
264
275
warnings .warn (
265
276
"Some invalid values (inf or nan) where converted from to 0 during the"
@@ -482,6 +493,7 @@ def random_dilated_shapelet_extraction(
482
493
alpha_similarity ,
483
494
use_prime_dilations ,
484
495
seed ,
496
+ distance ,
485
497
):
486
498
"""Randomly generate a set of shapelets given the input parameters.
487
499
@@ -518,6 +530,10 @@ def random_dilated_shapelet_extraction(
518
530
short shapelet length, possibly at the cost of some accuracy.
519
531
seed : int
520
532
Seed for random number generation.
533
+ distance : CPUDispatcher
534
+ A Numba function used to compute the distance between two multidimensional
535
+ time series of shape (n_channels, length). Used as the distance function between
536
+ shapelets and candidate subsequences.
521
537
522
538
Returns
523
539
-------
@@ -641,7 +657,7 @@ def random_dilated_shapelet_extraction(
641
657
X [id_test ], length , dilation
642
658
)
643
659
X_subs = normalize_subsequences (X_subs , X_means , X_stds )
644
- x_dist = compute_shapelet_dist_vector (X_subs , _val , length )
660
+ x_dist = compute_shapelet_dist_vector (X_subs , _val , length , distance )
645
661
646
662
lower_bound = np .percentile (x_dist , threshold_percentiles [0 ])
647
663
upper_bound = np .percentile (x_dist , threshold_percentiles [1 ])
@@ -669,7 +685,7 @@ def random_dilated_shapelet_extraction(
669
685
670
686
671
687
@njit (fastmath = True , cache = True , parallel = True )
672
- def dilated_shapelet_transform (X , shapelets ):
688
+ def dilated_shapelet_transform (X , shapelets , distance ):
673
689
"""Perform the shapelet transform with a set of shapelets and a set of time series.
674
690
675
691
Parameters
@@ -692,6 +708,10 @@ def dilated_shapelet_transform(X, shapelets):
692
708
Means of the shapelets
693
709
- stds : array, shape (n_shapelets, n_channels)
694
710
Standard deviation of the shapelets
711
+ distance : CPUDispatcher
712
+ A Numba function used to compute the distance between two multidimensional
713
+ time series of shape (n_channels, length).
714
+
695
715
696
716
Returns
697
717
-------
@@ -728,7 +748,7 @@ def dilated_shapelet_transform(X, shapelets):
728
748
for i_shp in idx_no_norm :
729
749
X_new [i_x , (n_ft * i_shp ) : (n_ft * i_shp + n_ft )] = (
730
750
compute_shapelet_features (
731
- X_subs , values [i_shp ], length , threshold [i_shp ]
751
+ X_subs , values [i_shp ], length , threshold [i_shp ], distance
732
752
)
733
753
)
734
754
@@ -739,7 +759,7 @@ def dilated_shapelet_transform(X, shapelets):
739
759
for i_shp in idx_norm :
740
760
X_new [i_x , (n_ft * i_shp ) : (n_ft * i_shp + n_ft )] = (
741
761
compute_shapelet_features (
742
- X_subs , values [i_shp ], length , threshold [i_shp ]
762
+ X_subs , values [i_shp ], length , threshold [i_shp ], distance
743
763
)
744
764
)
745
765
return X_new
@@ -808,7 +828,7 @@ def get_all_subsequences(X, length, dilation):
808
828
809
829
810
830
@njit (fastmath = True , cache = True )
811
- def compute_shapelet_features (X_subs , values , length , threshold ):
831
+ def compute_shapelet_features (X_subs , values , length , threshold , distance ):
812
832
"""Extract the features from a shapelet distance vector.
813
833
814
834
Given a shapelet and a time series, extract three features from the resulting
@@ -826,10 +846,11 @@ def compute_shapelet_features(X_subs, values, length, threshold):
826
846
The value array of the shapelet
827
847
length : int
828
848
Length of the shapelet
829
- values : array, shape (n_channels, length)
830
- The resulting subsequence
831
849
threshold : float
832
850
The threshold parameter of the shapelet
851
+ distance : CPUDispatcher
852
+ A Numba function used to compute the distance between two multidimensional
853
+ time series of shape (n_channels, length).
833
854
834
855
Returns
835
856
-------
@@ -843,7 +864,7 @@ def compute_shapelet_features(X_subs, values, length, threshold):
843
864
n_subsequences = X_subs .shape [0 ]
844
865
845
866
for i_sub in prange (n_subsequences ):
846
- _dist = manhattan_distance (X_subs [i_sub ], values [:, :length ])
867
+ _dist = distance (X_subs [i_sub ], values [:, :length ])
847
868
if _dist < _min :
848
869
_min = _dist
849
870
_argmin = i_sub
@@ -854,7 +875,7 @@ def compute_shapelet_features(X_subs, values, length, threshold):
854
875
855
876
856
877
@njit (fastmath = True , cache = True )
857
- def compute_shapelet_dist_vector (X_subs , values , length ):
878
+ def compute_shapelet_dist_vector (X_subs , values , length , distance ):
858
879
"""Extract the features from a shapelet distance vector.
859
880
860
881
Given a shapelet and a time series, extract three features from the resulting
@@ -872,20 +893,17 @@ def compute_shapelet_dist_vector(X_subs, values, length):
872
893
The value array of the shapelet
873
894
length : int
874
895
Length of the shapelet
875
- dilation : int
876
- Dilation of the shapelet
877
- values : array, shape (n_channels, length)
878
- The resulting subsequence
879
- threshold : float
880
- The threshold parameter of the shapelet
896
+ distance : CPUDispatcher
897
+ A Numba function used to compute the distance between two multidimensional
898
+ time series of shape (n_channels, length).
881
899
882
900
Returns
883
901
-------
884
- min, argmin, shapelet occurence
885
- The three computed features as float dtypes
902
+ dist_vector : array, shape = (n_timestamps-(length-1)*dilation)
903
+ The distance vector between the shapelet and the candidate subsequences.
886
904
"""
887
905
n_subsequences = X_subs .shape [0 ]
888
906
dist_vector = np .zeros (n_subsequences )
889
907
for i_sub in prange (n_subsequences ):
890
- dist_vector [i_sub ] = manhattan_distance (X_subs [i_sub ], values [:, :length ])
908
+ dist_vector [i_sub ] = distance (X_subs [i_sub ], values [:, :length ])
891
909
return dist_vector
0 commit comments