Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MNT] Distance module changed to method rather than metric #2407

Merged
merged 1 commit into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions aeon/classification/distance_based/_proximity_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,7 @@ def _get_best_splitter(self, X, y):
dist = distance(
X[j],
splitter[0][labels[k]],
measure=measure,
method=measure,
**splitter[1][measure],
)
if dist < min_dist:
Expand Down Expand Up @@ -320,7 +320,7 @@ def _build_tree(self, X, y, depth, node_id, parent_target_value=None):
dist = distance(
X[i],
splitter[0][labels[j]],
measure=measure,
method=measure,
**splitter[1][measure],
)
if dist < min_dist:
Expand Down Expand Up @@ -404,7 +404,7 @@ def _classify(self, treenode, x):
dist = distance(
x,
treenode.splitter[0][branches[i]],
measure=measure,
method=measure,
**treenode.splitter[1][measure],
)
if dist < min_dist:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def _fit(self, X, y):
y : array-like, shape = (n_cases)
The class labels.
"""
self.metric_ = get_distance_function(measure=self.distance)
self.metric_ = get_distance_function(method=self.distance)
self.X_ = X
self.classes_, self.y_ = np.unique(y, return_inverse=True)
return self
Expand Down
6 changes: 3 additions & 3 deletions aeon/clustering/_clara.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class TimeSeriesCLARA(BaseClusterer):
If a np.ndarray provided it must be of shape (n_clusters,) and contain
the indexes of the time series to use as centroids.
distance : str or Callable, default='msm'
Distance measure to compute similarity between time series. A list of valid
Distance method to compute similarity between time series. A list of valid
strings for metrics can be found in the documentation for
:func:`aeon.distances.get_distance_function`. If a callable is passed it must be
a function that takes two 2d numpy arrays as input and returns a float.
Expand Down Expand Up @@ -73,7 +73,7 @@ class TimeSeriesCLARA(BaseClusterer):
If `None`, the random number generator is the `RandomState` instance used
by `np.random`.
distance_params : dict, default=None
Dictionary containing kwargs for the distance measure being used.
Dictionary containing kwargs for the distance method being used.
Attributes
----------
Expand Down Expand Up @@ -189,7 +189,7 @@ def _fit(self, X: np.ndarray, y=None):
curr_centers = pam.cluster_centers_
if isinstance(pam.distance, str):
pairwise_matrix = pairwise_distance(
X, curr_centers, measure=self.distance, **pam._distance_params
X, curr_centers, method=self.distance, **pam._distance_params
)
else:
pairwise_matrix = pairwise_distance(
Expand Down
4 changes: 2 additions & 2 deletions aeon/clustering/_clarans.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ class TimeSeriesCLARANS(TimeSeriesKMedoids):
If a np.ndarray provided it must be of shape (n_clusters,) and contain
the indexes of the time series to use as centroids.
distance : str or Callable, default='msm'
Distance measure to compute similarity between time series. A list of valid
Distance method to compute similarity between time series. A list of valid
strings for measures can be found in the documentation for
:func:`aeon.distances.get_distance_function`. If a callable is passed it must be
a function that takes two 2d numpy arrays as input and returns a float.
Expand All @@ -62,7 +62,7 @@ class TimeSeriesCLARANS(TimeSeriesKMedoids):
random_state : int or np.random.RandomState instance or None, default=None
Determines random number generation for centroid initialization.
distance_params : dict, default=None
Dictionary containing kwargs for the distance measure being used.
Dictionary containing kwargs for the distance method being used.
Attributes
----------
Expand Down
8 changes: 4 additions & 4 deletions aeon/clustering/_elastic_som.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ class ElasticSOM(BaseClusterer):
n_clusters : int, default=8
The number of clusters to form as well as the number of centroids to generate.
distance : str or Callable, default='dtw'
Distance measure to compute similarity between time series. A list of valid
Distance method to compute similarity between time series. A list of valid
strings for measures can be found in the documentation for
:func:`aeon.distances.get_distance_function`. If a callable is passed it must be
a function that takes two 2d numpy arrays as input and returns a float.
Expand Down Expand Up @@ -105,7 +105,7 @@ class ElasticSOM(BaseClusterer):
by `np.random`.
custom_alignment_path : Callable, default=None
Custom alignment path function to use for the distance. If None, the default
alignment path function for the distance will be used. If the distance measure
alignment path function for the distance will be used. If the distance method
does not have an elastic alignment path then the default SOM einsum update will
be used. See aeon.clustering.elastic_som.VALID_ELASTIC_SOM_METRICS for a list of
distances that have an elastic alignment path.
Expand Down Expand Up @@ -224,7 +224,7 @@ def _find_bmu(self, x, weights):
pairwise_matrix = pairwise_distance(
x,
weights,
measure=self.distance,
method=self.distance,
**self._distance_params,
)
return pairwise_matrix.argmin(axis=1)
Expand Down Expand Up @@ -366,7 +366,7 @@ def _kmeans_plus_plus_center_initializer(self, X: np.ndarray):

for _ in range(1, self.n_clusters):
pw_dist = pairwise_distance(
X, X[indexes], measure=self.distance, **self._distance_params
X, X[indexes], method=self.distance, **self._distance_params
)
min_distances = pw_dist.min(axis=1)
probabilities = min_distances / min_distances.sum()
Expand Down
18 changes: 9 additions & 9 deletions aeon/clustering/_k_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ class TimeSeriesKMeans(BaseClusterer):
n_timepoints)
and contains the time series to use as centroids.
distance : str or Callable, default='msm'
Distance measure to compute similarity between time series. A list of valid
Distance method to compute similarity between time series. A list of valid
strings for measures can be found in the documentation for
:func:`aeon.distances.get_distance_function`. If a callable is passed it must be
a function that takes two 2d numpy arrays as input and returns a float.
Expand Down Expand Up @@ -82,9 +82,9 @@ class TimeSeriesKMeans(BaseClusterer):
Averaging method to compute the average of a cluster. Any of the following
strings are valid: ['mean', 'ba']. If a Callable is provided must take the form
Callable[[np.ndarray], np.ndarray].
If you specify 'ba' then by default the distance measure used will be the same
as the distance measure used for clustering. If you wish to use a different
distance measure you can specify it by passing {"distance": "dtw"} as
If you specify 'ba' then by default the distance method used will be the same
as the distance method used for clustering. If you wish to use a different
distance method you can specify it by passing {"distance": "dtw"} as
averaging_params. BA yields 'better' clustering results but is very
computationally expensive so you may want to consider setting a bounding window
or using a different averaging method if time complexity is a concern.
Expand Down Expand Up @@ -236,7 +236,7 @@ def _fit_one_init(self, X: np.ndarray) -> tuple:
prev_labels = None
for i in range(self.max_iter):
curr_pw = pairwise_distance(
X, cluster_centres, measure=self.distance, **self._distance_params
X, cluster_centres, method=self.distance, **self._distance_params
)
curr_labels = curr_pw.argmin(axis=1)
curr_inertia = curr_pw.min(axis=1).sum()
Expand Down Expand Up @@ -273,13 +273,13 @@ def _fit_one_init(self, X: np.ndarray) -> tuple:
def _predict(self, X: np.ndarray, y=None) -> np.ndarray:
if isinstance(self.distance, str):
pairwise_matrix = pairwise_distance(
X, self.cluster_centers_, measure=self.distance, **self._distance_params
X, self.cluster_centers_, method=self.distance, **self._distance_params
)
else:
pairwise_matrix = pairwise_distance(
X,
self.cluster_centers_,
measure=self.distance,
method=self.distance,
**self._distance_params,
)
return pairwise_matrix.argmin(axis=1)
Expand Down Expand Up @@ -346,7 +346,7 @@ def _kmeans_plus_plus_center_initializer(self, X: np.ndarray):

for _ in range(1, self.n_clusters):
pw_dist = pairwise_distance(
X, X[indexes], measure=self.distance, **self._distance_params
X, X[indexes], method=self.distance, **self._distance_params
)
min_distances = pw_dist.min(axis=1)
probabilities = min_distances / min_distances.sum()
Expand Down Expand Up @@ -381,7 +381,7 @@ def _handle_empty_cluster(
index_furthest_from_centre = curr_pw.min(axis=1).argmax()
cluster_centres[current_empty_cluster_index] = X[index_furthest_from_centre]
curr_pw = pairwise_distance(
X, cluster_centres, measure=self.distance, **self._distance_params
X, cluster_centres, method=self.distance, **self._distance_params
)
curr_labels = curr_pw.argmin(axis=1)
curr_inertia = curr_pw.min(axis=1).sum()
Expand Down
12 changes: 6 additions & 6 deletions aeon/clustering/_k_medoids.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class TimeSeriesKMedoids(BaseClusterer):
accurate than PAM. For a full review of variations of k-medoids for time series
see [5]_.
K-medoids for time series uses a dissimilarity measure to compute the distance
K-medoids for time series uses a dissimilarity method to compute the distance
between time series. The default is 'msm' (move split merge) as
it was found to significantly outperform the other measures in [2]_.
Expand All @@ -56,7 +56,7 @@ class TimeSeriesKMedoids(BaseClusterer):
If a np.ndarray provided it must be of shape (n_clusters,) and contain
the indexes of the time series to use as centroids.
distance : str or Callable, default='msm'
Distance measure to compute similarity between time series. A list of valid
Distance method to compute similarity between time series. A list of valid
strings for measures can be found in the documentation for
:func:`aeon.distances.get_distance_function`. If a callable is passed it must be
a function that takes two 2d numpy arrays as input and returns a float.
Expand Down Expand Up @@ -88,7 +88,7 @@ class TimeSeriesKMedoids(BaseClusterer):
If `None`, the random number generator is the `RandomState` instance used
by `np.random`.
distance_params: dict, default=None
Dictionary containing kwargs for the distance measure being used.
Dictionary containing kwargs for the distance method being used.
Attributes
----------
Expand Down Expand Up @@ -211,7 +211,7 @@ def _fit(self, X: np.ndarray, y=None):
def _predict(self, X: np.ndarray, y=None) -> np.ndarray:
if isinstance(self.distance, str):
pairwise_matrix = pairwise_distance(
X, self.cluster_centers_, measure=self.distance, **self._distance_params
X, self.cluster_centers_, method=self.distance, **self._distance_params
)
else:
pairwise_matrix = pairwise_distance(
Expand Down Expand Up @@ -456,7 +456,7 @@ def _check_params(self, X: np.ndarray) -> None:
f"n_clusters ({self.n_clusters}) cannot be larger than "
f"n_cases ({X.shape[0]})"
)
self._distance_callable = get_distance_function(measure=self.distance)
self._distance_callable = get_distance_function(method=self.distance)
self._distance_cache = np.full((X.shape[0], X.shape[0]), np.inf)

if self.method == "alternate":
Expand Down Expand Up @@ -486,7 +486,7 @@ def _kmedoids_plus_plus_center_initializer(self, X: np.ndarray):

for _ in range(1, self.n_clusters):
pw_dist = pairwise_distance(
X, X[indexes], measure=self.distance, **self._distance_params
X, X[indexes], method=self.distance, **self._distance_params
)
min_distances = pw_dist.min(axis=1)
probabilities = min_distances / min_distances.sum()
Expand Down
2 changes: 1 addition & 1 deletion aeon/clustering/averaging/_ba_petitjean.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ def petitjean_barycenter_average(
random_state: int or None, default=None
Random state to use for the barycenter averaging.
**kwargs
Keyword arguments to pass to the distance measure.
Keyword arguments to pass to the distance method.
Returns
-------
Expand Down
2 changes: 1 addition & 1 deletion aeon/clustering/averaging/_ba_subgradient.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def subgradient_barycenter_average(
random_state: int or None, default=None
Random state to use for the barycenter averaging.
**kwargs
Keyword arguments to pass to the distance measure.
Keyword arguments to pass to the distance method.
Returns
-------
Expand Down
4 changes: 2 additions & 2 deletions aeon/clustering/averaging/_ba_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def _medoids(
return X

if precomputed_pairwise_distance is None:
precomputed_pairwise_distance = pairwise_distance(X, measure=distance, **kwargs)
precomputed_pairwise_distance = pairwise_distance(X, method=distance, **kwargs)

x_size = X.shape[0]
distance_matrix = np.zeros((x_size, x_size))
Expand Down Expand Up @@ -155,6 +155,6 @@ def _get_alignment_path(
elif distance == "adtw":
return adtw_alignment_path(ts, center, window=window, warp_penalty=warp_penalty)
else:
# When numba version > 0.57 add more informative error with what measure
# When numba version > 0.57 add more informative error with what method
# was passed.
raise ValueError("Distance parameter invalid")
2 changes: 1 addition & 1 deletion aeon/clustering/averaging/_barycenter_averaging.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def elastic_barycenter_average(
random_state: int or None, default=None
Random state to use for the barycenter averaging.
**kwargs
Keyword arguments to pass to the distance measure.
Keyword arguments to pass to the distance method.
Returns
-------
Expand Down
Loading