diff --git a/aeon/classification/distance_based/__init__.py b/aeon/classification/distance_based/__init__.py
index a2dfc26433..503e4c1df9 100644
--- a/aeon/classification/distance_based/__init__.py
+++ b/aeon/classification/distance_based/__init__.py
@@ -5,10 +5,16 @@
     "KNeighborsTimeSeriesClassifier",
     "ProximityTree",
     "ProximityForest",
+    "ProximityTree2",
+    "ProximityForest2",
 ]

 from aeon.classification.distance_based._elastic_ensemble import ElasticEnsemble
 from aeon.classification.distance_based._proximity_forest import ProximityForest
+from aeon.classification.distance_based._proximity_forest_2 import (
+    ProximityForest2,
+    ProximityTree2,
+)
 from aeon.classification.distance_based._proximity_tree import ProximityTree
 from aeon.classification.distance_based._time_series_neighbors import (
     KNeighborsTimeSeriesClassifier,
diff --git a/aeon/classification/distance_based/_proximity_forest_2.py b/aeon/classification/distance_based/_proximity_forest_2.py
new file mode 100644
index 0000000000..71cf17d947
--- /dev/null
+++ b/aeon/classification/distance_based/_proximity_forest_2.py
@@ -0,0 +1,1083 @@
+"""Proximity Forest 2.0 Classifier."""
+
+from typing import Any, Callable, Optional, TypedDict, Union
+
+import numpy as np
+from joblib import Parallel, delayed
+from numba import njit
+from numba.typed import List as NumbaList
+from sklearn.utils import check_random_state
+from typing_extensions import Unpack
+
+from aeon.classification.base import BaseClassifier
+from aeon.distances.elastic._bounding_matrix import create_bounding_matrix
+from aeon.distances.elastic._lcss import lcss_distance
+from aeon.distances.pointwise._minkowski import (
+    _univariate_minkowski_distance,
+    minkowski_distance,
+)
+
+
+class ProximityForest2(BaseClassifier):
+    """Proximity Forest 2.0 Classifier.
+
+    Proximity Forest 2.0 outperforms the original Proximity Forest classifier.
+    PF 2.0 incorporates three recent advances in time series similarity measures:
+    (1) computationally efficient early abandoning and pruning to speed up elastic
+    similarity computations; (2) a new elastic similarity measure, Amerced Dynamic
+    Time Warping (ADTW); and (3) cost function tuning.
+
+    Parameters
+    ----------
+    n_trees: int, default = 100
+        The number of trees; by default an ensemble of 100 trees is formed.
+    n_splitters: int, default = 5
+        The number of candidate splitters to be evaluated at each node.
+    max_depth: int, default = None
+        The maximum depth of the tree. If None, then nodes are expanded until all
+        leaves are pure or until all leaves contain less than min_samples_split
+        samples.
+    min_samples_split: int, default = 2
+        The minimum number of samples required to split an internal node.
+    random_state : int, RandomState instance or None, default=None
+        If `int`, random_state is the seed used by the random number generator;
+        If `RandomState` instance, random_state is the random number generator;
+        If `None`, the random number generator is the `RandomState` instance used
+        by `np.random`.
+    n_jobs : int, default = 1
+        The number of parallel jobs to run when fitting the trees and when
+        predicting. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`
+        context. ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
+        for more details.
+    parallel_backend : str, ParallelBackendBase instance or None, default=None
+        Specify the parallelisation backend implementation in joblib; if None, a
+        'prefer' value of "threads" is used by default.
+        Valid options are "loky", "multiprocessing", "threading" or a custom backend.
+        See the joblib Parallel documentation for more details.
+
+    Notes
+    -----
+    For the C++ version, see
+    `ProximityForest2.0
+    `_.
+
+    Also refer to the Proximity Forest implementation;
+    see the :class:`~aeon.classification.distance_based.ProximityForest` API.
+
+    References
+    ----------
+    .. [1] Matthieu Herrmann, Chang Wei Tan, Mahsa Salehi, Geoffrey I. Webb.
+       Proximity Forest 2.0: A new effective and scalable similarity-based classifier
+       for time series, https://doi.org/10.48550/arXiv.2304.05800.
+
+    Examples
+    --------
+    >>> from aeon.datasets import load_unit_test
+    >>> from aeon.classification.distance_based import ProximityForest2
+    >>> X_train, y_train = load_unit_test(split="train")
+    >>> X_test, y_test = load_unit_test(split="test")
+    >>> classifier = ProximityForest2(n_trees=10, n_splitters=3)
+    >>> classifier.fit(X_train, y_train)
+    ProximityForest2(...)
+    >>> y_pred = classifier.predict(X_test)
+    """
+
+    _tags = {
+        "capability:multivariate": False,
+        "capability:unequal_length": False,
+        "capability:multithreading": True,
+        "algorithm_type": "distance",
+        "X_inner_type": ["numpy2D"],
+    }
+
+    def __init__(
+        self,
+        n_trees=100,
+        n_splitters: int = 5,
+        max_depth: Optional[int] = None,
+        min_samples_split: int = 2,
+        random_state: Union[int, np.random.RandomState, None] = None,
+        n_jobs: int = 1,
+        parallel_backend=None,
+    ):
+        self.n_trees = n_trees
+        self.n_splitters = n_splitters
+        self.max_depth = max_depth
+        self.min_samples_split = min_samples_split
+        self.random_state = random_state
+        self.n_jobs = n_jobs
+        self.parallel_backend = parallel_backend
+        super().__init__()
+
+    def _fit(self, X, y):
+        rng = check_random_state(self.random_state)
+        seeds = rng.randint(np.iinfo(np.int32).max, size=self.n_trees)
+        self.trees_ = Parallel(
+            n_jobs=self._n_jobs, backend=self.parallel_backend, prefer="threads"
+        )(
+            delayed(_fit_tree)(
+                X,
+                y,
+                self.n_splitters,
+                self.max_depth,
+                self.min_samples_split,
+                check_random_state(seed),
+            )
+            for seed in seeds
+        )
+
+    def _predict_proba(self, X):
+        classes = list(self.classes_)
+        preds = Parallel(
+            n_jobs=self._n_jobs, backend=self.parallel_backend, prefer="threads"
+        )(delayed(_predict_tree)(tree, X) for tree in self.trees_)
+        n_cases = X.shape[0]
+        votes = np.zeros((n_cases, self.n_classes_))
+        for i in range(len(preds)):
+            predictions = np.array(
+                [classes.index(class_label) for class_label in preds[i]]
+            )
+            for j in range(n_cases):
+                votes[j, predictions[j]] += 1
+        output_probas = votes / self.n_trees
+        return output_probas
+
+    def _predict(self, X):
+        probas = self._predict_proba(X)
+        idx = np.argmax(probas, axis=1)
+        preds = np.asarray([self.classes_[x] for x in idx])
+        return preds
+
+
+def _fit_tree(X, y, n_splitters, max_depth, min_samples_split, random_state):
+    clf = ProximityTree2(
+        n_splitters=n_splitters,
+        max_depth=max_depth,
+        min_samples_split=min_samples_split,
+        random_state=random_state,
+    )
+    clf.fit(X, y)
+    return clf
+
+
+def _predict_tree(tree, X):
+    return tree.predict(X)
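+
+
+# Ensemble voting, as a worked example (illustrative, not executed): with
+# n_trees=10, if 7 trees predict class "1" and 3 predict class "2" for a case,
+# ``_predict_proba`` returns [0.7, 0.3] for that case and ``_predict`` picks
+# "1" via argmax.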
+
+
+class _Node:
+    """Proximity Tree node.
+
+    Parameters
+    ----------
+    node_id: str
+        The id of the node; the root node has id 0.
+    _is_leaf: bool
+        To identify leaf nodes.
+    label: int, str or None
+        Contains the class label of a leaf node, None otherwise.
+    splitter: dict
+        The splitter used to split the node.
+    class_distribution: dict
+        In case of an impure leaf node, the class distribution is saved to
+        calculate the probability of each class.
+    children: dict
+        Contains the class labels and the associated child nodes, empty for a
+        leaf node.
+    """
+
+    def __init__(
+        self,
+        node_id: str,
+        _is_leaf: bool,
+        label=None,
+        class_distribution=None,
+        splitter=None,
+    ):
+        self.node_id = node_id
+        self._is_leaf = _is_leaf
+        self.label = label
+        self.splitter = splitter
+        self.class_distribution = class_distribution or {}
+        self.children = {}
+
+
+class ProximityTree2(BaseClassifier):
+    """Proximity Tree classifier for PF 2.0.
+
+    It leverages ADTW and cost function tuning to increase accuracy, together
+    with the computational efficiency that EAP (early abandoning and pruning)
+    provides when computing these distances.
+
+    PF 2.0 modifies the splitters to comprise three elements: a set of class
+    exemplars, a parameterised similarity measure and a time series transform.
+
+    Parameters
+    ----------
+    n_splitters: int, default = 5
+        The number of candidate splitters to be evaluated at each node.
+    max_depth: int, default = None
+        The maximum depth of the tree. If None, then nodes are expanded until all
+        leaves are pure or until all leaves contain less than min_samples_split
+        samples.
+    min_samples_split: int, default = 2
+        The minimum number of samples required to split an internal node.
+    random_state : int, RandomState instance or None, default=None
+        If `int`, random_state is the seed used by the random number generator;
+        If `RandomState` instance, random_state is the random number generator;
+        If `None`, the random number generator is the `RandomState` instance used
+        by `np.random`.
+
+    Notes
+    -----
+    For the C++ version, see
+    `ProximityForest2.0
+    `_.
+
+    Also refer to the Proximity Tree implementation;
+    see the :class:`~aeon.classification.distance_based.ProximityTree` API.
+
+    References
+    ----------
+    .. [1] Matthieu Herrmann, Chang Wei Tan, Mahsa Salehi, Geoffrey I. Webb.
+       Proximity Forest 2.0: A new effective and scalable similarity-based classifier
+       for time series, https://doi.org/10.48550/arXiv.2304.05800.
+
+    Examples
+    --------
+    >>> from aeon.datasets import load_unit_test
+    >>> from aeon.classification.distance_based import ProximityTree2
+    >>> X_train, y_train = load_unit_test(split="train")
+    >>> X_test, y_test = load_unit_test(split="test")
+    >>> classifier = ProximityTree2(n_splitters=3)
+    >>> classifier.fit(X_train, y_train)
+    ProximityTree2(...)
+    >>> y_pred = classifier.predict(X_test)
+    """
+
+    _tags = {
+        "capability:multivariate": False,
+        "capability:unequal_length": False,
+        "algorithm_type": "distance",
+        "X_inner_type": ["numpy2D"],
+    }
+
+    def __init__(
+        self,
+        n_splitters: int = 5,
+        max_depth: Optional[int] = None,
+        min_samples_split: int = 2,
+        random_state: Union[int, np.random.RandomState, None] = None,
+    ) -> None:
+        self.n_splitters = n_splitters
+        self.max_depth = max_depth
+        self.min_samples_split = min_samples_split
+        self.random_state = random_state
+        super().__init__()
+
+    def _get_parameter_value(self, X):
+        """Generate random parameter values.
+
+        For a list of distance measures, generate a dictionary
+        of parameterized distances.
+
+        Parameters
+        ----------
+        X : np.ndarray of shape (n_cases, n_timepoints)
+
+        Returns
+        -------
+        distance_param : a dictionary of distances and their
+            parameters.
+ """ + rng = check_random_state(self.random_state) + + X_std = X.std() + param_ranges = { + "dtw": {"window": (0, 0.25)}, + "adtw": {"window": (0, 0.25)}, + "lcss": {"epsilon": (X_std / 5, X_std), "window": (0, 0.25)}, + } + random_params = {} + for measure, ranges in param_ranges.items(): + random_params[measure] = { + param: np.round(rng.uniform(low, high), 3) + for param, (low, high) in ranges.items() + } + + return random_params + + def _get_candidate_splitter(self, X, y): + """Generate candidate splitter. + + Takes a time series dataset and a set of parameterized + distance measures to create a candidate splitter, which + contains a parameterized distance measure and a set of exemplars. + + Parameters + ---------- + X : np.ndarray shape (n_cases, n_timepoints) + The training input samples. + y : np.array shape (n_cases,) or (n_cases,1) + The labels of the training samples. + parameterized_distances : dictionary + Contains the distances and their parameters. + + Returns + ------- + splitter : list of two dictionaries + A distance and its parameter values and a set of exemplars. + """ + rng = check_random_state(self.random_state) + + # Class exemplars + exemplars = {} + for label in np.unique(y): + y_new = y[y == label] + X_new = X[y == label] + id = rng.randint(0, X_new.shape[0]) + exemplars[y_new[id]] = X_new[id, :] + + # Time series transform + transforms = ["raw", "first_derivative"] + t = rng.randint(0, 2) + transform = transforms[t] + + # random parameterized distance measure + parameterized_distances = self._get_parameter_value(X) + n = rng.randint(0, 3) + dist = list(parameterized_distances.keys())[n] + if dist == "dtw": + values = [0.5, 1, 2] + p = rng.choice(values) + parameterized_distances[dist]["p"] = p + if dist == "adtw": + values = [0.5, 1, 2] + p = rng.choice(values) + parameterized_distances[dist]["p"] = p + i = rng.randint(1, 101) + w = ((i / 100) ** 5) * self._max_warp_penalty + parameterized_distances[dist]["warp_penalty"] = w + + # Create a list of class exemplars, distance measures and transform + splitter = [exemplars, {dist: parameterized_distances[dist]}, transform] + + return splitter + + def _get_best_splitter(self, X, y): + """Get the splitter for a node which maximizes the gini gain.""" + max_gain = float("-inf") + best_splitter = None + for _ in range(self.n_splitters): + splitter = self._get_candidate_splitter(X, y) + labels = list(splitter[0].keys()) + measure = list(splitter[1].keys())[0] + transform = splitter[2] + if transform == "first_derivative": + X_trans = NumbaList() + for i in range(len(X)): + X_trans.append(first_order_derivative(X[i])) + exemplars = NumbaList() + for i in range(len(labels)): + exemplars.append( + first_order_derivative(list(splitter[0].values())[i]) + ) + else: + X_trans = X + exemplars = np.array(list(splitter[0].values())) + + y_subs = [[] for _ in range(len(labels))] + for j in range(len(X_trans)): + min_dist = float("inf") + sub = None + for k in range(len(labels)): + if measure == "dtw" or measure == "adtw": + splitter[1][measure]["threshold"] = min_dist + dist = distance( + X_trans[j], + exemplars[k], + metric=measure, + **splitter[1][measure], + ) + if dist < min_dist: + min_dist = dist + sub = k + y_subs[sub].append(y[j]) + y_subs = [np.array(ele, dtype=y.dtype) for ele in y_subs] + gini_index = gini_gain(y, y_subs) + if gini_index > max_gain: + max_gain = gini_index + best_splitter = splitter + return best_splitter + + def _build_tree(self, X, y, depth, node_id, parent_target_value=None): + """Build the tree 
+
+    def _get_best_splitter(self, X, y):
+        """Get the splitter for a node which maximizes the gini gain."""
+        max_gain = float("-inf")
+        best_splitter = None
+        for _ in range(self.n_splitters):
+            splitter = self._get_candidate_splitter(X, y)
+            labels = list(splitter[0].keys())
+            measure = list(splitter[1].keys())[0]
+            transform = splitter[2]
+            if transform == "first_derivative":
+                X_trans = NumbaList()
+                for i in range(len(X)):
+                    X_trans.append(first_order_derivative(X[i]))
+                exemplars = NumbaList()
+                for i in range(len(labels)):
+                    exemplars.append(
+                        first_order_derivative(list(splitter[0].values())[i])
+                    )
+            else:
+                X_trans = X
+                exemplars = np.array(list(splitter[0].values()))
+
+            y_subs = [[] for _ in range(len(labels))]
+            for j in range(len(X_trans)):
+                min_dist = float("inf")
+                sub = None
+                for k in range(len(labels)):
+                    if measure == "dtw" or measure == "adtw":
+                        # Pass the best distance so far as a threshold so the
+                        # cost matrix computation can abandon early (EAP)
+                        splitter[1][measure]["threshold"] = min_dist
+                    dist = distance(
+                        X_trans[j],
+                        exemplars[k],
+                        metric=measure,
+                        **splitter[1][measure],
+                    )
+                    if dist < min_dist:
+                        min_dist = dist
+                        sub = k
+                y_subs[sub].append(y[j])
+            y_subs = [np.array(ele, dtype=y.dtype) for ele in y_subs]
+            gini_index = gini_gain(y, y_subs)
+            if gini_index > max_gain:
+                max_gain = gini_index
+                best_splitter = splitter
+        return best_splitter
+
+    def _build_tree(self, X, y, depth, node_id, parent_target_value=None):
+        """Build the tree recursively from the root node down to the leaf nodes."""
+        # If the data reaching the node is empty
+        if len(X) == 0:
+            leaf_label = parent_target_value
+            leaf_distribution = {}
+            leaf = _Node(
+                node_id=node_id,
+                _is_leaf=True,
+                label=leaf_label,
+                class_distribution=leaf_distribution,
+            )
+            return leaf
+
+        # Target value in current node
+        target_value = self._find_target_value(y)
+        class_distribution = {
+            label: count / len(y)
+            for label, count in zip(*np.unique(y, return_counts=True))
+        }
+
+        # Pure node
+        if len(np.unique(y)) == 1:
+            leaf_label = target_value
+            leaf = _Node(
+                node_id=node_id,
+                _is_leaf=True,
+                label=leaf_label,
+                class_distribution=class_distribution,
+            )
+            return leaf
+
+        # If min_samples_split is reached
+        if self.min_samples_split >= len(X):
+            leaf_label = target_value
+            leaf = _Node(
+                node_id=node_id,
+                _is_leaf=True,
+                label=leaf_label,
+                class_distribution=class_distribution,
+            )
+            return leaf
+
+        # If max depth is reached
+        if (self.max_depth is not None) and (depth >= self.max_depth):
+            leaf_label = target_value
+            leaf = _Node(
+                node_id=node_id,
+                _is_leaf=True,
+                label=leaf_label,
+                class_distribution=class_distribution,
+            )
+            return leaf
+
+        # Find the best splitter
+        splitter = self._get_best_splitter(X, y)
+
+        # Create an internal node
+        node = _Node(node_id=node_id, _is_leaf=False, splitter=splitter)
+
+        # For each exemplar, split the data
+        labels = list(splitter[0].keys())
+        measure = list(splitter[1].keys())[0]
+        transform = splitter[2]
+        if transform == "first_derivative":
+            X_trans = NumbaList()
+            for i in range(len(X)):
+                X_trans.append(first_order_derivative(X[i]))
+            exemplars = NumbaList()
+            for i in range(len(labels)):
+                exemplars.append(first_order_derivative(list(splitter[0].values())[i]))
+        else:
+            X_trans = X
+            exemplars = np.array(list(splitter[0].values()))
+
+        X_child = [[] for _ in labels]
+        y_child = [[] for _ in labels]
+        for i in range(len(X_trans)):
+            min_dist = np.inf
+            id = None
+            for j in range(len(labels)):
+                if measure == "dtw" or measure == "adtw":
+                    splitter[1][measure]["threshold"] = min_dist
+                dist = distance(
+                    X_trans[i],
+                    exemplars[j],
+                    metric=measure,
+                    **splitter[1][measure],
+                )
+                if dist < min_dist:
+                    min_dist = dist
+                    id = j
+            X_child[id].append(X[i])
+            y_child[id].append(y[i])
+        X_child = [np.array(ele) for ele in X_child]
+        y_child = [np.array(ele) for ele in y_child]
+        # For each exemplar, create a branch
+        for i in range(len(labels)):
+            child_node_id = node_id + "." + str(i)
+            child_node = self._build_tree(
+                X_child[i],
+                y_child[i],
+                depth=depth + 1,
+                node_id=child_node_id,
+                parent_target_value=target_value,
+            )
+            node.children[labels[i]] = child_node
+
+        return node
+
+    @staticmethod
+    @njit(cache=True, fastmath=True)
+    def _find_target_value(y):
+        """Get the class label of highest frequency."""
+        unique_labels = list(np.unique(y))
+        class_counts = []
+        for i in range(len(unique_labels)):
+            cnt = 0
+            for j in range(len(y)):
+                if y[j] == unique_labels[i]:
+                    cnt += 1
+            class_counts.append(cnt)
+        class_counts = np.array(class_counts)
+        # Find the index of the maximum count
+        max_index = np.argmax(class_counts)
+        mode_value = unique_labels[max_index]
+        return mode_value
+
+    def _fit(self, X, y):
+        self._max_warp_penalty = _global_warp_penalty(X)
+        self.root = self._build_tree(
+            X, y, depth=0, node_id="0", parent_target_value=None
+        )
+
+    def _predict(self, X):
+        probas = self._predict_proba(X)
+        predictions = np.argmax(probas, axis=1)
+        return np.array([self.classes_[pred] for pred in predictions])
+
+    def _predict_proba(self, X):
+        # Get the unique class labels
+        classes = list(self.classes_)
+        class_count = len(classes)
+        probas = []
+
+        for i in range(len(X)):
+            # Classify the data point and find the leaf node
+            leaf_node = self._classify(self.root, X[i])
+
+            # Create probability distribution based on class counts in the leaf node
+            proba = np.zeros(class_count)
+            for class_label, class_proba in leaf_node.class_distribution.items():
+                proba[classes.index(class_label)] = class_proba
+            probas.append(proba)
+
+        return np.array(probas)
+
+    def _classify(self, treenode, x):
+        # Classify one data point using the proximity tree
+        if treenode._is_leaf:
+            return treenode
+        else:
+            measure = list(treenode.splitter[1].keys())[0]
+            branches = list(treenode.splitter[0].keys())
+            transform = treenode.splitter[2]
+            if transform == "first_derivative":
+                x_trans = first_order_derivative(x)
+                exemplars = NumbaList()
+                for i in range(len(list(treenode.splitter[0].values()))):
+                    exemplars.append(
+                        first_order_derivative(list(treenode.splitter[0].values())[i])
+                    )
+            else:
+                x_trans = x
+                exemplars = np.array(list(treenode.splitter[0].values()))
+            min_dist = np.inf
+            id = None
+            for i in range(len(branches)):
+                if measure == "dtw" or measure == "adtw":
+                    treenode.splitter[1][measure]["threshold"] = min_dist
+                dist = distance(
+                    x_trans,
+                    exemplars[i],
+                    metric=measure,
+                    **treenode.splitter[1][measure],
+                )
+                if dist < min_dist:
+                    min_dist = dist
+                    id = i
+            return self._classify(treenode.children[branches[id]], x)
+
+
+@njit(cache=True, fastmath=True)
+def gini(y) -> float:
+    """Get the gini score at a specific node.
+
+    Parameters
+    ----------
+    y : 1d numpy array
+        array of class labels
+
+    Returns
+    -------
+    score : float
+        gini score for the set of class labels (i.e. how pure they are). A
+        larger score means more impurity. Zero means pure.
+ """ + # get number instances at node + n_instances = y.shape[0] + if n_instances > 0: + # count each class + unique_labels = list(np.unique(y)) + class_counts = [] + for i in range(len(unique_labels)): + cnt = 0 + for j in range(len(y)): + if y[j] == unique_labels[i]: + cnt += 1 + class_counts.append(cnt) + class_counts = np.array(class_counts) + # subtract class entropy from current score for each class + class_counts = np.divide(class_counts, n_instances) + class_counts = np.power(class_counts, 2) + sum = np.sum(class_counts) + return 1 - sum + else: + # y is empty, therefore considered pure + raise ValueError("y empty") + + +@njit(cache=True, fastmath=True) +def gini_gain(y, y_subs) -> float: + """Get gini score of a split, i.e. the gain from parent to children. + + Parameters + ---------- + y : 1d array + array of class labels at parent + y_subs : list of 1d array like + list of array of class labels, one array per child + + Returns + ------- + score : float + gini score of the split from parent class labels to children. Note a + higher score means better gain, + i.e. a better split + """ + # find number of instances overall + parent_n_instances = y.shape[0] + # if parent has no instances then is pure + if parent_n_instances == 0: + for child in y_subs: + if len(child) > 0: + raise ValueError("children populated but parent empty") + return 0.5 + # find gini for parent node + score = gini(y) + # sum the children's gini scores + for index in range(len(y_subs)): + child_class_labels = y_subs[index] + # ignore empty children + if len(child_class_labels) > 0: + # find gini score for this child + child_score = gini(child_class_labels) + # weight score by proportion of instances at child compared to + # parent + child_size = len(child_class_labels) + child_score *= child_size / parent_n_instances + # add to cumulative sum + score -= child_score + return score + + +@njit(cache=True, fastmath=True) +def _global_warp_penalty(X): + n = len(X) + pairs = [ + (X[np.random.randint(0, n)], X[np.random.randint(0, n)]) for _ in range(4000) + ] + distances = np.array([minkowski_distance(pair[0], pair[1], 2) for pair in pairs]) + penalty = np.mean(distances) + return penalty + + +@njit(cache=True, fastmath=True) +def first_order_derivative_1d(q: np.ndarray) -> np.ndarray: + """Compute the first-order derivative of a 1D time series.""" + n = len(q) + result = np.zeros(n) + + # First element + result[0] = (q[1] - q[0]) / 2.0 + + # Middle elements + for j in range(1, n - 1): + result[j] = ((q[j] - q[j - 1]) + (q[j + 1] - q[j - 1]) / 2.0) / 2.0 + + # Last element + result[-1] = (q[-1] - q[-2]) / 2.0 + + return result + + +@njit(cache=True, fastmath=True) +def first_order_derivative_2d(q: np.ndarray) -> np.ndarray: + """Compute the first-order derivative of a 2D time series.""" + n = q.shape[1] + result = np.zeros((1, n)) + + # First element + result[0, 0] = (q[0, 1] - q[0, 0]) / 2.0 + + # Middle elements + for j in range(1, n - 1): + result[0, j] = ( + (q[0, j] - q[0, j - 1]) + (q[0, j + 1] - q[0, j - 1]) / 2.0 + ) / 2.0 + + # Last element + result[0, -1] = (q[0, -1] - q[0, -2]) / 2.0 + + return result + + +@njit(cache=True, fastmath=True) +def first_order_derivative(q: np.ndarray) -> np.ndarray: + """ + Compute the first-order derivative of the time series. + + Parameters + ---------- + q : np.ndarray (n_timepoints,) or (1, n_timepoints) + Time series to take derivative of. + + Returns + ------- + np.ndarray + The first-order derivative of q with the same shape as the input. 
+ """ + if q.ndim == 1: + return first_order_derivative_1d(q) + elif q.ndim == 2 and q.shape[0] == 1: + return first_order_derivative_2d(q) + else: + raise ValueError( + "Time series must be either (n_timepoints,) or (1, n_timepoints)." + ) + + +# Distances for PF 2. + + +class DistanceKwargs(TypedDict, total=False): + window: Optional[float] + itakura_max_slope: Optional[float] + p: float + w: np.ndarray + epsilon: float + warp_penalty: float + threshold: float + + +DistanceFunction = Callable[[np.ndarray, np.ndarray, Any], float] + + +def distance( + x: np.ndarray, + y: np.ndarray, + metric: Union[str, DistanceFunction], + **kwargs: Unpack[DistanceKwargs], +) -> float: + r"""Compute the distance between two time series. + + Sourced from the distance module to use in the PF 2.0 algorithm. + + Parameters + ---------- + x : np.ndarray + First time series, either univariate, shape ``(n_timepoints,)``, or + multivariate, shape ``(n_channels, n_timepoints)``. + y : np.ndarray + Second time series, either univariate, shape ``(n_timepoints,)``, or + multivariate, shape ``(n_channels, n_timepoints)``. + metric : str or Callable + The distance metric to use. + A list of valid distance metrics can be found in the documentation for + :func:`aeon.distances.get_distance_function` or by calling the function + :func:`aeon.distances.get_distance_function_names`. + kwargs : Any + Arguments for metric. Refer to each metrics documentation for a list of + possible arguments. + + Returns + ------- + float + Distance between the x and y. + + Raises + ------ + ValueError + If x and y are not 1D, or 2D arrays. + If metric is not a valid string or callable. + """ + if metric == "minkowski": + return minkowski_distance(x, y, kwargs.get("p", 2.0), kwargs.get("w", None)) + elif metric == "dtw": + return _dtw_distance( + x, + y, + p=kwargs.get("p"), + window=kwargs.get("window"), + itakura_max_slope=kwargs.get("itakura_max_slope"), + threshold=kwargs.get("threshold"), + ) + elif metric == "lcss": + return lcss_distance( + x, + y, + kwargs.get("window"), + kwargs.get("epsilon", 1.0), + kwargs.get("itakura_max_slope"), + ) + elif metric == "adtw": + return _adtw_distance( + x, + y, + p=kwargs.get("p"), + itakura_max_slope=kwargs.get("itakura_max_slope"), + window=kwargs.get("window"), + warp_penalty=kwargs.get("warp_penalty", 1.0), + threshold=kwargs.get("threshold"), + ) + else: + if isinstance(metric, Callable): + return metric(x, y, **kwargs) + raise ValueError("Metric must be one of the supported strings or a callable") + + +@njit(cache=True, fastmath=True) +def _dtw_distance( + x: np.ndarray, + y: np.ndarray, + p: float = 2.0, + window: Optional[float] = None, + itakura_max_slope: Optional[float] = None, + threshold: Optional[float] = np.inf, +) -> float: + r"""Return parameterised DTW distance for PF 2.0. + + DTW is the most widely researched and used elastic distance measure. It mitigates + distortions in the time axis by realligning (warping) the series to best match + each other. A good background into DTW can be found in [1]_. For two series, + possibly of unequal length, + :math:`\\mathbf{x}=\\{x_1,x_2,\\ldots,x_n\\}` and + :math:`\\mathbf{y}=\\{y_1,y_2, \\ldots,y_m\\}` DTW first calculates + :math:`M(\\mathbf{x},\\mathbf{y})`, the :math:`n \times m` + pointwise distance matrix between series :math:`\\mathbf{x}` and :math:`\\mathbf{y}` + , where :math:`M_{i,j}= (x_i-y_j)^p`. + A warping path + .. 
+@njit(cache=True, fastmath=True)
+def _dtw_distance(
+    x: np.ndarray,
+    y: np.ndarray,
+    p: float = 2.0,
+    window: Optional[float] = None,
+    itakura_max_slope: Optional[float] = None,
+    threshold: Optional[float] = np.inf,
+) -> float:
+    r"""Return the parameterised DTW distance for PF 2.0.
+
+    DTW is the most widely researched and used elastic distance measure. It
+    mitigates distortions in the time axis by realigning (warping) the series
+    to best match each other. A good background into DTW can be found in [1]_.
+    For two series, possibly of unequal length,
+    :math:`\mathbf{x}=\{x_1,x_2,\ldots,x_n\}` and
+    :math:`\mathbf{y}=\{y_1,y_2,\ldots,y_m\}`, DTW first calculates
+    :math:`M(\mathbf{x},\mathbf{y})`, the :math:`n \times m` pointwise distance
+    matrix between series :math:`\mathbf{x}` and :math:`\mathbf{y}`, where
+    :math:`M_{i,j}=(x_i-y_j)^p`.
+
+    A warping path
+
+    .. math::
+
+        P = <(e_1, f_1), (e_2, f_2), \ldots, (e_s, f_s)>
+
+    is a set of pairs of indices that define a traversal of matrix :math:`M`. A
+    valid warping path must start at location :math:`(1,1)`, end at point
+    :math:`(n,m)` and not backtrack, i.e. :math:`0 \leq e_{i+1}-e_{i} \leq 1`
+    and :math:`0 \leq f_{i+1}-f_i \leq 1` for all :math:`1 < i < m`.
+
+    The DTW distance between series is the path through :math:`M` that
+    minimizes the total distance. The distance for any path :math:`P` of
+    length :math:`s` is
+
+    .. math::
+
+        D_P(\mathbf{x},\mathbf{y}, M) = \sum_{i=1}^s M_{e_i,f_i}
+
+    If :math:`\mathcal{P}` is the space of all possible paths, the DTW path
+    :math:`P^*` is the path that has the minimum distance, hence the DTW
+    distance between series is
+
+    .. math::
+
+        d_{dtw}(\mathbf{x}, \mathbf{y}) = D_{P^*}(\mathbf{x},\mathbf{y}, M).
+
+    The optimal warping path :math:`P^*` can be found exactly through a
+    dynamic programming formulation. This can be a time-consuming operation,
+    and it is common to put a restriction on the amount of warping allowed.
+    This is implemented through the bounding_matrix structure, that supplies a
+    mask for allowable warpings. The most common bounding strategies include
+    the Sakoe-Chiba band [2]_. The width of the allowed warping is controlled
+    through the ``window`` parameter, which sets the maximum proportion of
+    warping allowed.
+
+    Parameters
+    ----------
+    x : np.ndarray
+        First time series, either univariate, shape ``(n_timepoints,)``, or
+        multivariate, shape ``(n_channels, n_timepoints)``.
+    y : np.ndarray
+        Second time series, either univariate, shape ``(n_timepoints,)``, or
+        multivariate, shape ``(n_channels, n_timepoints)``.
+    p : float, default=2.0
+        The order of the norm of the difference
+        (default is 2.0, which represents the Euclidean distance).
+    window : float or None, default=None
+        The window to use for the bounding matrix. If None, no bounding matrix
+        is used. window is a percentage deviation, so if ``window = 0.1`` then
+        10% of the series length is the max warping allowed.
+    itakura_max_slope : float, default=None
+        Maximum slope as a proportion of the number of time points used to
+        create the Itakura parallelogram on the bounding matrix. Must be
+        between 0.0 and 1.0.
+    threshold : float, default=np.inf
+        Threshold above which to stop the calculation of the cost matrix
+        (early abandoning).
+
+    Returns
+    -------
+    float
+        DTW distance between x and y, minimum value 0.
+
+    Raises
+    ------
+    ValueError
+        If x and y are not 1D or 2D arrays.
+
+    References
+    ----------
+    .. [1] Ratanamahatana C and Keogh E.: Three myths about dynamic time warping data
+       mining, Proceedings of 5th SIAM International Conference on Data Mining, 2005.
+    .. [2] Sakoe H. and Chiba S.: Dynamic programming algorithm optimization for
+       spoken word recognition. IEEE Transactions on Acoustics, Speech, and Signal
+       Processing 26(1):43–49, 1978.
+ """ + if x.ndim == 1 and y.ndim == 1: + _x = x.reshape((1, x.shape[0])) + _y = y.reshape((1, y.shape[0])) + bounding_matrix = create_bounding_matrix( + _x.shape[1], _y.shape[1], window, itakura_max_slope + ) + return _dtw_cost_matrix(_x, _y, p, bounding_matrix, threshold)[ + _x.shape[1] - 1, _y.shape[1] - 1 + ] + if x.ndim == 2 and y.ndim == 2: + bounding_matrix = create_bounding_matrix( + x.shape[1], y.shape[1], window, itakura_max_slope + ) + return _dtw_cost_matrix(x, y, p, bounding_matrix, threshold)[ + x.shape[1] - 1, y.shape[1] - 1 + ] + raise ValueError("x and y must be 1D or 2D") + + +@njit(cache=True, fastmath=True) +def _dtw_cost_matrix( + x: np.ndarray, + y: np.ndarray, + p: float, + bounding_matrix: np.ndarray, + threshold: float, +) -> np.ndarray: + x_size = x.shape[1] + y_size = y.shape[1] + cost_matrix = np.full((x_size + 1, y_size + 1), np.inf) + cost_matrix[0, 0] = 0.0 + _w = np.ones_like(x) + for i in range(x_size): + for j in range(y_size): + if bounding_matrix[i, j]: + cost_matrix[i + 1, j + 1] = _univariate_minkowski_distance( + x[:, i], y[:, j], p, _w[:, i] + ) + min( + cost_matrix[i, j + 1], + cost_matrix[i + 1, j], + cost_matrix[i, j], + ) + if cost_matrix[i + 1, j + 1] > threshold: + break + return cost_matrix[1:, 1:] + + +@njit(cache=True, fastmath=True) +def _adtw_distance( + x: np.ndarray, + y: np.ndarray, + p: float = 2.0, + window: Optional[float] = None, + itakura_max_slope: Optional[float] = None, + warp_penalty: float = 1.0, + threshold: Optional[float] = np.inf, +) -> float: + """Parameterised version of ADTW distance for PF 2.0. + + Amercing Dynamic Time Warping (ADTW) [1]_ is a variant of DTW that uses a + explicit warping penalty to encourage or discourage warping. The warping + penalty is a constant value that is added to the cost of warping. A high + value will encourage the algorithm to warp less and if the value is low warping + is more likely. + + Parameters + ---------- + x : np.ndarray + First time series, either univariate, shape ``(n_timepoints,)``, or + multivariate, shape ``(n_channels, n_timepoints)``. + y : np.ndarray + Second time series, either univariate, shape ``(n_timepoints,)``, or + multivariate, shape ``(n_channels, n_timepoints)``. + p : float, default=2.0 + The order of the norm of the difference + (default is 2.0, which represents the Euclidean distance). + window : float or None, default=None + The window to use for the bounding matrix. If None, no bounding matrix + is used. window is a percentage deviation, so if ``window = 0.1`` then + 10% of the series length is the max warping allowed. + itakura_max_slope : float, default=None + Maximum slope as a proportion of the number of time points used to create + Itakura parallelogram on the bounding matrix. Must be between 0.0 and 1.0 + warp_penalty: float, default=1.0 + Penalty for warping. A high value will mean less warping. + threshold: float, default=np.inf + The threshold to stop the calculation of cost matrix. + + Returns + ------- + float + ADTW distance between x and y, minimum value 0. + + Raises + ------ + ValueError + If x and y are not 1D or 2D arrays. + + References + ---------- + .. [1] Matthieu Herrmann, Geoffrey I. Webb: Amercing: An intuitive and effective + constraint for dynamic time warping, Pattern Recognition, Volume 137, 2023. 
+ """ + if x.ndim == 1 and y.ndim == 1: + _x = x.reshape((1, x.shape[0])) + _y = y.reshape((1, y.shape[0])) + bounding_matrix = create_bounding_matrix( + _x.shape[1], _y.shape[1], window, itakura_max_slope + ) + return _adtw_cost_matrix(_x, _y, p, bounding_matrix, warp_penalty, threshold)[ + _x.shape[1] - 1, _y.shape[1] - 1 + ] + if x.ndim == 2 and y.ndim == 2: + bounding_matrix = create_bounding_matrix( + x.shape[1], y.shape[1], window, itakura_max_slope + ) + return _adtw_cost_matrix(x, y, p, bounding_matrix, warp_penalty, threshold)[ + x.shape[1] - 1, y.shape[1] - 1 + ] + raise ValueError("x and y must be 1D or 2D") + + +@njit(cache=True, fastmath=True) +def _adtw_cost_matrix( + x: np.ndarray, + y: np.ndarray, + p: float, + bounding_matrix: np.ndarray, + warp_penalty: float, + threshold: float, +) -> np.ndarray: + x_size = x.shape[1] + y_size = y.shape[1] + cost_matrix = np.full((x_size + 1, y_size + 1), np.inf) + cost_matrix[0, 0] = 0.0 + + _w = np.ones_like(x) + for i in range(x_size): + for j in range(y_size): + if bounding_matrix[i, j]: + cost_matrix[i + 1, j + 1] = _univariate_minkowski_distance( + x[:, i], y[:, j], p, _w[:, i] + ) + min( + cost_matrix[i, j + 1] + warp_penalty, + cost_matrix[i + 1, j] + warp_penalty, + cost_matrix[i, j], + ) + if cost_matrix[i + 1, j + 1] > threshold: + break + + return cost_matrix[1:, 1:] diff --git a/aeon/classification/distance_based/tests/test_proximity_forest_2.py b/aeon/classification/distance_based/tests/test_proximity_forest_2.py new file mode 100644 index 0000000000..a1352fb27f --- /dev/null +++ b/aeon/classification/distance_based/tests/test_proximity_forest_2.py @@ -0,0 +1,96 @@ +"""Tests for ProximityForest2.0.""" + +import numpy as np +import pytest +from sklearn.metrics import accuracy_score + +from aeon.classification.distance_based import ProximityForest2, ProximityTree2 +from aeon.datasets import load_unit_test + + +def test_get_parameter_value(): + """Test the distance parameters generated.""" + X = np.random.rand(10, 50) + random_state = 42 + tree = ProximityTree2(random_state=random_state) + + params = tree._get_parameter_value(X) + + # Check if the parameters are generated for all distance measures + expected_measures = [ + "dtw", + "adtw", + "lcss", + ] + assert set(params.keys()) == set(expected_measures) + + # Check specific parameter ranges + for measure, measure_params in params.items(): + if measure in ["dtw", "adtw", "lcss"]: + assert 0 <= measure_params["window"] <= 0.25 + elif measure == "lcss": + X_std = X.std() + assert X_std / 5 <= measure_params["epsilon"] <= X_std + + +def test_get_cadidate_splitter(): + """Test the method to generate candidate splitters.""" + X, y = load_unit_test() + clf = ProximityTree2(max_depth=1) + clf.fit(X, y) + splitter = clf._get_candidate_splitter(X, y) + assert len(splitter) == 3 + + expected_measures = [ + "dtw", + "adtw", + "lcss", + ] + measure = list(splitter[1].keys())[0] + assert measure in expected_measures + + +def test_get_best_splitter(): + """Test the method to get optimum splitter of a node.""" + X, y = load_unit_test() + clf = ProximityTree2(n_splitters=3, max_depth=1) + clf.fit(X, y) + + splitter = clf._get_best_splitter(X, y) + + assert splitter is not None + + assert isinstance(splitter, list) + + assert len(splitter) == 3 + + +def test_proximity_tree(): + """Test the fit method of ProximityTree.""" + X, y = load_unit_test() + clf = ProximityTree2(n_splitters=3, max_depth=4) + clf.fit(X, y) + X_test, y_test = load_unit_test(split="train") + y_pred = clf.predict(X_test) 
+    score = accuracy_score(y_test, y_pred)
+    assert score >= 0.9
+
+
+def test_univariate():
+    """Test that multivariate input raises an appropriate error."""
+    X, y = load_unit_test()
+    X_multivariate = X.reshape((-1, 2, 12))
+    clf = ProximityForest2(n_trees=5)
+    with pytest.raises(ValueError):
+        clf.fit(X_multivariate, y)
+
+
+def test_proximity_forest():
+    """Test the fit method of ProximityForest2."""
+    X_train, y_train = load_unit_test()
+    X_test, y_test = load_unit_test(split="test")
+    clf = ProximityForest2(n_trees=5, n_splitters=3, max_depth=4, random_state=42)
+    clf.fit(X_train, y_train)
+    y_pred = clf.predict(X_test)
+    score = accuracy_score(y_test, y_pred)
+    assert score >= 0.9
diff --git a/docs/api_reference/classification.rst b/docs/api_reference/classification.rst
index 4f29f61692..f2d28b611a 100644
--- a/docs/api_reference/classification.rst
+++ b/docs/api_reference/classification.rst
@@ -79,6 +79,8 @@ Distance-based
     KNeighborsTimeSeriesClassifier
     ProximityForest
     ProximityTree
+    ProximityTree2
+    ProximityForest2

 Feature-based
 -------------