From af14cd32fe013c89de3d34d273d7e8778dbd75b4 Mon Sep 17 00:00:00 2001 From: Matthew Middlehurst Date: Mon, 23 Sep 2024 23:35:00 +0300 Subject: [PATCH] benchmarking (#2082) --- aeon/benchmarking/benchmarks.py | 80 ------------- aeon/benchmarking/utils.py | 189 ----------------------------- aeon/performance_metrics/stats.py | 192 +++++++++++++++++++++++++++++- pyproject.toml | 1 - 4 files changed, 189 insertions(+), 273 deletions(-) delete mode 100644 aeon/benchmarking/benchmarks.py delete mode 100644 aeon/benchmarking/utils.py diff --git a/aeon/benchmarking/benchmarks.py b/aeon/benchmarking/benchmarks.py deleted file mode 100644 index b8f1478741..0000000000 --- a/aeon/benchmarking/benchmarks.py +++ /dev/null @@ -1,80 +0,0 @@ -"""Benchmarking interface for use with aeon objects. - -Wraps kotsu benchmarking package. -""" - -from typing import Callable, Optional, Union -from warnings import warn - -import pandas as pd - -from aeon.base import BaseEstimator -from aeon.utils.validation._dependencies import _check_soft_dependencies - - -class BaseBenchmark: - """Base class for benchmarks. - - A benchmark consists of a set of tasks and a set of estimators. - """ - - def __init__(self): - warn("benchmarking is experimental and interfaces may change.") - _check_soft_dependencies("kotsu") - import kotsu - - self.estimators = kotsu.registration.ModelRegistry() - self.validations = kotsu.registration.ValidationRegistry() - self.kotsu_run = kotsu.run.run - - def add_estimator( - self, - estimator: BaseEstimator, - estimator_id: Optional[str] = None, - ): - """Register an estimator to the benchmark. - - Parameters - ---------- - estimator : BaseEstimator object - Estimator to add to the benchmark. - estimator_id : str, default=None - Identifier for estimator. If none given then uses estimator's class name. - - """ - estimator_id = estimator_id or f"{estimator.__class__.__name__}-v1" - estimator = estimator.clone() # extra cautious - self.estimators.register(id=estimator_id, entry_point=estimator.clone) - - def _add_task( - self, - task_entrypoint: Union[Callable, str], - task_kwargs: Optional[dict] = None, - task_id: Optional[str] = None, - ): - """Register a task to the benchmark.""" - task_id = task_id or ( - f"{task_entrypoint}-v1" - if isinstance(task_entrypoint, str) - else f"{task_entrypoint.__name__}-v1" - ) - self.validations.register( - id=task_id, entry_point=task_entrypoint, kwargs=task_kwargs - ) - - def run(self, output_file: str) -> pd.DataFrame: - """Run the benchmark. - - Parameters - ---------- - output_file : str - Path to write results output file to. - - Returns - ------- - pandas DataFrame of results, with columns; "validation_id", "model_id", - "runtime_secs", and columns for each of the metrics returned by the - benchmark task. - """ - results_df = self.kotsu_run(self.estimators, self.validations, output_file) - return results_df diff --git a/aeon/benchmarking/utils.py b/aeon/benchmarking/utils.py deleted file mode 100644 index db5d236934..0000000000 --- a/aeon/benchmarking/utils.py +++ /dev/null @@ -1,189 +0,0 @@ -"""Utils for benchmarking.""" - - -def get_qalpha(alpha: float): - """Get the alpha value for post hoc Nemenyi.""" - if alpha == 0.01: - qalpha = [ - 0.000, - 2.576, - 2.913, - 3.113, - 3.255, - 3.364, - 3.452, - 3.526, - 3.590, - 3.646, - 3.696, - 3.741, - 3.781, - 3.818, - 3.853, - 3.884, - 3.914, - 3.941, - 3.967, - 3.992, - 4.015, - 4.037, - 4.057, - 4.077, - 4.096, - 4.114, - 4.132, - 4.148, - 4.164, - 4.179, - 4.194, - 4.208, - 4.222, - 4.236, - 4.249, - 4.261, - 4.273, - 4.285, - 4.296, - 4.307, - 4.318, - 4.329, - 4.339, - 4.349, - 4.359, - 4.368, - 4.378, - 4.387, - 4.395, - 4.404, - 4.412, - 4.420, - 4.428, - 4.435, - 4.442, - 4.449, - 4.456, - ] - elif alpha == 0.05: - qalpha = [ - 0.000, - 1.960, - 2.344, - 2.569, - 2.728, - 2.850, - 2.948, - 3.031, - 3.102, - 3.164, - 3.219, - 3.268, - 3.313, - 3.354, - 3.391, - 3.426, - 3.458, - 3.489, - 3.517, - 3.544, - 3.569, - 3.593, - 3.616, - 3.637, - 3.658, - 3.678, - 3.696, - 3.714, - 3.732, - 3.749, - 3.765, - 3.780, - 3.795, - 3.810, - 3.824, - 3.837, - 3.850, - 3.863, - 3.876, - 3.888, - 3.899, - 3.911, - 3.922, - 3.933, - 3.943, - 3.954, - 3.964, - 3.973, - 3.983, - 3.992, - 4.001, - 4.009, - 4.017, - 4.025, - 4.032, - 4.040, - 4.046, - ] - elif alpha == 0.1: - qalpha = [ - 0.000, - 1.645, - 2.052, - 2.291, - 2.460, - 2.589, - 2.693, - 2.780, - 2.855, - 2.920, - 2.978, - 3.030, - 3.077, - 3.120, - 3.159, - 3.196, - 3.230, - 3.261, - 3.291, - 3.319, - 3.346, - 3.371, - 3.394, - 3.417, - 3.439, - 3.459, - 3.479, - 3.498, - 3.516, - 3.533, - 3.550, - 3.567, - 3.582, - 3.597, - 3.612, - 3.626, - 3.640, - 3.653, - 3.666, - 3.679, - 3.691, - 3.703, - 3.714, - 3.726, - 3.737, - 3.747, - 3.758, - 3.768, - 3.778, - 3.788, - 3.797, - 3.806, - 3.814, - 3.823, - 3.831, - 3.838, - 3.846, - ] - # - else: - raise Exception("alpha must be 0.01, 0.05 or 0.1") - return qalpha diff --git a/aeon/performance_metrics/stats.py b/aeon/performance_metrics/stats.py index 0041052311..800279d9da 100644 --- a/aeon/performance_metrics/stats.py +++ b/aeon/performance_metrics/stats.py @@ -9,8 +9,6 @@ import numpy as np from scipy.stats import distributions, find_repeats, wilcoxon -from aeon.benchmarking.utils import get_qalpha - def check_friedman(ranks): """ @@ -71,7 +69,7 @@ def nemenyi_test(ordered_avg_ranks, n_datasets, alpha): significant difference. """ n_estimators = len(ordered_avg_ranks) - qalpha = get_qalpha(alpha) + qalpha = _get_qalpha(alpha) # calculate critical difference with Nemenyi cd = qalpha[n_estimators] * np.sqrt( n_estimators * (n_estimators + 1) / (6 * n_datasets) @@ -86,6 +84,194 @@ def nemenyi_test(ordered_avg_ranks, n_datasets, alpha): return cliques +def _get_qalpha(alpha: float): + """Get the alpha value for post hoc Nemenyi.""" + if alpha == 0.01: + qalpha = [ + 0.000, + 2.576, + 2.913, + 3.113, + 3.255, + 3.364, + 3.452, + 3.526, + 3.590, + 3.646, + 3.696, + 3.741, + 3.781, + 3.818, + 3.853, + 3.884, + 3.914, + 3.941, + 3.967, + 3.992, + 4.015, + 4.037, + 4.057, + 4.077, + 4.096, + 4.114, + 4.132, + 4.148, + 4.164, + 4.179, + 4.194, + 4.208, + 4.222, + 4.236, + 4.249, + 4.261, + 4.273, + 4.285, + 4.296, + 4.307, + 4.318, + 4.329, + 4.339, + 4.349, + 4.359, + 4.368, + 4.378, + 4.387, + 4.395, + 4.404, + 4.412, + 4.420, + 4.428, + 4.435, + 4.442, + 4.449, + 4.456, + ] + elif alpha == 0.05: + qalpha = [ + 0.000, + 1.960, + 2.344, + 2.569, + 2.728, + 2.850, + 2.948, + 3.031, + 3.102, + 3.164, + 3.219, + 3.268, + 3.313, + 3.354, + 3.391, + 3.426, + 3.458, + 3.489, + 3.517, + 3.544, + 3.569, + 3.593, + 3.616, + 3.637, + 3.658, + 3.678, + 3.696, + 3.714, + 3.732, + 3.749, + 3.765, + 3.780, + 3.795, + 3.810, + 3.824, + 3.837, + 3.850, + 3.863, + 3.876, + 3.888, + 3.899, + 3.911, + 3.922, + 3.933, + 3.943, + 3.954, + 3.964, + 3.973, + 3.983, + 3.992, + 4.001, + 4.009, + 4.017, + 4.025, + 4.032, + 4.040, + 4.046, + ] + elif alpha == 0.1: + qalpha = [ + 0.000, + 1.645, + 2.052, + 2.291, + 2.460, + 2.589, + 2.693, + 2.780, + 2.855, + 2.920, + 2.978, + 3.030, + 3.077, + 3.120, + 3.159, + 3.196, + 3.230, + 3.261, + 3.291, + 3.319, + 3.346, + 3.371, + 3.394, + 3.417, + 3.439, + 3.459, + 3.479, + 3.498, + 3.516, + 3.533, + 3.550, + 3.567, + 3.582, + 3.597, + 3.612, + 3.626, + 3.640, + 3.653, + 3.666, + 3.679, + 3.691, + 3.703, + 3.714, + 3.726, + 3.737, + 3.747, + 3.758, + 3.768, + 3.778, + 3.788, + 3.797, + 3.806, + 3.814, + 3.823, + 3.831, + 3.838, + 3.846, + ] + # + else: + raise Exception("alpha must be 0.01, 0.05 or 0.1") + return qalpha + + def wilcoxon_test(results, labels, lower_better=False): """ Perform Wilcoxon test. diff --git a/pyproject.toml b/pyproject.toml index 1470f4f6ae..e689a239e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,6 @@ all_extras = [ "esig>=0.9.7; platform_system != 'Darwin' and python_version < '3.11'", "imbalanced-learn", "keras-self-attention", - "kotsu>=0.3.1", "matplotlib>=3.3.2,<3.9.0", "pydot>=2.0.0", "scikit_posthocs>=0.6.5",