From af14cd32fe013c89de3d34d273d7e8778dbd75b4 Mon Sep 17 00:00:00 2001
From: Matthew Middlehurst <pfm15hbu@gmail.com>
Date: Mon, 23 Sep 2024 23:35:00 +0300
Subject: [PATCH] Remove kotsu benchmarking wrapper; move get_qalpha to stats (#2082)

---
 aeon/benchmarking/benchmarks.py   |  80 -------------
 aeon/benchmarking/utils.py        | 189 -----------------------------
 aeon/performance_metrics/stats.py | 192 +++++++++++++++++++++++++++++-
 pyproject.toml                    |   1 -
 4 files changed, 189 insertions(+), 273 deletions(-)
 delete mode 100644 aeon/benchmarking/benchmarks.py
 delete mode 100644 aeon/benchmarking/utils.py

diff --git a/aeon/benchmarking/benchmarks.py b/aeon/benchmarking/benchmarks.py
deleted file mode 100644
index b8f1478741..0000000000
--- a/aeon/benchmarking/benchmarks.py
+++ /dev/null
@@ -1,80 +0,0 @@
-"""Benchmarking interface for use with aeon objects.
-
-Wraps kotsu benchmarking package.
-"""
-
-from typing import Callable, Optional, Union
-from warnings import warn
-
-import pandas as pd
-
-from aeon.base import BaseEstimator
-from aeon.utils.validation._dependencies import _check_soft_dependencies
-
-
-class BaseBenchmark:
-    """Base class for benchmarks.
-
-    A benchmark consists of a set of tasks and a set of estimators.
-    """
-
-    def __init__(self):
-        warn("benchmarking is experimental and interfaces may change.")
-        _check_soft_dependencies("kotsu")
-        import kotsu
-
-        self.estimators = kotsu.registration.ModelRegistry()
-        self.validations = kotsu.registration.ValidationRegistry()
-        self.kotsu_run = kotsu.run.run
-
-    def add_estimator(
-        self,
-        estimator: BaseEstimator,
-        estimator_id: Optional[str] = None,
-    ):
-        """Register an estimator to the benchmark.
-
-        Parameters
-        ----------
-        estimator : BaseEstimator object
-            Estimator to add to the benchmark.
-        estimator_id : str, default=None
-            Identifier for estimator. If none given then uses estimator's class name.
-
-        """
-        estimator_id = estimator_id or f"{estimator.__class__.__name__}-v1"
-        estimator = estimator.clone()  # extra cautious
-        self.estimators.register(id=estimator_id, entry_point=estimator.clone)
-
-    def _add_task(
-        self,
-        task_entrypoint: Union[Callable, str],
-        task_kwargs: Optional[dict] = None,
-        task_id: Optional[str] = None,
-    ):
-        """Register a task to the benchmark."""
-        task_id = task_id or (
-            f"{task_entrypoint}-v1"
-            if isinstance(task_entrypoint, str)
-            else f"{task_entrypoint.__name__}-v1"
-        )
-        self.validations.register(
-            id=task_id, entry_point=task_entrypoint, kwargs=task_kwargs
-        )
-
-    def run(self, output_file: str) -> pd.DataFrame:
-        """Run the benchmark.
-
-        Parameters
-        ----------
-        output_file : str
-            Path to write results output file to.
-
-        Returns
-        -------
-        pandas DataFrame of results, with columns; "validation_id", "model_id",
-            "runtime_secs", and columns for each of the metrics returned by the
-            benchmark task.
-        """
-        results_df = self.kotsu_run(self.estimators, self.validations, output_file)
-        return results_df
diff --git a/aeon/benchmarking/utils.py b/aeon/benchmarking/utils.py
deleted file mode 100644
index db5d236934..0000000000
--- a/aeon/benchmarking/utils.py
+++ /dev/null
@@ -1,189 +0,0 @@
-"""Utils for benchmarking."""
-
-
-def get_qalpha(alpha: float):
-    """Get the alpha value for post hoc Nemenyi."""
-    if alpha == 0.01:
-        qalpha = [
-            0.000,
-            2.576,
-            2.913,
-            3.113,
-            3.255,
-            3.364,
-            3.452,
-            3.526,
-            3.590,
-            3.646,
-            3.696,
-            3.741,
-            3.781,
-            3.818,
-            3.853,
-            3.884,
-            3.914,
-            3.941,
-            3.967,
-            3.992,
-            4.015,
-            4.037,
-            4.057,
-            4.077,
-            4.096,
-            4.114,
-            4.132,
-            4.148,
-            4.164,
-            4.179,
-            4.194,
-            4.208,
-            4.222,
-            4.236,
-            4.249,
-            4.261,
-            4.273,
-            4.285,
-            4.296,
-            4.307,
-            4.318,
-            4.329,
-            4.339,
-            4.349,
-            4.359,
-            4.368,
-            4.378,
-            4.387,
-            4.395,
-            4.404,
-            4.412,
-            4.420,
-            4.428,
-            4.435,
-            4.442,
-            4.449,
-            4.456,
-        ]
-    elif alpha == 0.05:
-        qalpha = [
-            0.000,
-            1.960,
-            2.344,
-            2.569,
-            2.728,
-            2.850,
-            2.948,
-            3.031,
-            3.102,
-            3.164,
-            3.219,
-            3.268,
-            3.313,
-            3.354,
-            3.391,
-            3.426,
-            3.458,
-            3.489,
-            3.517,
-            3.544,
-            3.569,
-            3.593,
-            3.616,
-            3.637,
-            3.658,
-            3.678,
-            3.696,
-            3.714,
-            3.732,
-            3.749,
-            3.765,
-            3.780,
-            3.795,
-            3.810,
-            3.824,
-            3.837,
-            3.850,
-            3.863,
-            3.876,
-            3.888,
-            3.899,
-            3.911,
-            3.922,
-            3.933,
-            3.943,
-            3.954,
-            3.964,
-            3.973,
-            3.983,
-            3.992,
-            4.001,
-            4.009,
-            4.017,
-            4.025,
-            4.032,
-            4.040,
-            4.046,
-        ]
-    elif alpha == 0.1:
-        qalpha = [
-            0.000,
-            1.645,
-            2.052,
-            2.291,
-            2.460,
-            2.589,
-            2.693,
-            2.780,
-            2.855,
-            2.920,
-            2.978,
-            3.030,
-            3.077,
-            3.120,
-            3.159,
-            3.196,
-            3.230,
-            3.261,
-            3.291,
-            3.319,
-            3.346,
-            3.371,
-            3.394,
-            3.417,
-            3.439,
-            3.459,
-            3.479,
-            3.498,
-            3.516,
-            3.533,
-            3.550,
-            3.567,
-            3.582,
-            3.597,
-            3.612,
-            3.626,
-            3.640,
-            3.653,
-            3.666,
-            3.679,
-            3.691,
-            3.703,
-            3.714,
-            3.726,
-            3.737,
-            3.747,
-            3.758,
-            3.768,
-            3.778,
-            3.788,
-            3.797,
-            3.806,
-            3.814,
-            3.823,
-            3.831,
-            3.838,
-            3.846,
-        ]
-        #
-    else:
-        raise Exception("alpha must be 0.01, 0.05 or 0.1")
-    return qalpha
diff --git a/aeon/performance_metrics/stats.py b/aeon/performance_metrics/stats.py
index 0041052311..800279d9da 100644
--- a/aeon/performance_metrics/stats.py
+++ b/aeon/performance_metrics/stats.py
@@ -9,8 +9,6 @@
 import numpy as np
 from scipy.stats import distributions, find_repeats, wilcoxon
 
-from aeon.benchmarking.utils import get_qalpha
-
 
 def check_friedman(ranks):
     """
@@ -71,7 +69,7 @@ def nemenyi_test(ordered_avg_ranks, n_datasets, alpha):
         significant difference.
     """
     n_estimators = len(ordered_avg_ranks)
-    qalpha = get_qalpha(alpha)
+    qalpha = _get_qalpha(alpha)
     # calculate critical difference with Nemenyi
     cd = qalpha[n_estimators] * np.sqrt(
         n_estimators * (n_estimators + 1) / (6 * n_datasets)
@@ -86,6 +84,194 @@ def nemenyi_test(ordered_avg_ranks, n_datasets, alpha):
     return cliques
 
 
+def _get_qalpha(alpha: float):
+    """Return the Nemenyi q_alpha critical-value table, indexed by estimator count."""
+    if alpha == 0.01:
+        qalpha = [
+            0.000,
+            2.576,
+            2.913,
+            3.113,
+            3.255,
+            3.364,
+            3.452,
+            3.526,
+            3.590,
+            3.646,
+            3.696,
+            3.741,
+            3.781,
+            3.818,
+            3.853,
+            3.884,
+            3.914,
+            3.941,
+            3.967,
+            3.992,
+            4.015,
+            4.037,
+            4.057,
+            4.077,
+            4.096,
+            4.114,
+            4.132,
+            4.148,
+            4.164,
+            4.179,
+            4.194,
+            4.208,
+            4.222,
+            4.236,
+            4.249,
+            4.261,
+            4.273,
+            4.285,
+            4.296,
+            4.307,
+            4.318,
+            4.329,
+            4.339,
+            4.349,
+            4.359,
+            4.368,
+            4.378,
+            4.387,
+            4.395,
+            4.404,
+            4.412,
+            4.420,
+            4.428,
+            4.435,
+            4.442,
+            4.449,
+            4.456,
+        ]
+    elif alpha == 0.05:
+        qalpha = [
+            0.000,
+            1.960,
+            2.344,
+            2.569,
+            2.728,
+            2.850,
+            2.948,
+            3.031,
+            3.102,
+            3.164,
+            3.219,
+            3.268,
+            3.313,
+            3.354,
+            3.391,
+            3.426,
+            3.458,
+            3.489,
+            3.517,
+            3.544,
+            3.569,
+            3.593,
+            3.616,
+            3.637,
+            3.658,
+            3.678,
+            3.696,
+            3.714,
+            3.732,
+            3.749,
+            3.765,
+            3.780,
+            3.795,
+            3.810,
+            3.824,
+            3.837,
+            3.850,
+            3.863,
+            3.876,
+            3.888,
+            3.899,
+            3.911,
+            3.922,
+            3.933,
+            3.943,
+            3.954,
+            3.964,
+            3.973,
+            3.983,
+            3.992,
+            4.001,
+            4.009,
+            4.017,
+            4.025,
+            4.032,
+            4.040,
+            4.046,
+        ]
+    elif alpha == 0.1:
+        qalpha = [
+            0.000,
+            1.645,
+            2.052,
+            2.291,
+            2.460,
+            2.589,
+            2.693,
+            2.780,
+            2.855,
+            2.920,
+            2.978,
+            3.030,
+            3.077,
+            3.120,
+            3.159,
+            3.196,
+            3.230,
+            3.261,
+            3.291,
+            3.319,
+            3.346,
+            3.371,
+            3.394,
+            3.417,
+            3.439,
+            3.459,
+            3.479,
+            3.498,
+            3.516,
+            3.533,
+            3.550,
+            3.567,
+            3.582,
+            3.597,
+            3.612,
+            3.626,
+            3.640,
+            3.653,
+            3.666,
+            3.679,
+            3.691,
+            3.703,
+            3.714,
+            3.726,
+            3.737,
+            3.747,
+            3.758,
+            3.768,
+            3.778,
+            3.788,
+            3.797,
+            3.806,
+            3.814,
+            3.823,
+            3.831,
+            3.838,
+            3.846,
+        ]
+        # Each table holds 57 entries (indices 0-56), indexed by estimator count.
+    else:
+        raise Exception("alpha must be 0.01, 0.05 or 0.1")
+    return qalpha
+
+
 def wilcoxon_test(results, labels, lower_better=False):
     """
     Perform Wilcoxon test.
diff --git a/pyproject.toml b/pyproject.toml
index 1470f4f6ae..e689a239e7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -60,7 +60,6 @@ all_extras = [
     "esig>=0.9.7; platform_system != 'Darwin' and python_version < '3.11'",
     "imbalanced-learn",
     "keras-self-attention",
-    "kotsu>=0.3.1",
     "matplotlib>=3.3.2,<3.9.0",
     "pydot>=2.0.0",
     "scikit_posthocs>=0.6.5",