[ENH] MrSEQLClassifier wrapper (#1756)

* mrseql * dependencies * params * Empty commit for CI * docs * Automatic `pre-commit` fixes * import --------- Co-authored-by: MatthewMiddlehurst <[email protected]> Co-authored-by: Tony Bagnall <[email protected]> Co-authored-by: TonyBagnall <[email protected]>
aeon-toolkit · Sep 18, 2024 · 11e3408 · 11e3408
1 parent c6dfffa
commit 11e3408
Show file tree

Hide file tree

Showing 4 changed files with 139 additions and 32 deletions.
diff --git a/aeon/classification/dictionary_based/__init__.py b/aeon/classification/dictionary_based/__init__.py
@@ -11,10 +11,12 @@
     "MUSE",
     "REDCOMETS",
     "MrSQMClassifier",
+    "MrSEQLClassifier",
 ]
 
 from aeon.classification.dictionary_based._boss import BOSSEnsemble, IndividualBOSS
 from aeon.classification.dictionary_based._cboss import ContractableBOSS
+from aeon.classification.dictionary_based._mrseql import MrSEQLClassifier
 from aeon.classification.dictionary_based._mrsqm import MrSQMClassifier
 from aeon.classification.dictionary_based._muse import MUSE
 from aeon.classification.dictionary_based._redcomets import REDCOMETS

diff --git a/aeon/classification/dictionary_based/_mrseql.py b/aeon/classification/dictionary_based/_mrseql.py
@@ -0,0 +1,130 @@
+"""Multiple Representations Sequence Learning (MrSEQL) Classifier."""
+
+__maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"]
+__all__ = ["MrSEQLClassifier"]
+
+from typing import Union
+
+import numpy as np
+import pandas as pd
+
+from aeon.classification import BaseClassifier
+
+
+def _from_numpy3d_to_nested_dataframe(X):
+    """Convert numpy3D collection to a pd.DataFrame where each cell is a series."""
+    n_cases, n_channels, n_timepoints = X.shape
+    array_type = X.dtype
+    container = pd.Series
+    column_names = [f"channel_{i}" for i in range(n_channels)]
+    column_list = []
+    for j, column in enumerate(column_names):
+        nested_column = (
+            pd.DataFrame(X[:, j, :])
+            .apply(lambda x: [container(x, dtype=array_type)], axis=1)
+            .str[0]
+            .rename(column)
+        )
+        column_list.append(nested_column)
+    df = pd.concat(column_list, axis=1)
+    return df
+
+
+class MrSEQLClassifier(BaseClassifier):
+    """
+    Multiple Representations Sequence Learning (MrSEQL) Classifier.
+
+    This is a wrapper for the MrSEQLClassifier algorithm from the `mrseql` package.
+    MrSEQL is not included in ``all_extras`` as it requires gcc and fftw
+    (http://www.fftw.org/index.html) to be installed for Windows and some Linux OS.
+
+    Overview: MrSEQL extends the symbolic sequence classifier (SEQL) to work with
+    multiple symbolic representations of time series, using features extracted from the
+    SAX and SFA transformations.
+
+    Parameters
+    ----------
+    seql_mode : "clf" or "fs", default="fs"
+        If "fs", trains a logistic regression model with features extracted by SEQL.
+        If "clf", builds an ensemble of SEQL models.
+    symrep : "sax" or "sfa", or ["sax", "sfa"], default="sax"
+        The symbolic features to extract from the time series.
+    custom_config : dict, default=None
+        Additional configuration for the symbolic transformations. See the original
+        package for details. ``symrep`` will be ignored if used.
+
+    References
+    ----------
+    .. [1] Le Nguyen, Thach, et al. "Interpretable time series classification using
+        linear models and multi-resolution multi-domain symbolic representations."
+        Data mining and knowledge discovery 33 (2019): 1183-1222.
+
+    Examples
+    --------
+    >>> from aeon.classification.dictionary_based import MrSEQLClassifier
+    >>> from aeon.testing.data_generation import make_example_3d_numpy
+    >>> X, y = make_example_3d_numpy(random_state=0)
+    >>> clf = MrSEQLClassifier() # doctest: +SKIP
+    >>> clf.fit(X, y) # doctest: +SKIP
+    MrSEQLClassifier(...)
+    >>> clf.predict(X) # doctest: +SKIP
+    """
+
+    _tags = {
+        "X_inner_type": "numpy3D",
+        "algorithm_type": "dictionary",
+        "python_dependencies": "mrseql",
+    }
+
+    def __init__(self, seql_mode="fs", symrep=("sax"), custom_config=None) -> None:
+        self.seql_mode = seql_mode
+        self.symrep = symrep
+        self.custom_config = custom_config
+
+        super().__init__()
+
+    def _fit(self, X, y):
+        from mrseql import MrSEQLClassifier
+
+        _X = _from_numpy3d_to_nested_dataframe(X)
+
+        self.clf_ = MrSEQLClassifier(
+            seql_mode=self.seql_mode,
+            symrep=self.symrep,
+            custom_config=self.custom_config,
+        )
+        self.clf_.fit(_X, y)
+
+        return self
+
+    def _predict(self, X) -> np.ndarray:
+        _X = _from_numpy3d_to_nested_dataframe(X)
+        return self.clf_.predict(_X)
+
+    def _predict_proba(self, X) -> np.ndarray:
+        _X = _from_numpy3d_to_nested_dataframe(X)
+        return self.clf_.predict_proba(_X)
+
+    @classmethod
+    def get_test_params(cls, parameter_set: str = "default") -> Union[dict, list[dict]]:
+        """Return testing parameter settings for the estimator.
+
+        Parameters
+        ----------
+        parameter_set : str, default="default"
+            Name of the set of test parameters to return, for use in tests. If no
+            special parameters are defined for a value, will return `"default"` set.
+            For classifiers, a "default" set of parameters should be provided for
+            general testing, and a "results_comparison" set for comparing against
+            previously recorded results if the general set does not produce suitable
+            probabilities to compare against.
+
+        Returns
+        -------
+        params : dict or list of dict, default={}
+            Parameters to create testing instances of the class.
+            Each dict are parameters to construct an "interesting" test instance, i.e.,
+            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
+            `create_test_instance` uses the first (or only) dictionary in `params`.
+        """
+        return {}
diff --git a/aeon/classification/dictionary_based/_mrsqm.py b/aeon/classification/dictionary_based/_mrsqm.py
@@ -1,41 +1,21 @@
 """Multiple Representations Sequence Miner (MrSQM) Classifier."""
 
-__maintainer__ = ["TonyBagnall"]
+__maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"]
 __all__ = ["MrSQMClassifier"]
 
 from typing import Union
 
 import numpy as np
-import pandas as pd
 
 from aeon.classification import BaseClassifier
 
 
-def _from_numpy3d_to_nested_dataframe(X):
-    """Convert numpy3D collection to a pd.DataFrame where each cell is a series."""
-    n_cases, n_channels, n_timepoints = X.shape
-    array_type = X.dtype
-    container = pd.Series
-    column_names = [f"channel_{i}" for i in range(n_channels)]
-    column_list = []
-    for j, column in enumerate(column_names):
-        nested_column = (
-            pd.DataFrame(X[:, j, :])
-            .apply(lambda x: [container(x, dtype=array_type)], axis=1)
-            .str[0]
-            .rename(column)
-        )
-        column_list.append(nested_column)
-    df = pd.concat(column_list, axis=1)
-    return df
-
-
 class MrSQMClassifier(BaseClassifier):
     """
     Multiple Representations Sequence Miner (MrSQM) classifier.
 
     This is a wrapper for the MrSQMClassifier algorithm from the `mrsqm` package.
-    MrSQM is not included in all extras as it requires gcc and fftw
+    MrSQM is not included in ``all_extras`` as it requires gcc and fftw
     (http://www.fftw.org/index.html) to be installed for Windows and some Linux OS.
 
     Overview: MrSQM is a time series classifier utilising symbolic
@@ -78,9 +58,6 @@ class MrSQMClassifier(BaseClassifier):
     .. [1] Nguyen, Thach Le, and Georgiana Ifrim. "Fast time series classification with
         random symbolic subsequences." Advanced Analytics and Learning on Temporal Data:
         7th ECML PKDD Workshop, AALTD 2022, Grenoble, France, September 19–23, 2022.
-    .. [2] Nguyen, Thach Le, and Georgiana Ifrim. "MrSQM: Fast time series
-        classification with symbolic representations." arXiv preprint arXiv:2109.01036
-        (2021).
 
     Examples
     --------
@@ -95,7 +72,7 @@ class MrSQMClassifier(BaseClassifier):
 
     _tags = {
         "X_inner_type": "numpy3D",
-        "algorithm_type": "shapelet",
+        "algorithm_type": "dictionary",
         "cant-pickle": True,
         "python_dependencies": "mrsqm",
     }
@@ -125,7 +102,6 @@ def __init__(
     def _fit(self, X, y):
         from mrsqm import MrSQMClassifier
 
-        _X = _from_numpy3d_to_nested_dataframe(X)
         self.clf_ = MrSQMClassifier(
             strat=self.strat,
             features_per_rep=self.features_per_rep,
@@ -136,17 +112,15 @@ def _fit(self, X, y):
             custom_config=self.custom_config,
             random_state=self.random_state,
         )
-        self.clf_.fit(_X, y)
+        self.clf_.fit(X, y)
 
         return self
 
     def _predict(self, X) -> np.ndarray:
-        _X = _from_numpy3d_to_nested_dataframe(X)
-        return self.clf_.predict(_X)
+        return self.clf_.predict(X)
 
     def _predict_proba(self, X) -> np.ndarray:
-        _X = _from_numpy3d_to_nested_dataframe(X)
-        return self.clf_.predict_proba(_X)
+        return self.clf_.predict_proba(X)
 
     @classmethod
     def get_test_params(cls, parameter_set: str = "default") -> Union[dict, list[dict]]:

diff --git a/pyproject.toml b/pyproject.toml
@@ -81,6 +81,7 @@ dl = [
 ]
 unstable_extras = [
     "mrsqm>=0.0.7,<0.1.0; platform_system != 'Windows' and python_version < '3.12'",  # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html)
+    "mrseql>=0.0.4,<0.1.0; platform_system != 'Windows' and python_version < '3.12'",   # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html)
     "pycatch22",  # known to fail installation on some setups
     "pyfftw>=0.12.0; python_version < '3.12'",  # requires fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html)
 ]