Skip to content

Commit

Permalink
[ENH] MrSEQLClassifier wrapper (#1756)
Browse files Browse the repository at this point in the history
* mrseql

* dependencies

* params

* Empty commit for CI

* docs

* Automatic `pre-commit` fixes

* import

---------

Co-authored-by: MatthewMiddlehurst <[email protected]>
Co-authored-by: Tony Bagnall <[email protected]>
Co-authored-by: TonyBagnall <[email protected]>
  • Loading branch information
4 people authored Sep 18, 2024
1 parent c6dfffa commit 11e3408
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 32 deletions.
2 changes: 2 additions & 0 deletions aeon/classification/dictionary_based/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,12 @@
"MUSE",
"REDCOMETS",
"MrSQMClassifier",
"MrSEQLClassifier",
]

from aeon.classification.dictionary_based._boss import BOSSEnsemble, IndividualBOSS
from aeon.classification.dictionary_based._cboss import ContractableBOSS
from aeon.classification.dictionary_based._mrseql import MrSEQLClassifier
from aeon.classification.dictionary_based._mrsqm import MrSQMClassifier
from aeon.classification.dictionary_based._muse import MUSE
from aeon.classification.dictionary_based._redcomets import REDCOMETS
Expand Down
130 changes: 130 additions & 0 deletions aeon/classification/dictionary_based/_mrseql.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
"""Multiple Representations Sequence Learning (MrSEQL) Classifier."""

__maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"]
__all__ = ["MrSEQLClassifier"]

from typing import Union

import numpy as np
import pandas as pd

from aeon.classification import BaseClassifier


def _from_numpy3d_to_nested_dataframe(X):
"""Convert numpy3D collection to a pd.DataFrame where each cell is a series."""
n_cases, n_channels, n_timepoints = X.shape
array_type = X.dtype
container = pd.Series
column_names = [f"channel_{i}" for i in range(n_channels)]
column_list = []
for j, column in enumerate(column_names):
nested_column = (
pd.DataFrame(X[:, j, :])
.apply(lambda x: [container(x, dtype=array_type)], axis=1)
.str[0]
.rename(column)
)
column_list.append(nested_column)
df = pd.concat(column_list, axis=1)
return df


class MrSEQLClassifier(BaseClassifier):
    """
    Multiple Representations Sequence Learning (MrSEQL) Classifier.

    This is a wrapper for the MrSEQLClassifier algorithm from the `mrseql` package.
    MrSEQL is not included in ``all_extras`` as it requires gcc and fftw
    (http://www.fftw.org/index.html) to be installed for Windows and some Linux OS.

    Overview: MrSEQL extends the symbolic sequence classifier (SEQL) to work with
    multiple symbolic representations of time series, using features extracted from the
    SAX and SFA transformations.

    Parameters
    ----------
    seql_mode : "clf" or "fs", default="fs"
        If "fs", trains a logistic regression model with features extracted by SEQL.
        If "clf", builds an ensemble of SEQL models.
    symrep : "sax" or "sfa", or ["sax", "sfa"], default="sax"
        The symbolic features to extract from the time series.
    custom_config : dict, default=None
        Additional configuration for the symbolic transformations. See the original
        package for details. ``symrep`` will be ignored if used.

    Attributes
    ----------
    clf_ : mrseql.MrSEQLClassifier
        The wrapped estimator, created and fitted in ``_fit``.

    References
    ----------
    .. [1] Le Nguyen, Thach, et al. "Interpretable time series classification using
        linear models and multi-resolution multi-domain symbolic representations."
        Data mining and knowledge discovery 33 (2019): 1183-1222.

    Examples
    --------
    >>> from aeon.classification.dictionary_based import MrSEQLClassifier
    >>> from aeon.testing.data_generation import make_example_3d_numpy
    >>> X, y = make_example_3d_numpy(random_state=0)
    >>> clf = MrSEQLClassifier()  # doctest: +SKIP
    >>> clf.fit(X, y)  # doctest: +SKIP
    MrSEQLClassifier(...)
    >>> clf.predict(X)  # doctest: +SKIP
    """

    _tags = {
        "X_inner_type": "numpy3D",
        "algorithm_type": "dictionary",
        "python_dependencies": "mrseql",
    }

    def __init__(self, seql_mode="fs", symrep="sax", custom_config=None) -> None:
        self.seql_mode = seql_mode
        self.symrep = symrep
        self.custom_config = custom_config

        super().__init__()

    def _fit(self, X, y):
        """Build and fit the wrapped ``mrseql`` classifier.

        Parameters
        ----------
        X : np.ndarray
            3D collection, unpacked by the conversion helper as
            (n_cases, n_channels, n_timepoints).
        y : array-like
            Class labels, passed through unchanged to the wrapped estimator.

        Returns
        -------
        self
            Reference to self.
        """
        # Imported lazily because mrseql is an optional dependency; aliased so
        # the wrapped class does not shadow this wrapper's own name.
        from mrseql import MrSEQLClassifier as _MrSEQL

        # The wrapped estimator is fed a nested DataFrame, not the 3D array.
        _X = _from_numpy3d_to_nested_dataframe(X)

        self.clf_ = _MrSEQL(
            seql_mode=self.seql_mode,
            symrep=self.symrep,
            custom_config=self.custom_config,
        )
        self.clf_.fit(_X, y)

        return self

    def _predict(self, X) -> np.ndarray:
        """Return the wrapped estimator's predictions for the cases in ``X``."""
        _X = _from_numpy3d_to_nested_dataframe(X)
        return self.clf_.predict(_X)

    def _predict_proba(self, X) -> np.ndarray:
        """Return the wrapped estimator's ``predict_proba`` output for ``X``."""
        _X = _from_numpy3d_to_nested_dataframe(X)
        return self.clf_.predict_proba(_X)

    @classmethod
    def get_test_params(cls, parameter_set: str = "default") -> Union[dict, list[dict]]:
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.
            For classifiers, a "default" set of parameters should be provided for
            general testing, and a "results_comparison" set for comparing against
            previously recorded results if the general set does not produce suitable
            probabilities to compare against.

        Returns
        -------
        params : dict or list of dict, default={}
            Parameters to create testing instances of the class.
            Each dict contains parameters to construct an "interesting" test
            instance, i.e., `MyClass(**params)` or `MyClass(**params[i])` creates a
            valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`.
        """
        # The constructor defaults are used for testing; no overrides are defined.
        return {}
38 changes: 6 additions & 32 deletions aeon/classification/dictionary_based/_mrsqm.py
Original file line number Diff line number Diff line change
@@ -1,41 +1,21 @@
"""Multiple Representations Sequence Miner (MrSQM) Classifier."""

__maintainer__ = ["TonyBagnall"]
__maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"]
__all__ = ["MrSQMClassifier"]

from typing import Union

import numpy as np
import pandas as pd

from aeon.classification import BaseClassifier


def _from_numpy3d_to_nested_dataframe(X):
"""Convert numpy3D collection to a pd.DataFrame where each cell is a series."""
n_cases, n_channels, n_timepoints = X.shape
array_type = X.dtype
container = pd.Series
column_names = [f"channel_{i}" for i in range(n_channels)]
column_list = []
for j, column in enumerate(column_names):
nested_column = (
pd.DataFrame(X[:, j, :])
.apply(lambda x: [container(x, dtype=array_type)], axis=1)
.str[0]
.rename(column)
)
column_list.append(nested_column)
df = pd.concat(column_list, axis=1)
return df


class MrSQMClassifier(BaseClassifier):
"""
Multiple Representations Sequence Miner (MrSQM) classifier.
This is a wrapper for the MrSQMClassifier algorithm from the `mrsqm` package.
MrSQM is not included in all extras as it requires gcc and fftw
MrSQM is not included in ``all_extras`` as it requires gcc and fftw
(http://www.fftw.org/index.html) to be installed for Windows and some Linux OS.
Overview: MrSQM is a time series classifier utilising symbolic
Expand Down Expand Up @@ -78,9 +58,6 @@ class MrSQMClassifier(BaseClassifier):
.. [1] Nguyen, Thach Le, and Georgiana Ifrim. "Fast time series classification with
random symbolic subsequences." Advanced Analytics and Learning on Temporal Data:
7th ECML PKDD Workshop, AALTD 2022, Grenoble, France, September 19–23, 2022.
.. [2] Nguyen, Thach Le, and Georgiana Ifrim. "MrSQM: Fast time series
classification with symbolic representations." arXiv preprint arXiv:2109.01036
(2021).
Examples
--------
Expand All @@ -95,7 +72,7 @@ class MrSQMClassifier(BaseClassifier):

_tags = {
"X_inner_type": "numpy3D",
"algorithm_type": "shapelet",
"algorithm_type": "dictionary",
"cant-pickle": True,
"python_dependencies": "mrsqm",
}
Expand Down Expand Up @@ -125,7 +102,6 @@ def __init__(
def _fit(self, X, y):
from mrsqm import MrSQMClassifier

_X = _from_numpy3d_to_nested_dataframe(X)
self.clf_ = MrSQMClassifier(
strat=self.strat,
features_per_rep=self.features_per_rep,
Expand All @@ -136,17 +112,15 @@ def _fit(self, X, y):
custom_config=self.custom_config,
random_state=self.random_state,
)
self.clf_.fit(_X, y)
self.clf_.fit(X, y)

return self

def _predict(self, X) -> np.ndarray:
_X = _from_numpy3d_to_nested_dataframe(X)
return self.clf_.predict(_X)
return self.clf_.predict(X)

def _predict_proba(self, X) -> np.ndarray:
_X = _from_numpy3d_to_nested_dataframe(X)
return self.clf_.predict_proba(_X)
return self.clf_.predict_proba(X)

@classmethod
def get_test_params(cls, parameter_set: str = "default") -> Union[dict, list[dict]]:
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ dl = [
]
unstable_extras = [
"mrsqm>=0.0.7,<0.1.0; platform_system != 'Windows' and python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html)
"mrseql>=0.0.4,<0.1.0; platform_system != 'Windows' and python_version < '3.12'", # requires gcc and fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html)
"pycatch22", # known to fail installation on some setups
"pyfftw>=0.12.0; python_version < '3.12'", # requires fftw to be installed for Windows and some other OS (see http://www.fftw.org/index.html)
]
Expand Down

0 comments on commit 11e3408

Please sign in to comment.