Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/aeon-toolkit/aeon into mm/d…
Browse files Browse the repository at this point in the history
…ependabot

# Conflicts:
#	sweep.yaml
  • Loading branch information
MatthewMiddlehurst committed Nov 13, 2023
2 parents 6b8d09c + 3482819 commit 7e86da2
Show file tree
Hide file tree
Showing 41 changed files with 926 additions and 659 deletions.
14 changes: 7 additions & 7 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,13 @@ repos:
args: [ "--convention=numpy" ]
additional_dependencies: [ toml, tomli ]

# not ready to be enabled yet
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: v1.0.1
# hooks:
# - id: mypy
# files: aeon/
# additional_dependencies: [ pytest ]
# not ready to be enabled yet
# - repo: https://github.com/pre-commit/mirrors-mypy
# rev: v1.0.1
# hooks:
# - id: mypy
# files: aeon/
# additional_dependencies: [ pytest ]

- repo: https://github.com/mgedmin/check-manifest
rev: "0.49"
Expand Down
2 changes: 1 addition & 1 deletion aeon/annotation/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
"""Implements time series annotation."""
"""Time series annotation."""
2 changes: 1 addition & 1 deletion aeon/annotation/eagglo.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def _fit(self, X: pd.DataFrame, y=None):
X : pd.DataFrame
Data for anomaly detection (time series).
y : pd.Series, optional
Not used for this unsupervsed method.
Not used for this unsupervised method.
Returns
-------
Expand Down
2 changes: 1 addition & 1 deletion aeon/annotation/hmm_learn/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class GaussianHMM(BaseHMMLearn):
----------
n_components : int
Number of states
covariance_type : {"sperical", "diag", "full", "tied"}, optional
covariance_type : {"spherical", "diag", "full", "tied"}, optional
The type of covariance parameters to use:
* "spherical" --- each state uses a single variance value that
applies to all features.
Expand Down
6 changes: 6 additions & 0 deletions aeon/anomaly_detection/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
"""Time Series Anomaly Detection."""
__all__ = [
"STRAY",
]

from aeon.anomaly_detection._stray import STRAY
6 changes: 3 additions & 3 deletions aeon/annotation/stray.py → aeon/anomaly_detection/_stray.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
class STRAY(BaseTransformer):
"""STRAY: robust anomaly detection in data streams with concept drift.
This is based on STRAY (Search TRace AnomalY) _[1], which is a modification
of HDoutliers _[2]. HDoutliers is a powerful algorithm for the detection of
This is based on STRAY (Search TRace AnomalY) [1]_, which is a modification
of HDoutliers [2]_. HDoutliers is a powerful algorithm for the detection of
anomalous observations in a dataset, which has (among other advantages) the
ability to detect clusters of outliers in multi-dimensional data without
requiring a model of the typical behavior of the system. However, it suffers
Expand Down Expand Up @@ -63,7 +63,7 @@ class STRAY(BaseTransformer):
Examples
--------
>>> from aeon.annotation.stray import STRAY
>>> from aeon.anomaly_detection import STRAY
>>> from aeon.datasets import load_airline
>>> from sklearn.preprocessing import MinMaxScaler
>>> import numpy as np
Expand Down
1 change: 1 addition & 0 deletions aeon/anomaly_detection/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Tests for anomaly detection."""
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import numpy as np
from sklearn.preprocessing import MinMaxScaler, RobustScaler

from aeon.annotation.stray import STRAY
from aeon.anomaly_detection import STRAY


def test_default_1D():
Expand Down
1 change: 1 addition & 0 deletions aeon/benchmarking/tests/test_results_loaders.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""Result loading tests."""

import os

import pytest
Expand Down
16 changes: 4 additions & 12 deletions aeon/classification/shapelet_based/_rdst.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,6 @@ class RDSTClassifier(BaseClassifier):
The number of unique classes in the training set.
fit_time_ : int
The time (in milliseconds) for ``fit`` to run.
n_instances_ : int
The number of train cases in the training set.
n_dims_ : int
The number of dimensions per case in the training set.
series_length_ : int
The length of each series in the training set.
transformed_data_ : list of shape (n_estimators) of ndarray
Expand Down Expand Up @@ -120,7 +116,9 @@ class RDSTClassifier(BaseClassifier):

_tags = {
"capability:multivariate": True,
"capability:unequal_length": True,
"capability:multithreading": True,
"X_inner_type": ["np-list", "numpy3D"],
"non-deterministic": True, # due to random_state bug in MacOS #324
"algorithm_type": "shapelet",
}
Expand Down Expand Up @@ -149,10 +147,6 @@ def __init__(
self.save_transformed_data = save_transformed_data
self.random_state = random_state
self.n_jobs = n_jobs

self.n_instances_ = 0
self.n_dims_ = 0
self.series_length_ = 0
self.transformed_data_ = []

self._transformer = None
Expand Down Expand Up @@ -180,8 +174,6 @@ def _fit(self, X, y):
Changes state by creating a fitted model that updates attributes
ending in "_".
"""
self.n_instances_, self.n_dims_, self.series_length_ = X.shape

self._transformer = RandomDilatedShapeletTransform(
max_shapelets=self.max_shapelets,
shapelet_lengths=self.shapelet_lengths,
Expand Down Expand Up @@ -250,9 +242,9 @@ def _predict_proba(self, X) -> np.ndarray:
if callable(m):
return self._estimator.predict_proba(X_t)
else:
dists = np.zeros((X.shape[0], self.n_classes_))
dists = np.zeros((len(X), self.n_classes_))
preds = self._estimator.predict(X_t)
for i in range(0, X.shape[0]):
for i in range(0, len(X)):
dists[i, np.where(self.classes_ == preds[i])] = 1
return dists

Expand Down
75 changes: 61 additions & 14 deletions aeon/datasets/_data_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,20 @@ def _load_header_info(file):
return meta_data


def _get_channel_strings(line, target, missing):
"""Split a string with timestamps into separate csv strings."""
channel_strings = re.sub(r"\s", "", line)
channel_strings = channel_strings.split("):")
c = len(channel_strings)
if target:
c = c - 1
for i in range(c):
channel_strings[i] = channel_strings[i] + ")"
numbers = re.findall(r"\d+\.\d+|" + missing, channel_strings[i])
channel_strings[i] = ",".join(numbers)
return channel_strings


def _load_data(file, meta_data, replace_missing_vals_with="NaN"):
"""Load data from a file with no header.
Expand Down Expand Up @@ -133,13 +147,20 @@ def _load_data(file, meta_data, replace_missing_vals_with="NaN"):
current_channels = 0
series_length = 0
y_values = []
target = False
if meta_data["classlabel"] or meta_data["targetlabel"]:
target = True
for line in file:
line = line.strip().lower()
line = line.replace("nan", replace_missing_vals_with)
line = line.replace("?", replace_missing_vals_with)
channels = line.split(":")
if meta_data["timestamps"]:
channels = _get_channel_strings(line, target, replace_missing_vals_with)
else:
channels = line.split(":")
n_cases += 1
current_channels = len(channels)
if meta_data["classlabel"] or meta_data["targetlabel"]:
if target:
current_channels -= 1
if n_cases == 1: # Find n_channels and length from first if not unequal
n_channels = current_channels
Expand Down Expand Up @@ -179,7 +200,7 @@ def _load_data(file, meta_data, replace_missing_vals_with="NaN"):
)
np_case[i] = np.array(data_series)
data.append(np_case)
if meta_data["classlabel"] or meta_data["targetlabel"]:
if target:
y_values.append(channels[n_channels])
if meta_data["equallength"]:
data = np.array(data)
Expand Down Expand Up @@ -1129,7 +1150,27 @@ def load_forecasting(name, extract_path=None, return_metadata=True):


def load_regression(name, split=None, extract_path=None, return_metadata=True):
"""Download/load forecasting problem from https://forecastingdata.org/.
"""Download/load regression problem from http://tseregression.org/.
If you want to load a problem from a local file, specify the
location in ``extract_path``. This function assumes the data is stored in format
<extract_path>/<name>/<name>_TRAIN.ts and <extract_path>/<name>/<name>_TEST.ts.
If you want to load a file directly from a full path, use the function
``load_from_tsfile`` directly. If you do not specify ``extract_path``, or if the
problem is not present in ``extract_path`` it will attempt to download the data
from http://tseregression.org/.
The list of problems this function can download from the website is in
``datasets/tser_lists.py``. This function can load timestamped data, but it does
not store the time stamps. The time stamp loading is fragile, it will only work
if all data are floats.
Data is assumed to be in the standard .ts format: each row is a (possibly
multivariate) time series. Each dimension is separated by a colon, each value in
a series is comma separated. For examples see aeon.datasets.data. ArrowHead
is an example of a univariate equal length problem, BasicMotions an equal length
multivariate problem.
Parameters
----------
Expand Down Expand Up @@ -1226,18 +1267,24 @@ def load_regression(name, split=None, extract_path=None, return_metadata=True):
def load_classification(name, split=None, extract_path=None, return_metadata=True):
"""Load a classification dataset.
Loads a TSC dataset from extract_path, or from timeseriesclassification.com,
if not on extract path.
If you want to load a problem from a local file, specify the
location in ``extract_path``. This function assumes the data is stored in format
<extract_path>/<name>/<name>_TRAIN.ts and <extract_path>/<name>/<name>_TEST.ts.
If you want to load a file directly from a full path, use the function
``load_from_tsfile`` directly. If you do not specify ``extract_path``, or if the
problem is not present in ``extract_path`` it will attempt to download the data
from https://timeseriesclassification.com/.
The list of problems this function can download from the website is in
``datasets/tsc_lists.py``. This function can load timestamped data, but it does
not store the time stamps. The time stamp loading is fragile, it will only work
if all data are floats.
Data is assumed to be in the standard .ts format: each row is a (possibly
multivariate) time series.
Each dimension is separated by a colon, each value in a series is comma
separated. For examples see aeon.datasets.data.tsc. ArrowHead is an example of
a univariate equal length problem, BasicMotions an equal length multivariate
problem.
Data is stored in extract_path/name/name.ts, extract_path/name/name_TRAIN.ts and
extract_path/name/name_TEST.ts.
multivariate) time series. Each dimension is separated by a colon, each value in
a series is comma separated. For examples see aeon.datasets.data. ArrowHead
is an example of a univariate equal length problem, BasicMotions an equal length
multivariate problem.
Parameters
----------
Expand Down
8 changes: 2 additions & 6 deletions aeon/datasets/_data_writers.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,7 @@ def write_results_to_uea_format(
third_line : str
summary performance information (see comment below)
"""
if len(y_true) != len(y_pred):
if y_true is not None and len(y_true) != len(y_pred):
raise IndexError(
"The number of predicted values is not the same as the "
"number of actual class values"
Expand Down Expand Up @@ -287,11 +287,7 @@ def _write_header(
extension=None,
):
if class_labels is not None and regression:
raise ValueError(
"Cannot have class_labels and targetlabel. If the problem "
"is classification, add class_labels. If regression, "
"set targetlabel to true."
)
raise ValueError("Cannot have class_labels true for a regression problem")
# create path if it does not exist
dir = f"{str(path)}/"
try:
Expand Down
Loading

0 comments on commit 7e86da2

Please sign in to comment.