Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] SFA testing #1884

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion aeon/distances/_dft_sfa_mindist.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def dft_sfa_mindist(
... alphabet_size=8,
... window_size=x.shape[-1],
... norm=True,
... lower_bounding_distances=True # This must be set!
... lower_bounding=True # This must be set!
... )
>>> transform.fit(x)
SFAFast(...)
Expand Down
2 changes: 1 addition & 1 deletion aeon/distances/_sfa_mindist.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def sfa_mindist(x: np.ndarray, y: np.ndarray, breakpoints: np.ndarray) -> float:
... alphabet_size=8,
... window_size=x.shape[-1],
... norm=True,
... lower_bounding_distances=True # This must be set!
... lower_bounding=True # This must be set!
... )
>>> transform.fit(x)
SFAFast(...)
Expand Down
4 changes: 2 additions & 2 deletions aeon/distances/tests/test_symbolic_mindist.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def test_sfa_mindist():
binning_method=histogram_type,
norm=True,
variance=True,
lower_bounding_distances=True, # This must be set!
lower_bounding=True, # This must be set!
)

sfa_old = SFA(
Expand All @@ -79,7 +79,7 @@ def test_sfa_mindist():
window_size=X_train.shape[-1],
binning_method=histogram_type,
norm=True,
lower_bounding_distances=True, # This must be set!
lower_bounding=True, # This must be set!
)
transforms = [sfa_old, sfa_fast]

Expand Down
84 changes: 37 additions & 47 deletions aeon/transformations/collection/dictionary_based/_sfa.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Configurable SFA transform for discretising time series into words.
"""

__maintainer__ = []
__maintainer__ = ["patrickzib", "MatthewMiddlehurst"]
__all__ = ["SFA"]

import math
Expand All @@ -21,13 +21,14 @@

from aeon.transformations.collection import BaseCollectionTransformer

# The binning methods to use: equi-depth, equi-width, information gain or kmeans
# The binning methods to use
binning_methods = {
"equi-depth",
"equi-width",
"information-gain",
"information-gain-mae",
"kmeans",
"quantile",
}


Expand All @@ -40,47 +41,42 @@ class SFA(BaseCollectionTransformer):
shorten the series with DFT
discretise the shortened series into bins set by MFC
form a word from these discrete values
by default SFA produces a single word per series (window_size=0)
if a window is used, it forms a histogram of counts of words.
SFA returns a dictionary of word counts for each series

This is a slower but more flexible version of the SFA transform, which can store
greater than 64 bit words. This is at the cost of efficiency, however.

Parameters
----------
word_length: int, default = 8
length of word to shorten window to (using DFT)

alphabet_size: int, default = 4
number of values to discretise each value to

window_size: int, default = 12
size of window for sliding. Input series
length for whole series transform

norm: boolean, default = False
mean normalise words by dropping first fourier coefficient

binning_method: {"equi-depth", "equi-width", "information-gain",
"information-gain-mae", "kmeans"}, default="equi-depth"
the binning method used to derive the breakpoints.

anova: boolean, default = False
If True, the Fourier coefficient selection is done via a one-way
ANOVA test. If False, the first Fourier coefficients are selected.
Only applicable if labels are given

bigrams: boolean, default = False
whether to create bigrams of SFA words

skip_grams: boolean, default = False
whether to create skip-grams of SFA words

remove_repeat_words: boolean, default = False
whether to use numerosity reduction (default False)
word_length : int, default=8
Length of word to shorten window to (using DFT).
alphabet_size : int, default=4
Number of values to discretise each value to.
window_size : int, default=12
Size of window for sliding. Input series length for whole series transform.
norm : boolean, default=False
Mean normalise words by dropping the first Fourier coefficient.
binning_method : str, default="equi-depth"
The binning method used to derive the breakpoints. One of {"equi-depth",
"equi-width", "information-gain", "information-gain-mae", "kmeans",
"quantile"}.
anova : boolean, default=False
If True, the Fourier coefficient selection is done via a one-way ANOVA test.
If False, the first Fourier coefficients are selected. Only applicable if
labels are given.
bigrams : boolean, default=False
Whether to create bigrams of SFA words.
skip_grams : boolean, default=False
Whether to create skip-grams of SFA words.
levels : int, default=1
Number of spatial pyramid levels.
remove_repeat_words : boolean, default=False
Whether to use numerosity reduction.

lower_bounding_distances : boolean, default = None
If set to True, the FFT is normed to allow for ED lower bounding.

levels: int, default = 1
Number of spatial pyramid levels


save_words: boolean, default = False
whether to save the words generated for each series (default False)
Expand Down Expand Up @@ -123,10 +119,9 @@ def __init__(
anova=False,
bigrams=False,
skip_grams=False,
remove_repeat_words=False,
levels=1,
remove_repeat_words=False,
lower_bounding=True,
lower_bounding_distances=None,
save_words=False,
keep_binning_dft=False,
use_fallback_dft=False,
Expand All @@ -151,13 +146,8 @@ def __init__(

self.norm = norm
self.lower_bounding = lower_bounding
self.lower_bounding_distances = lower_bounding_distances

self.inverse_sqrt_win_size = (
1.0 / math.sqrt(window_size)
if (not lower_bounding or lower_bounding_distances)
else 1.0
)
self.inverse_sqrt_win_size = 1.0 / math.sqrt(window_size)

self.remove_repeat_words = remove_repeat_words

Expand Down Expand Up @@ -626,7 +616,7 @@ def _binning_dft(self, series, num_windows_per_inst):
self.dft_length,
self.norm,
self.inverse_sqrt_win_size,
self.lower_bounding or self.lower_bounding_distances,
self.lower_bounding,
)
if self._use_fallback_dft
else self._fast_fourier_transform(row)
Expand Down Expand Up @@ -664,7 +654,7 @@ def _fast_fourier_transform(self, series):
dft = np.empty((length,), dtype=reals.dtype)
dft[0::2] = reals[: np.uint32(length / 2)]
dft[1::2] = imags[: np.uint32(length / 2)]
if self.lower_bounding or self.lower_bounding_distances:
if self.lower_bounding:
dft[1::2] = dft[1::2] * -1 # lower bounding
dft *= self.inverse_sqrt_win_size / std
return dft[start:]
Expand Down Expand Up @@ -741,7 +731,7 @@ def _mft(self, series):
self.dft_length,
self.norm,
self.inverse_sqrt_win_size,
self.lower_bounding or self.lower_bounding_distances,
self.lower_bounding,
apply_normalising_factor=False,
cut_start_if_norm=False,
)
Expand Down
Loading
Loading