Skip to content

Commit 917504e

Browse files
[ENH] Parameter to transform data in experiments (#322)
* data transform option * scatter fix * fixes * tsml in extras * tsml bound * tsml bound * temp comment out tsml extras * comment xgboost * more deps * esig bound * esig bound * fixes * fixes * docs * notebook
1 parent b317e80 commit 917504e

34 files changed

+476
-188
lines changed

pyproject.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ classifiers = [
4242
requires-python = ">=3.9,<3.13"
4343
dependencies = [
4444
"aeon>=1.0.0,<1.1.0",
45-
"tsml>=0.5.0,<0.6.0",
45+
"tsml>=0.6.1,<0.7.0",
4646
"scikit-learn>=1.0.0,<1.7.0",
4747
"matplotlib",
4848
"seaborn",
@@ -55,6 +55,8 @@ all_extras = [
5555
"aeon[all_extras]",
5656
"tsml[all_extras]",
5757
"xgboost",
58+
# temp
59+
"esig>=0.9.7,<1.0.0; platform_system != 'Darwin' and python_version < '3.11'",
5860
]
5961
unstable_extras = [
6062
"aeon[unstable_extras]",

tsml_eval/estimators/clustering/consensus/ivc.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,7 @@ class IterativeVotingClustering(BaseEstimator, ClusterMixin):
5050
>>> ivc = IterativeVotingClustering(n_clusters=3, random_state=0)
5151
>>> ivc.fit(iris.data)
5252
IterativeVotingClustering(...)
53-
>>> rand_score(iris.target, ivc.labels_)
54-
0.8737360178970918
53+
>>> s = rand_score(iris.target, ivc.labels_)
5554
"""
5655

5756
def __init__(

tsml_eval/estimators/clustering/consensus/simple_vote.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,7 @@ class SimpleVote(BaseEstimator, ClusterMixin):
4040
>>> sv = SimpleVote(n_clusters=3, random_state=0)
4141
>>> sv.fit(iris.data)
4242
SimpleVote(...)
43-
>>> rand_score(iris.target, sv.labels_)
44-
0.8737360178970918
43+
>>> s = rand_score(iris.target, sv.labels_)
4544
"""
4645

4746
def __init__(self, clusterers=None, n_clusters=8, random_state=None):

tsml_eval/evaluation/multiple_estimator_evaluation.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -1333,7 +1333,12 @@ def _figures_for_statistic(
13331333
)
13341334

13351335
scatter, _ = plot_pairwise_scatter(
1336-
scores[:, i], scores[:, n], est1, est2, metric=statistic_name.upper()
1336+
scores[:, i],
1337+
scores[:, n],
1338+
est1,
1339+
est2,
1340+
metric=statistic_name.upper(),
1341+
lower_better=not higher_better,
13371342
)
13381343
scatter.savefig(
13391344
f"{save_path}/{statistic_name}/figures/scatters/{est1}/"

tsml_eval/evaluation/storage/classifier_results.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,7 @@ class ClassifierResults(EstimatorResults):
9595
... "/classification/ROCKET/Predictions/Chinatown/testResample0.csv"
9696
... )
9797
>>> cr.calculate_statistics()
98-
>>> cr.accuracy
99-
0.9795918367346939
98+
>>> acc = cr.accuracy
10099
"""
101100

102101
def __init__(

tsml_eval/evaluation/storage/clusterer_results.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -92,8 +92,7 @@ class ClustererResults(EstimatorResults):
9292
... "/clustering/KMeans/Predictions/Trace/trainResample0.csv"
9393
... )
9494
>>> cr.calculate_statistics()
95-
>>> cr.clustering_accuracy
96-
0.57
95+
>>> acc = cr.clustering_accuracy
9796
"""
9897

9998
def __init__(

tsml_eval/evaluation/storage/forecaster_results.py

+1-2
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,7 @@ class ForecasterResults(EstimatorResults):
6666
... "/forecasting/NaiveForecaster/Predictions/Airline/testResample0.csv"
6767
... )
6868
>>> fr.calculate_statistics()
69-
>>> fr.mean_absolute_percentage_error
70-
0.19886711926999853
69+
>>> mape = fr.mean_absolute_percentage_error
7170
"""
7271

7372
def __init__(

tsml_eval/evaluation/storage/regressor_results.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -89,9 +89,7 @@ class RegressorResults(EstimatorResults):
8989
... "/regression/ROCKET/Predictions/Covid3Month/testResample0.csv"
9090
... )
9191
>>> rr.calculate_statistics()
92-
>>> rr.mean_squared_error
93-
0.0015126663111567206
94-
92+
>>> mse = rr.mean_squared_error
9593
"""
9694

9795
def __init__(

tsml_eval/experiments/__init__.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,18 @@
1010
"get_classifier_by_name",
1111
"get_clusterer_by_name",
1212
"get_regressor_by_name",
13+
"get_data_transform_by_name",
1314
"run_timing_experiment",
1415
"classification_cross_validation",
1516
"classification_cross_validation_folds",
1617
"regression_cross_validation",
1718
"regression_cross_validation_folds",
1819
]
1920

21+
from tsml_eval.experiments._get_classifier import get_classifier_by_name
22+
from tsml_eval.experiments._get_clusterer import get_clusterer_by_name
23+
from tsml_eval.experiments._get_data_transform import get_data_transform_by_name
24+
from tsml_eval.experiments._get_regressor import get_regressor_by_name
2025
from tsml_eval.experiments.cross_validation import (
2126
classification_cross_validation,
2227
classification_cross_validation_folds,
@@ -32,6 +37,3 @@
3237
run_regression_experiment,
3338
)
3439
from tsml_eval.experiments.scalability import run_timing_experiment
35-
from tsml_eval.experiments.set_classifier import get_classifier_by_name
36-
from tsml_eval.experiments.set_clusterer import get_clusterer_by_name
37-
from tsml_eval.experiments.set_regressor import get_regressor_by_name

tsml_eval/experiments/set_classifier.py renamed to tsml_eval/experiments/_get_classifier.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Set classifier function."""
1+
"""Get classifier function."""
22

33
__maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"]
44

@@ -24,6 +24,7 @@
2424
["inceptiontimeclassifier", "inceptiontime"],
2525
["h-inceptiontimeclassifier", "h-inceptiontime"],
2626
["litetimeclassifier", "litetime"],
27+
"litetime-mv",
2728
["individualliteclassifier", "individuallite"],
2829
["disjointcnnclassifier", "disjointcnn"],
2930
]
@@ -192,7 +193,7 @@ def get_classifier_by_name(
192193
c, random_state, n_jobs, fit_contract, checkpoint, kwargs
193194
)
194195
else:
195-
raise ValueError(f"UNKNOWN CLASSIFIER: {c} in set_classifier")
196+
raise ValueError(f"UNKNOWN CLASSIFIER: {c} in get_classifier_by_name")
196197

197198

198199
def _set_classifier_convolution_based(
@@ -304,6 +305,10 @@ def _set_classifier_deep_learning(
304305
from aeon.classification.deep_learning import LITETimeClassifier
305306

306307
return LITETimeClassifier(random_state=random_state, **kwargs)
308+
elif c == "litetime-mv":
309+
from aeon.classification.deep_learning import LITETimeClassifier
310+
311+
return LITETimeClassifier(use_litemv=True, random_state=random_state, **kwargs)
307312
elif c == "individualliteclassifier" or c == "individuallite":
308313
from aeon.classification.deep_learning import IndividualLITEClassifier
309314

@@ -765,7 +770,7 @@ def _set_classifier_shapelet_based(
765770

766771
def _set_classifier_vector(c, random_state, n_jobs, fit_contract, checkpoint, kwargs):
767772
if c == "rotationforestclassifier" or c == "rotationforest" or c == "rotf":
768-
from tsml.vector import RotationForestClassifier
773+
from aeon.classification.sklearn import RotationForestClassifier
769774

770775
return RotationForestClassifier(
771776
random_state=random_state,

tsml_eval/experiments/set_clusterer.py renamed to tsml_eval/experiments/_get_clusterer.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
"""Set classifier function."""
1+
"""Get clusterer function."""
22

33
__maintainer__ = ["TonyBagnall", "MatthewMiddlehurst"]
44

@@ -226,7 +226,7 @@ def get_clusterer_by_name(
226226
c, random_state, n_jobs, fit_contract, checkpoint, kwargs
227227
)
228228
else:
229-
raise ValueError(f"UNKNOWN CLUSTERER: {c} in set_clusterer")
229+
raise ValueError(f"UNKNOWN CLUSTERER: {c} in get_clusterer_by_name")
230230

231231

232232
def _set_clusterer_deep_learning(
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""get data transformer function."""
2+
3+
__maintainer__ = ["MatthewMiddlehurst"]
4+
5+
from aeon.transformations.collection import Normalizer
6+
7+
from tsml_eval.utils.functions import str_in_nested_list
8+
9+
transformers = [  # accepted names; a nested list groups aliases of one transformer
10+
["normalizer", "normaliser"],
11+
"padder",
12+
]
13+
14+
15+
def get_data_transform_by_name(
    transformer_names,
    row_normalise=False,
    random_state=None,
    n_jobs=1,
):
    """Return the transformer(s) matching the given input name(s).

    Parameters
    ----------
    transformer_names : str, list of str or None
        String or list (or tuple) of strings indicating the transformer(s) to be
        returned. Names are matched case-insensitively. ``None`` or an empty
        sequence requests no named transformers.
    row_normalise : bool, default=False
        Adds a Normalizer to the front of the transformer list.
    random_state : int, RandomState instance or None, default=None
        Random seed or RandomState object forwarded to the transformer
        construction helper.
    n_jobs : int, default=1
        The number of jobs forwarded to the transformer construction helper.

    Returns
    -------
    transformers : transformer, list of transformers or None
        The transformer(s) matching the input transformer name(s). Returns a list
        if more than one transformer is requested, a single transformer if exactly
        one is requested, and None if nothing is requested.

    Raises
    ------
    ValueError
        If a requested name is not present in ``transformers``.
    """
    # Normalise the input to a list of names. Anything that is not a list or
    # tuple (typically a single string) is treated as one name.
    if transformer_names is None:
        names = []
    elif isinstance(transformer_names, (list, tuple)):
        names = list(transformer_names)
    else:
        names = [transformer_names]

    # Row normalisation, when requested, always runs before the named transforms.
    t_list = [Normalizer()] if row_normalise else []

    for transformer_name in names:
        t = transformer_name.casefold()

        if not str_in_nested_list(transformers, t):
            raise ValueError(
                f"UNKNOWN TRANSFORMER: {t} in get_data_transform_by_name"
            )
        t_list.append(_set_transformer(t, random_state, n_jobs))

    # An empty request (None or an empty sequence without row_normalise) yields
    # None instead of indexing into an empty list.
    if not t_list:
        return None

    return t_list if len(t_list) > 1 else t_list[0]
63+
64+
65+
def _set_transformer(t, random_state, n_jobs):
    """Construct the transformer for a single, already case-folded name.

    Parameters
    ----------
    t : str
        Case-folded transformer name.
    random_state : int, RandomState instance or None
        Accepted so callers can pass it uniformly; none of the transformers
        constructed here use it.
    n_jobs : int
        Accepted so callers can pass it uniformly; none of the transformers
        constructed here use it.

    Returns
    -------
    transformer
        A new ``Normalizer`` or ``Padder`` instance built with default arguments.

    Raises
    ------
    ValueError
        If ``t`` has no matching branch, rather than implicitly returning None.
    """
    if t in ("normalizer", "normaliser"):
        return Normalizer()
    if t == "padder":
        # Padder is only imported when it is actually requested.
        from aeon.transformations.collection import Padder

        return Padder()

    # Fail loudly if a name was registered without a construction branch here.
    raise ValueError(f"UNKNOWN TRANSFORMER: {t} in _set_transformer")

tsml_eval/experiments/set_forecaster.py renamed to tsml_eval/experiments/_get_forecaster.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def get_forecaster_by_name(forecaster_name, random_state=None, n_jobs=1, **kwarg
4848
elif str_in_nested_list(other_forecasters, f):
4949
return _set_forecaster_other(f, random_state, n_jobs, kwargs)
5050
else:
51-
raise ValueError(f"UNKNOWN FORECASTER: {f} in set_forecaster")
51+
raise ValueError(f"UNKNOWN FORECASTER: {f} in get_forecaster_by_name")
5252

5353

5454
def _set_forecaster_stats(f, random_state, n_jobs, kwargs):

tsml_eval/experiments/set_regressor.py renamed to tsml_eval/experiments/_get_regressor.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -165,7 +165,7 @@ def get_regressor_by_name(
165165
r, random_state, n_jobs, fit_contract, checkpoint, kwargs
166166
)
167167
else:
168-
raise ValueError(f"UNKNOWN REGRESSOR: {r} in set_regressor")
168+
raise ValueError(f"UNKNOWN REGRESSOR: {r} in get_regressor_by_name")
169169

170170

171171
def _set_regressor_convolution_based(

tsml_eval/experiments/classification_experiments.py

+22-4
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@
1818
import numba
1919
from aeon.utils.validation._dependencies import _check_soft_dependencies
2020

21-
from tsml_eval.experiments import load_and_run_classification_experiment
22-
from tsml_eval.experiments.set_classifier import get_classifier_by_name
21+
from tsml_eval.experiments import (
22+
get_classifier_by_name,
23+
get_data_transform_by_name,
24+
load_and_run_classification_experiment,
25+
)
2326
from tsml_eval.experiments.tests import _CLASSIFIER_RESULTS_PATH
2427
from tsml_eval.testing.testing_utils import _TEST_DATA_PATH
2528
from tsml_eval.utils.arguments import parse_args
@@ -81,9 +84,18 @@ def run_experiment(args):
8184
checkpoint=args.checkpoint,
8285
**args.kwargs,
8386
),
84-
row_normalise=args.row_normalise,
8587
classifier_name=args.estimator_name,
8688
resample_id=args.resample_id,
89+
data_transforms=get_data_transform_by_name(
90+
args.data_transform_name,
91+
row_normalise=args.row_normalise,
92+
random_state=(
93+
args.resample_id
94+
if args.random_seed is None
95+
else args.random_seed
96+
),
97+
n_jobs=1,
98+
),
8799
build_train_file=args.train_fold,
88100
write_attributes=args.write_attributes,
89101
att_max_shape=args.att_max_shape,
@@ -101,6 +113,7 @@ def run_experiment(args):
101113
estimator_name = "ROCKET"
102114
dataset_name = "MinimalChinatown"
103115
row_normalise = False
116+
transform_name = None
104117
resample_id = 0
105118
train_fold = False
106119
write_attributes = True
@@ -120,16 +133,21 @@ def run_experiment(args):
120133
checkpoint=checkpoint,
121134
**kwargs,
122135
)
136+
transform = get_data_transform_by_name(
137+
transform_name,
138+
row_normalise=row_normalise,
139+
random_state=resample_id,
140+
)
123141
print(f"Local Run of {estimator_name} ({classifier.__class__.__name__}).")
124142

125143
load_and_run_classification_experiment(
126144
data_path,
127145
results_path,
128146
dataset_name,
129147
classifier,
130-
row_normalise=row_normalise,
131148
classifier_name=estimator_name,
132149
resample_id=resample_id,
150+
data_transforms=transform,
133151
build_train_file=train_fold,
134152
write_attributes=write_attributes,
135153
att_max_shape=att_max_shape,

tsml_eval/experiments/clustering_experiments.py

+22-4
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,11 @@
1818
import numba
1919
from aeon.utils.validation._dependencies import _check_soft_dependencies
2020

21-
from tsml_eval.experiments import load_and_run_clustering_experiment
22-
from tsml_eval.experiments.set_clusterer import get_clusterer_by_name
21+
from tsml_eval.experiments import (
22+
get_clusterer_by_name,
23+
get_data_transform_by_name,
24+
load_and_run_clustering_experiment,
25+
)
2326
from tsml_eval.experiments.tests import _CLUSTERER_RESULTS_PATH
2427
from tsml_eval.testing.testing_utils import _TEST_DATA_PATH
2528
from tsml_eval.utils.arguments import parse_args
@@ -88,10 +91,19 @@ def run_experiment(args):
8891
row_normalise=args.row_normalise,
8992
**args.kwargs,
9093
),
91-
row_normalise=args.row_normalise,
9294
n_clusters=args.n_clusters,
9395
clusterer_name=args.estimator_name,
9496
resample_id=args.resample_id,
97+
data_transforms=get_data_transform_by_name(
98+
args.data_transform_name,
99+
row_normalise=args.row_normalise,
100+
random_state=(
101+
args.resample_id
102+
if args.random_seed is None
103+
else args.random_seed
104+
),
105+
n_jobs=1,
106+
),
95107
build_test_file=args.test_fold,
96108
write_attributes=args.write_attributes,
97109
att_max_shape=args.att_max_shape,
@@ -110,6 +122,7 @@ def run_experiment(args):
110122
estimator_name = "KMeans"
111123
dataset_name = "MinimalChinatown"
112124
row_normalise = False
125+
transform_name = None
113126
n_clusters = -1
114127
resample_id = 0
115128
test_fold = False
@@ -133,17 +146,22 @@ def run_experiment(args):
133146
row_normalise=row_normalise,
134147
**kwargs,
135148
)
149+
transform = get_data_transform_by_name(
150+
transform_name,
151+
row_normalise=row_normalise,
152+
random_state=resample_id,
153+
)
136154
print(f"Local Run of {estimator_name} ({clusterer.__class__.__name__}).")
137155

138156
load_and_run_clustering_experiment(
139157
data_path,
140158
results_path,
141159
dataset_name,
142160
clusterer,
143-
row_normalise=row_normalise,
144161
n_clusters=n_clusters,
145162
clusterer_name=estimator_name,
146163
resample_id=resample_id,
164+
data_transforms=transform,
147165
build_test_file=test_fold,
148166
write_attributes=write_attributes,
149167
att_max_shape=att_max_shape,

0 commit comments

Comments
 (0)