Skip to content

Commit d59bd25

Browse files
authored
[ENH] Add AEAttentionBiGRUClusterer (#1725)
* Add AttentionBiGRUNetwork * Add path * Add layer under _check_soft_dependencies scope * Add num_input_samples kwarg * Add tf.expand_dims * fix bug to improve coverage * refactor tests * minor * minor * minor * minor * minor * minor * minor * refactor tests * refactor tests * update BaseDeepLearningNetwork * Add tag * Register tags for networks * remove auto-encoder tag from registry * refactor as per _config * update base: * typo fix * refactor tests * Add AttBGRU Clusterer * Parametrize tests * typo * bug fix * refactor tags * minor * minor * fix bug * minor * minor updates * minor * minor * minor * Minor * refactor tests * fixes * minor * minor fixes * skip encodernetwork test * refactor docsting example * minor * refactor attention layer to handle keras tensor * typo * minor * change _AttentionLayer implementation * fix keras-tensor and tensorflow compatibility issues and model saving * Remove custom attention layer * minor fixes * add default modelcheckpoint * fixes * Add AEAttentionBiGRUClusterer notebook example * Add estimator kwarg * Automatic `pre-commit` fixes * fixes * Update _ae_abgru.py * fix notebooks * remove deprecated * Delete examples/clustering/deep_clustering.ipynb * add metrics kwarg * remove return_X_y * Update _ae_abgru.py * Update _ae_abgru.py * Update _ae_abgru.py * minor --------- Co-authored-by: aadya940 <[email protected]>
1 parent b90cb5d commit d59bd25

File tree

4 files changed

+329
-2
lines changed

4 files changed

+329
-2
lines changed

aeon/clustering/deep_learning/__init__.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,12 @@
22

33
__all__ = [
44
"BaseDeepClusterer",
5-
"AEBiGRUClusterer",
65
"AEFCNClusterer",
76
"AEResNetClusterer",
7+
"AEAttentionBiGRUClusterer",
8+
"AEBiGRUClusterer",
89
]
10+
from aeon.clustering.deep_learning._ae_abgru import AEAttentionBiGRUClusterer
911
from aeon.clustering.deep_learning._ae_bgru import AEBiGRUClusterer
1012
from aeon.clustering.deep_learning._ae_fcn import AEFCNClusterer
1113
from aeon.clustering.deep_learning._ae_resnet import AEResNetClusterer
+325
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,325 @@
1+
"""Deep Learning Auto-Encoder using Attention Bidirectional GRU Network."""
2+
3+
__maintainer__ = []
4+
__all__ = ["AEAttentionBiGRUClusterer"]
5+
6+
import gc
7+
import os
8+
import time
9+
from copy import deepcopy
10+
11+
from sklearn.utils import check_random_state
12+
13+
from aeon.clustering import DummyClusterer
14+
from aeon.clustering.deep_learning.base import BaseDeepClusterer
15+
from aeon.networks import AEAttentionBiGRUNetwork
16+
17+
18+
class AEAttentionBiGRUClusterer(BaseDeepClusterer):
    """Auto-Encoder based on the Attention Bidirectional GRU Network.

    Parameters
    ----------
    n_clusters : int, default=None
        Number of clusters for the deep learning model.
    estimator : aeon clusterer, default=None
        An aeon estimator to be built using the transformed data.
        Defaults to aeon TimeSeriesKMeans() with euclidean distance
        and mean averaging method and n_clusters set to 2.
    clustering_algorithm : str, default="deprecated"
        Use 'estimator' parameter instead.
    clustering_params : dict, default=None
        Use 'estimator' parameter instead.
    latent_space_dim : int, default=128
        Dimension of the latent space of the auto-encoder.
    n_layers_encoder : int, default=2
        Number of Attention Bidirectional GRU Layers in the encoder.
    n_layers_decoder : int, default=2
        Number of Attention Bidirectional GRU Layers in the decoder.
    activation_encoder : str or list of str, default="relu"
        Activation used after the Attention Bidirectional GRU Layer of the encoder.
    activation_decoder : str or list of str, default="relu"
        Activation used after the Attention Bidirectional GRU Layer of the decoder.
    n_epochs : int, default=2000
        The number of epochs to train the model.
    batch_size : int, default=32
        The number of samples per gradient update.
    use_mini_batch_size : bool, default=False
        Whether or not to use the mini batch size formula
        ``min(batch_size, n_cases // 10)``, clamped to at least 1.
    random_state : int, RandomState instance or None, default=None
        If `int`, random_state is the seed used by the random number generator;
        If `RandomState` instance, random_state is the random number generator;
        If `None`, the random number generator is the `RandomState` instance used
        by `np.random`.
        Seeded random number generation can only be guaranteed on CPU processing,
        GPU processing will be non-deterministic.
    verbose : boolean, default=False
        Whether to output extra information.
    loss : str, default="mse"
        Loss passed to ``compile`` of the keras model.
    metrics : str or list of str, default=None
        Metrics to evaluate model predictions. If None,
        ``["mean_squared_error"]`` is used.
    optimizer : str or keras.optimizers object, default="Adam"
        Specify the optimizer to be used. If None, ``tf.keras.optimizers.Adam()``
        is used.
    file_path : str, default="./"
        File path to save best model.
    save_best_model : bool, default=False
        Whether or not to save the best model, if the
        modelcheckpoint callback is used by default,
        this condition, if True, will prevent the
        automatic deletion of the best saved model from
        file and the user can choose the file name.
    save_last_model : bool, default=False
        Whether or not to save the last model, last
        epoch trained, using the base class method
        save_last_model_to_file.
    best_file_name : str, default="best_model"
        The name of the file of the best model, if
        save_best_model is set to False, this parameter
        is discarded.
    last_file_name : str, default="last_file"
        The name of the file of the last model, if
        save_last_model is set to False, this parameter
        is discarded.
    callbacks : keras.callbacks, default=None
        List of keras callbacks.

    Examples
    --------
    >>> from aeon.clustering.deep_learning import AEAttentionBiGRUClusterer
    >>> from aeon.clustering import DummyClusterer
    >>> from aeon.datasets import load_unit_test
    >>> X_train, y_train = load_unit_test(split="train")
    >>> X_test, y_test = load_unit_test(split="test")
    >>> _clst = DummyClusterer(n_clusters=2)
    >>> abgruc=AEAttentionBiGRUClusterer(estimator=_clst, n_epochs=20,
    ... batch_size=4) # doctest: +SKIP
    >>> abgruc.fit(X_train) # doctest: +SKIP
    AEAttentionBiGRUClusterer(...)
    """

    def __init__(
        self,
        n_clusters=None,
        estimator=None,
        clustering_algorithm="deprecated",
        clustering_params=None,
        latent_space_dim=128,
        n_layers_encoder=2,
        n_layers_decoder=2,
        activation_encoder="relu",
        activation_decoder="relu",
        n_epochs=2000,
        batch_size=32,
        use_mini_batch_size=False,
        random_state=None,
        verbose=False,
        loss="mse",
        metrics=None,
        optimizer="Adam",
        file_path="./",
        save_best_model=False,
        save_last_model=False,
        best_file_name="best_model",
        last_file_name="last_file",
        callbacks=None,
    ):
        self.latent_space_dim = latent_space_dim
        self.n_layers_encoder = n_layers_encoder
        self.n_layers_decoder = n_layers_decoder
        self.activation_encoder = activation_encoder
        self.activation_decoder = activation_decoder
        self.optimizer = optimizer
        self.loss = loss
        self.metrics = metrics
        self.verbose = verbose
        self.use_mini_batch_size = use_mini_batch_size
        self.callbacks = callbacks
        self.file_path = file_path
        self.n_epochs = n_epochs
        self.save_best_model = save_best_model
        self.save_last_model = save_last_model
        self.best_file_name = best_file_name
        self.random_state = random_state

        super().__init__(
            n_clusters=n_clusters,
            clustering_algorithm=clustering_algorithm,
            clustering_params=clustering_params,
            estimator=estimator,
            batch_size=batch_size,
            last_file_name=last_file_name,
        )

        self._network = AEAttentionBiGRUNetwork(
            latent_space_dim=self.latent_space_dim,
            n_layers_encoder=self.n_layers_encoder,
            n_layers_decoder=self.n_layers_decoder,
            activation_encoder=self.activation_encoder,
            activation_decoder=self.activation_decoder,
        )

    def build_model(self, input_shape, **kwargs):
        """Construct a compiled, un-trained, keras model that is ready for training.

        In aeon, time series are stored in numpy arrays of shape
        (n_channels,n_timepoints). Keras/tensorflow assume
        data is in shape (n_timepoints,n_channels). This method also assumes
        (n_timepoints,n_channels). Transpose should happen in fit.

        Parameters
        ----------
        input_shape : tuple
            The shape of the data fed into the input layer, should be
            (n_timepoints,n_channels).

        Returns
        -------
        output : a compiled Keras Model.

        Raises
        ------
        ValueError
            If ``metrics`` is not a list, a string, or None.
        """
        import numpy as np
        import tensorflow as tf

        # Draw one integer seed from random_state and seed keras with it.
        rng = check_random_state(self.random_state)
        self.random_state_ = rng.randint(0, np.iinfo(np.int32).max)
        tf.keras.utils.set_random_seed(self.random_state_)
        encoder, decoder = self._network.build_network(input_shape, **kwargs)

        # Chain encoder and decoder, then reshape the reconstruction back to
        # the input shape so it can be compared with the input during training.
        input_layer = tf.keras.layers.Input(input_shape, name="input layer")
        encoder_output = encoder(input_layer)
        decoder_output = decoder(encoder_output)
        output_layer = tf.keras.layers.Reshape(
            target_shape=input_shape, name="outputlayer"
        )(decoder_output)

        model = tf.keras.models.Model(inputs=input_layer, outputs=output_layer)

        self.optimizer_ = (
            tf.keras.optimizers.Adam() if self.optimizer is None else self.optimizer
        )

        # Normalise ``metrics`` to a list for ``compile``.
        if self.metrics is None:
            self._metrics = ["mean_squared_error"]
        elif isinstance(self.metrics, list):
            self._metrics = self.metrics
        elif isinstance(self.metrics, str):
            self._metrics = [self.metrics]
        else:
            raise ValueError("Metrics should be a list, string, or None.")

        model.compile(optimizer=self.optimizer_, loss=self.loss, metrics=self._metrics)

        return model

    def _fit(self, X):
        """Fit the auto-encoder and the clusterer on the training set X.

        Parameters
        ----------
        X : np.ndarray of shape = (n_cases (n), n_channels (d), n_timepoints (m))
            The training input samples.

        Returns
        -------
        self : object
        """
        import tensorflow as tf

        # Transpose to conform to Keras input style.
        X = X.transpose(0, 2, 1)

        self.input_shape = X.shape[1:]
        self.training_model_ = self.build_model(self.input_shape)

        if self.verbose:
            self.training_model_.summary()

        if self.use_mini_batch_size:
            # With fewer than 10 cases ``X.shape[0] // 10`` is 0; clamp to 1
            # so the training loop always receives a positive batch size.
            mini_batch_size = max(1, min(self.batch_size, X.shape[0] // 10))
        else:
            mini_batch_size = self.batch_size

        # A throw-away, time-stamped file name is used when the best model
        # is not meant to be kept on disk.
        self.file_name_ = (
            self.best_file_name if self.save_best_model else str(time.time_ns())
        )
        checkpoint_path = self.file_path + self.file_name_ + ".keras"

        if self.callbacks is None:
            self.callbacks_ = [
                tf.keras.callbacks.ReduceLROnPlateau(
                    monitor="loss", factor=0.5, patience=50, min_lr=0.0001
                ),
                tf.keras.callbacks.ModelCheckpoint(
                    filepath=checkpoint_path,
                    monitor="loss",
                    save_best_only=True,
                ),
            ]
        else:
            self.callbacks_ = self._get_model_checkpoint_callback(
                callbacks=self.callbacks,
                file_path=self.file_path,
                file_name=self.file_name_,
            )

        # Auto-encoder training: the input is also the reconstruction target.
        self.history = self.training_model_.fit(
            X,
            X,
            batch_size=mini_batch_size,
            epochs=self.n_epochs,
            verbose=self.verbose,
            callbacks=self.callbacks_,
        )

        try:
            self.model_ = tf.keras.models.load_model(
                checkpoint_path,
                compile=False,
            )
            if not self.save_best_model:
                os.remove(checkpoint_path)
        except FileNotFoundError:
            # No checkpoint file was found; fall back to a copy of the
            # model as it stands after the last epoch.
            self.model_ = deepcopy(self.training_model_)

        self._fit_clustering(X=X)

        gc.collect()

        return self

    def _score(self, X, y=None):
        """Score the fitted estimator on the latent representation of X.

        Parameters
        ----------
        X : np.ndarray of shape = (n_cases (n), n_channels (d), n_timepoints (m))
            The input samples.
        y : ignored
            Unused by this method.

        Returns
        -------
        score
            The value returned by ``self._estimator.score`` on the output of
            ``self.model_.layers[1].predict``.
        """
        # Transpose to conform to Keras input style.
        X = X.transpose(0, 2, 1)
        latent_space = self.model_.layers[1].predict(X)
        return self._estimator.score(latent_space)

    @classmethod
    def _get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests.
            This implementation returns the same single parameter set
            regardless of the value passed.

        Returns
        -------
        params : dict or list of dict, default={}
            Parameters to create testing instances of the class.
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`.
        """
        param1 = {
            "estimator": DummyClusterer(n_clusters=2),
            "n_epochs": 1,
            "batch_size": 4,
            "n_layers_encoder": 1,
            "n_layers_decoder": 1,
        }

        return [param1]

aeon/networks/__init__.py

+1
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"DCNNNetwork",
1717
"AEDCNNNetwork",
1818
"AEAttentionBiGRUNetwork",
19+
"AEBiGRUNetwork",
1920
"AEDRNNNetwork",
2021
"AEBiGRUNetwork",
2122
]

aeon/networks/_ae_abgru.py

-1
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@ class AEAttentionBiGRUNetwork(BaseDeepLearningNetwork):
3333
Discovery and Data Mining: 24th Pacific-Asia Conference, PAKDD 2020, Singapore,
3434
May 11-14, 2020, Proceedings, Part I 24 (pp. 318-329). Springer International
3535
Publishing.
36-
3736
"""
3837

3938
_config = {

0 commit comments

Comments
 (0)