From 193410d49f54cfcc925bf8f06e0d54210ebf0a74 Mon Sep 17 00:00:00 2001 From: Juri Dispan Date: Thu, 2 Jun 2022 10:41:24 +0200 Subject: [PATCH 1/2] optimized invariantsMiner --- loglizer/models/InvariantsMiner.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/loglizer/models/InvariantsMiner.py b/loglizer/models/InvariantsMiner.py index fa924a9..cc978fd 100644 --- a/loglizer/models/InvariantsMiner.py +++ b/loglizer/models/InvariantsMiner.py @@ -262,17 +262,17 @@ def _join_set(self, item_list, length): """ set_len = len(item_list) - return_list = [] + return_set = set() for i in range(set_len): for j in range(i + 1, set_len): i_set = set(item_list[i]) j_set = set(item_list[j]) - if len(i_set.union(j_set)) == length: - joined = sorted(list(i_set.union(j_set))) - if joined not in return_list: - return_list.append(joined) - return_list = sorted(return_list) - return return_list + u = i_set.union(j_set) + if len(u) == length: + joined = tuple(sorted(list(u))) + return_set.add(joined) + return_set = sorted(return_set) + return return_set def _check_candi_valid(self, item, length, search_space): From 19ec54c076abe58b3fab6836985c5417590b3f3c Mon Sep 17 00:00:00 2001 From: Juri Dispan Date: Thu, 2 Jun 2022 10:46:43 +0200 Subject: [PATCH 2/2] fix dataloader for npz files --- loglizer/dataloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loglizer/dataloader.py b/loglizer/dataloader.py index ae04778..b85a825 100644 --- a/loglizer/dataloader.py +++ b/loglizer/dataloader.py @@ -68,7 +68,7 @@ def load_HDFS(log_file, label_file=None, window='session', train_ratio=0.5, spli if log_file.endswith('.npz'): # Split training and validation set in a class-uniform way - data = np.load(log_file) + data = np.load(log_file, allow_pickle=True) x_data = data['x_data'] y_data = data['y_data'] (x_train, y_train), (x_test, y_test) = _split_data(x_data, y_data, train_ratio, split_type)