class SplitPattern(PatternInterface):
    """
    Pattern that extracts key/value pairs from a delimiter-separated string.

    The input is cut into pieces with ``splitter``; the pattern only matches
    when ``keyword`` is exactly one of those pieces. Every piece is then cut
    once with ``kv_splitter`` and, when its key is listed in ``fields``, the
    value is converted with the callable registered for that key.

    :param name: pattern name, also used as its unique id
    :param keyword: piece that must be present for a string to match
    :param splitter: separator between the pieces of the input string
    :param kv_splitter: separator between the key and value inside a piece
    :param fields: mapping of key -> callable converting the raw value string
    :param tags: tags attached to this pattern (fresh empty dict when omitted)
    :param processors: functions called as ``p(name, result)`` or objects
        exposing ``process(name, result)``, applied in order to the result
    :param tests: test cases attached to the pattern (only stored here)
    """

    def __init__(self, name: str, keyword: str, splitter: str, kv_splitter: str,
                 fields: dict, tags: dict = None, processors=None, tests=None):
        self.name = name
        self.keyword = keyword
        self.splitter = splitter
        self.kv_splitter = kv_splitter
        self.fields = fields
        # Build fresh containers per instance: mutable default arguments are
        # created once and would otherwise be shared by every SplitPattern.
        self.tags = {} if tags is None else tags
        self.processors = [] if processors is None else processors
        self.tests = [] if tests is None else tests
        self.match_count = 0

    def get_unique_id(self):
        """Return the unique id of this pattern (its name)."""
        return self.name

    def get_name(self):
        """Return the pattern name."""
        return self.name

    def get_tags(self):
        """Return the tags attached to this pattern."""
        return self.tags

    def get_match_count(self):
        """Return how many strings this pattern has matched so far."""
        return self.match_count

    def match(self, s):
        """
        Try to extract the configured fields from ``s``.

        :param s: string to parse
        :return: ``None`` when ``keyword`` is not one of the split pieces,
            otherwise the dict of converted fields after all processors ran
        """
        pieces = s.split(self.splitter)
        if self.keyword not in pieces:
            return None

        result = {}
        for piece in pieces:
            # maxsplit=1 keeps any further kv_splitter inside the value.
            pair = piece.split(self.kv_splitter, 1)
            if len(pair) != 2:
                continue
            key, raw = pair
            if key in self.fields:
                # A repeated key overwrites the earlier value.
                result[key] = self.fields[key](raw)

        self.match_count += 1

        for p in self.processors:
            if isinstance(p, types.FunctionType):
                result = p(self.name, result)
            else:
                result = p.process(self.name, result)
        return result
Coffee.Data.DataFlow import DataPoint, DataLoader diff --git a/Coffee/Data/TimestampPatterns.py b/Coffee/Data/TimestampPatterns.py index 53c1d64..7efb935 100644 --- a/Coffee/Data/TimestampPatterns.py +++ b/Coffee/Data/TimestampPatterns.py @@ -1,7 +1,7 @@ # Copyright 2022 tkorays. All Rights Reserved. # Licensed to MIT under a Contributor Agreement. -from Coffee.Data.DataPattern import RegexPattern +from Coffee.Data.DataExtractor import RegexPattern DEFAULT_TS_PATTERNS = [ RegexPattern( diff --git a/Coffee/Data/__init__.py b/Coffee/Data/__init__.py index c88ffde..04689d5 100644 --- a/Coffee/Data/__init__.py +++ b/Coffee/Data/__init__.py @@ -7,7 +7,7 @@ LogFileDataLoader ) from .DataModel import DataModel -from .DataPattern import ( +from .DataExtractor import ( PatternInterface, RegexPattern, PatternGroup, PatternGroupBuilder ) diff --git a/requirements.txt b/requirements.txt index fd3eab3..4b13a49 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,4 +12,5 @@ pid python-daemon django h5py +numpy ddt diff --git a/setup.py b/setup.py index 765b4ac..0b10f1e 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ 'python-daemon', 'django', 'h5py', + 'numpy', 'ddt', ], dependency_links=[