Skip to content

Commit

Permalink
simplify the log extract
Browse files Browse the repository at this point in the history
  • Loading branch information
tkorays committed Nov 6, 2022
1 parent 1824b03 commit 8511d19
Show file tree
Hide file tree
Showing 10 changed files with 95 additions and 50 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ venv/
.python-version
*.pyc
*.sqlite3
CoffeePlaybooks/*
CoffeePlaybooks/*
*.log
41 changes: 25 additions & 16 deletions Coffee/Data/DataLoader.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,23 @@
道生一,一生二,二生三,三生万物。
万物负阴而抱阳,冲气以为和。
"""


from abc import ABC
from datetime import datetime
from rich.console import Console
from rich.progress import Progress
import io

from Coffee.Data.DataPattern import RegexPattern
from Coffee.Data.DataPattern import RegexPattern, PatternGroupBuilder
from Coffee.Core.Utils import merge_datetime
from Coffee.Data.DataPattern import PatternGroup
from Coffee.Data.DataProcessor import DataSink
from Coffee.Data.DataProcessor import DataSource
from Coffee.Data.DataPoint import DataPoint


class DataLoader(DataSource):
class DataLoader(DataSource, DataSink, ABC):
"""
Load data from some source.
Load data from some source, and feed datapoints to all sinks.
"""
def __init__(self):
    # Registered sinks, notified in registration order by on_data()/finish().
    self.sinks = []
Expand All @@ -34,19 +33,32 @@ def add_sink(self, sink: DataSink):
def start(self):
    """Start loading; concrete loaders (e.g. LogFileDataLoader) override this."""
    pass

def on_data(self, datapoint: DataPoint) -> DataPoint:
    """Pass the datapoint through every registered sink, in order.

    Each sink may return a (possibly new) datapoint, which is forwarded
    to the next sink; the final result is returned to the caller.
    """
    result = datapoint
    for sink in self.sinks:
        result = sink.on_data(result)
    return result

def finish(self, datapoint: DataPoint) -> DataPoint:
    """Notify every registered sink that loading is complete.

    The datapoint is chained through the sinks exactly like on_data():
    each sink's finish() result feeds the next sink.
    """
    result = datapoint
    for sink in self.sinks:
        result = sink.finish(result)
    return result

class LogFileDataLoader(DataLoader):
def __init__(self, log_path: str, pattern_preset: PatternGroup):
super().__init__()

class LogFileDataLoader(DataLoader, PatternGroupBuilder):
def __init__(self, log_path: str):
    """
    :param log_path: path of the log file to load.
    """
    # Initialize both bases explicitly: DataLoader sets up the sink list,
    # PatternGroupBuilder sets up an empty pattern group.
    DataLoader.__init__(self)
    PatternGroupBuilder.__init__(self)
    self.log_path = log_path

def set_pattern_group(self, group: PatternGroup):
    """Replace the builder's pattern group wholesale; returns self for chaining."""
    self.pattern_group = group
    return self

def start(self):
# timestamp pattern in logs
PTS = self.pattern_preset.get_ts_patterns()
PTS = self.pattern_group.get_ts_patterns()
# data pattern in logs
PDT = self.pattern_preset.get_patterns()
PDT = self.pattern_group.get_patterns()

# try to get date in log path
# some logs don't have year/month/day info
Expand Down Expand Up @@ -104,11 +116,8 @@ def start(self):
'tags': p.get_tags(),
}
)
for s in self.sinks:
dp = s.on_data(dp)
dp = self.on_data(dp)

dp = DataPoint.make_meta_datapoint({})
for s in self.sinks:
dp = s.finish(dp)
dp = self.finish(dp)
return dp

22 changes: 11 additions & 11 deletions Coffee/Data/DataPattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,25 +148,25 @@ class PatternGroupBuilder:
"""
build a pattern group
"""
def __init__(self, name: str = ''):
    """
    :param name: optional name for the pattern group being built.
    """
    # Start from an empty group; patterns are added via the builder methods.
    self.pattern_group = PatternGroup(name, [], [])

def set_ts_patterns(self, ts: list):
    """Replace all timestamp patterns at once; returns self for chaining."""
    self.pattern_group.ts_patterns = ts
    return self

def set_patterns(self, p: list):
    """Replace all data patterns at once; returns self for chaining."""
    self.pattern_group.patterns = p
    return self

def add_ts_pattern(self, ts: RegexPattern):
    """Append one timestamp pattern; returns self for chaining."""
    self.pattern_group.ts_patterns.append(ts)
    return self

def add_pattern(self, p: RegexPattern):
    """Append one data pattern; returns self for chaining."""
    self.pattern_group.patterns.append(p)
    return self

def build(self):
    """Return the assembled PatternGroup."""
    # Note: the attribute was renamed from `group` to `pattern_group`;
    # this must match what __init__ and the setter methods assign.
    return self.pattern_group

31 changes: 14 additions & 17 deletions Coffee/Data/DataPoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,30 +2,27 @@
# Licensed to MIT under a Contributor Agreement.

from datetime import datetime
from dataclasses import dataclass


@dataclass
class DataPoint:
    """A named, timestamped record that flows through the data pipeline."""
    # type/name of this datapoint
    name: str
    # timestamp of this datapoint
    timestamp: datetime
    # data values in key-value format
    value: dict
    # which keys of `value` should be treated as tags (used for search filtering)
    tags: list
    # meta data of this datapoint
    meta: dict

def timestamp_ms(self) -> int:
    """Return this datapoint's timestamp as integer milliseconds since the epoch."""
    seconds = self.timestamp.timestamp()
    return int(seconds * 1000)

def timestamp_s(self) -> int:
    """Return this datapoint's timestamp as whole seconds since the epoch (truncated)."""
    return int(self.timestamp.timestamp())

@staticmethod
def make_meta_datapoint(meta):
Expand Down
2 changes: 1 addition & 1 deletion Coffee/Data/DataProcessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def finish(self, datapoint: DataPoint) -> DataPoint:
pass


class BypassDataSink(DataSink):
    """A DataSink that forwards every datapoint unchanged (pass-through / no-op)."""

    def on_data(self, datapoint: DataPoint) -> DataPoint:
        return datapoint

Expand Down
5 changes: 3 additions & 2 deletions Coffee/Service/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
https://docs.djangoproject.com/en/4.1/ref/settings/
"""

import os
from pathlib import Path

# Build paths inside the project like this: BASE_DIR / 'subdir'.
Expand Down Expand Up @@ -76,8 +77,8 @@
# Database configuration. The SQLite file lives under ~/.coffee so the
# service does not depend on a machine-specific absolute path.
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        # 'NAME': BASE_DIR / 'db.sqlite3',
        'NAME': os.path.join(os.path.expanduser('~'), '.coffee', 'db.sqlite3'),
    }
}

Expand Down
33 changes: 33 additions & 0 deletions examples/log_extract.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from Coffee.Data import *
import pandas as pd
import matplotlib.pyplot as plt


class DataAggregator1(DataAggregator):
    """Aggregator that collects each datapoint's value dict (stamped with its
    timestamp under 'datetime') into self.all_points for later DataFrame use."""

    def on_data(self, datapoint: DataPoint) -> DataPoint:
        row = datapoint.value
        row['datetime'] = datapoint.timestamp
        self.all_points.append(row)
        return datapoint


# Collect extracted rows in memory so they can be charted below.
agg = DataAggregator1()


# Load "1.log", parse timestamps with the default presets, extract (a, b)
# integer pairs with the regex pattern, and feed every datapoint to the
# aggregator sink.
LogFileDataLoader("1.log").set_ts_patterns(
    DEFAULT_TS_PATTERNS
).add_pattern(
    RegexPattern(name="a_pattern",
                 pattern=r'(\d+),(\d+)',
                 fields={
                     'a': int,
                     'b': int,
                 },
                 tags=[('a', 'A')],
                 version='1.0')
).add_sink(agg).start()

# Build a DataFrame from the aggregated rows and plot both series over time.
df = pd.DataFrame(agg.all_points)
print(df)
df.plot(x='datetime', y=['a', 'b'], kind='line')
plt.show()
2 changes: 1 addition & 1 deletion examples/simple_playbooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def play(self):
2022-08-13 12:00:04.000 1234,6
2022-08-13 12:00:08.000 1234,0
"""
LogFileDataLoader(self.log, self.pattern_group).add_sink(
LogFileDataLoader(self.log).set_pattern_group(self.pattern_group).add_sink(
# upload data to influxdb
InfluxDBDataSink(DEF_TSDB)
).add_sink(
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,5 @@ django
celery
django-rest-framework
redis
pandas
matplotlib
4 changes: 3 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@
'python-daemon',
'django',
'celery',
'redis'
'redis',
'pandas',
'matplotlib'
],
dependency_links=[
],
Expand Down

0 comments on commit 8511d19

Please sign in to comment.