From 04bf501b8d52875090bc8e9430b366adfd527f6b Mon Sep 17 00:00:00 2001 From: Seth Linn <117631186+sethlinnkuleuven@users.noreply.github.com> Date: Thu, 7 Sep 2023 12:24:42 +0200 Subject: [PATCH] Add Traefik json log support (#358) * Add Traefik JSON log format * Add Traefik JSON log format * Add Traefik json log support. What was done: 1. Expanded import_logs.py to auto recognize and import Traefik json log files. 2. Modified the NGINX json support, made the autodetect function more specific. It was too generic. All json formatted logs were seen as NGINX logs. 3. Added tests for Traefik log importing. * Fixed failing tests and refactored get_all method What was done: * The get_all method was returning the raw json without any key mapping. Fixed that. This meant also changing the names in test_main.py * changed log file from CRLF to LF --------- Co-authored-by: David Patiashvili --- import_logs.py | 96 +++++++++++++++++++++++++++++++++++-- tests/logs/traefik_json.log | 1 + tests/test_main.py | 13 +++++ 3 files changed, 105 insertions(+), 5 deletions(-) create mode 100644 tests/logs/traefik_json.log diff --git a/import_logs.py b/import_logs.py index 80ea4ae..0cb43b1 100755 --- a/import_logs.py +++ b/import_logs.py @@ -145,16 +145,22 @@ def check_format(self, file): def check_format_line(self, line): return False -class JsonFormat(BaseFormat): +class NginxJsonFormat(BaseFormat): def __init__(self, name): - super(JsonFormat, self).__init__(name) + super(NginxJsonFormat, self).__init__(name) self.json = None self.date_format = '%Y-%m-%dT%H:%M:%S' def check_format_line(self, line): try: self.json = json.loads(line) - return True + + # Check if it contains these: "idsite", "referrer", and "date". + # This is unique to nginx, we can use this to tell it apart from other json log formats. 
+ if "idsite" in self.json and "referrer" in self.json and "date" in self.json: + return True + + return False except: return False @@ -185,7 +191,7 @@ def get(self, key): return self.json[key] except KeyError: raise BaseFormatException() - + def get_all(self,): return self.json @@ -193,6 +199,85 @@ def remove_ignored_groups(self, groups): for group in groups: del self.json[group] +class TraefikJsonFormat(BaseFormat): + + TRAEFIK_KEYS_MAPPING = { + 'date': 'time', + 'generation_time_milli': 'Duration', + 'host': 'RequestHost', + 'ip': 'ClientHost', + 'length': 'DownstreamContentSize', + 'method': 'RequestMethod', + 'path': 'RequestPath', + 'referrer': 'request_Referer', + 'status': 'DownstreamStatus', + 'userid': 'ClientUsername', + 'user_agent': 'request_User-Agent', + } + + def __init__(self, name): + super(TraefikJsonFormat, self).__init__(name) + self.json = None + self.date_format = '%Y-%m-%dT%H:%M:%S' + + def check_format_line(self, line): + try: + self.json = json.loads(line) + + # Check if it contains all of these: "level", "msg", and "time". + # This is unique to Traefik, we can use this to tell it apart from other json log formats. 
+ if "msg" in self.json and "level" in self.json and "time" in self.json: + return True + + return False + except: + return False + + def match(self, line): + try: + self.json = json.loads(line) + return self + except: + self.json = None + return None + + def get(self, key): + + value = '' + try: + value = self.json[self.TRAEFIK_KEYS_MAPPING[key]] + + if key == 'generation_time_milli': + value = value / 1000000 + + # Patch date format ISO 8601, example: 2023-08-14T12:25:56+02:00 + if key == 'date': + tz = value[19:] # get the last part + self.json['timezone'] = tz.replace(':', '') + value = value[:19] + + except: + logging.debug("Could not find %s in Traefik log", key) + return '' + + return str(value) + + def get_all(self,): + modified_json = self.json.copy() + + REVERSED_KEYS_MAPPING = {v: k for k, v in self.TRAEFIK_KEYS_MAPPING.items()} + + for key in self.json: + new_key = REVERSED_KEYS_MAPPING.get(key, key) + if new_key != key: + modified_json[new_key] = modified_json.pop(key) + + return modified_json + + def remove_ignored_groups(self, groups): + for group in groups: + del self.json[group] + class RegexFormat(BaseFormat): def __init__(self, name, regex, date_format=None): @@ -503,7 +588,8 @@ def get(self, key): 's3': RegexFormat('s3', _S3_LOG_FORMAT), 'icecast2': RegexFormat('icecast2', _ICECAST2_LOG_FORMAT), 'elb': RegexFormat('elb', _ELB_LOG_FORMAT, '%Y-%m-%dT%H:%M:%S'), - 'nginx_json': JsonFormat('nginx_json'), + 'nginx_json': NginxJsonFormat('nginx_json'), + 'traefik_json': TraefikJsonFormat('traefik_json'), 'ovh': RegexFormat('ovh', _OVH_FORMAT), 'haproxy': RegexFormat('haproxy', _HAPROXY_FORMAT, '%d/%b/%Y:%H:%M:%S.%f'), 'gandi': RegexFormat('gandi', _GANDI_SIMPLE_HOSTING_FORMAT, '%d/%b/%Y:%H:%M:%S') diff --git a/tests/logs/traefik_json.log b/tests/logs/traefik_json.log new file mode 100644 index 0000000..c43c297 --- /dev/null +++ b/tests/logs/traefik_json.log @@ -0,0 +1 @@ +{"ClientAddr": "172.20.0.1:52458","ClientHost": 
"1.2.3.4","ClientPort":"52458","ClientUsername":"-","request_User-Agent":"Prometheus/2.40.5","request_Referer":"www.example.com","DownstreamContentSize":15,"DownstreamStatus":"200","Duration":10002425284,"OriginContentSize":15,"OriginDuration":10002302842,"OriginStatus":504,"Overhead":122442,"RequestAddr":"localhost","RequestContentSize":0,"RequestCount":1,"RequestHost":"localhost","RequestMethod":"GET","RequestPath":"/my/app/auth?id=1234&user=dude","RequestPort":"-","RequestProtocol":"HTTP/1.1","RequestScheme":"http","RetryAttempts":0,"RouterName":"my-router@file","ServiceAddr":"apache:80","ServiceName":"my-service@file","ServiceURL":{"Scheme":"http","Opaque":"","User":null,"Host":"apache:80","Path":"","RawPath":"","OmitHost":false,"ForceQuery":false,"RawQuery":"","Fragment":"","RawFragment":""},"StartLocal":"2023-08-11T11:39:21.773360139Z","StartUTC":"2023-08-11T11:39:21.773360139Z","entryPointName":"http","level":"info","msg":"","time":"2023-08-11T11:41:25"} diff --git a/tests/test_main.py b/tests/test_main.py index b7ae020..185d049 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -403,6 +403,19 @@ def check_nginx_json_groups(groups): assert groups['user_agent'] == 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17' assert groups['date'] == '2013-10-10T16:52:00+02:00' +def check_traefik_json_groups(groups): + assert groups['date'] == '2023-08-11T11:41:25' + assert groups['generation_time_milli'] == 10002425284 + assert groups['host'] == 'localhost' + assert groups['ip'] == '1.2.3.4' + assert groups['length'] == 15 + assert groups['method'] == 'GET' + assert groups['path'] == '/my/app/auth?id=1234&user=dude' + assert groups['referrer'] == 'www.example.com' + assert groups['status'] == '200' + assert groups['userid'] == '-' + assert groups['user_agent'] == 'Prometheus/2.40.5' + def check_icecast2_groups(groups): check_ncsa_extended_groups(groups)