From 158b6a1b573c7ec12dcace5e6ea7ab96491a24c1 Mon Sep 17 00:00:00 2001 From: David Patiashvili Date: Thu, 21 Oct 2021 15:34:00 +0200 Subject: [PATCH 1/4] Add Traefik JSON log format --- import_logs.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/import_logs.py b/import_logs.py index f38a5ad..26f88df 100755 --- a/import_logs.py +++ b/import_logs.py @@ -193,6 +193,48 @@ def remove_ignored_groups(self, groups): for group in groups: del self.json[group] +class TraefikJsonFormat(JsonFormat): + + KEYS_MAPPING = { + # 'event_category': '', + # 'event_action': '', + # 'event_name': '', + # 'query_string': '', + 'date': 'time', + 'generation_time_milli': 'Duration', + 'host': 'RequestHost', + 'ip': 'ClientHost', + 'length': 'DownstreamContentSize', + 'method': 'RequestMethod', + 'path': 'RequestPath', + 'referrer': 'request_Referer', + 'status': 'OriginStatus', + 'userid': 'ClientUsername', + 'user_agent': 'request_User-Agent', + } + + def __init__(self, name): + super(JsonFormat, self).__init__(name) + self.date_format = '%Y-%m-%dT%H:%M:%S' + + def get(self, key): + if key not in self.KEYS_MAPPING: + raise BaseFormatException() + + if key == 'timezone': + return '00:00' + + value = '' + if self.KEYS_MAPPING[key] in self.json: + value = self.json[self.KEYS_MAPPING[key]] + + if key == 'generation_time_milli': + value = value / 100000 + if key == 'date': + value = str(value).replace('Z', '') + + return str(value) + class RegexFormat(BaseFormat): def __init__(self, name, regex, date_format=None): @@ -504,6 +546,7 @@ def get(self, key): 'icecast2': RegexFormat('icecast2', _ICECAST2_LOG_FORMAT), 'elb': RegexFormat('elb', _ELB_LOG_FORMAT, '%Y-%m-%dT%H:%M:%S'), 'nginx_json': JsonFormat('nginx_json'), + 'traefik_json': TraefikJsonFormat('traefik_json'), 'ovh': RegexFormat('ovh', _OVH_FORMAT), 'haproxy': RegexFormat('haproxy', _HAPROXY_FORMAT, '%d/%b/%Y:%H:%M:%S.%f'), 'gandi': RegexFormat('gandi', _GANDI_SIMPLE_HOSTING_FORMAT, 
'%d/%b/%Y:%H:%M:%S') From 4eb0ea50002d70c0a0b5376cbea9fd13efa426a5 Mon Sep 17 00:00:00 2001 From: David Patiashvili Date: Thu, 21 Oct 2021 15:34:00 +0200 Subject: [PATCH 2/4] Add Traefik JSON log format --- import_logs.py | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/import_logs.py b/import_logs.py index 80ea4ae..0604a94 100755 --- a/import_logs.py +++ b/import_logs.py @@ -193,6 +193,48 @@ def remove_ignored_groups(self, groups): for group in groups: del self.json[group] +class TraefikJsonFormat(JsonFormat): + + KEYS_MAPPING = { + # 'event_category': '', + # 'event_action': '', + # 'event_name': '', + # 'query_string': '', + 'date': 'time', + 'generation_time_milli': 'Duration', + 'host': 'RequestHost', + 'ip': 'ClientHost', + 'length': 'DownstreamContentSize', + 'method': 'RequestMethod', + 'path': 'RequestPath', + 'referrer': 'request_Referer', + 'status': 'OriginStatus', + 'userid': 'ClientUsername', + 'user_agent': 'request_User-Agent', + } + + def __init__(self, name): + super(JsonFormat, self).__init__(name) + self.date_format = '%Y-%m-%dT%H:%M:%S' + + def get(self, key): + if key not in self.KEYS_MAPPING: + raise BaseFormatException() + + if key == 'timezone': + return '00:00' + + value = '' + if self.KEYS_MAPPING[key] in self.json: + value = self.json[self.KEYS_MAPPING[key]] + + if key == 'generation_time_milli': + value = value / 100000 + if key == 'date': + value = str(value).replace('Z', '') + + return str(value) + class RegexFormat(BaseFormat): def __init__(self, name, regex, date_format=None): @@ -504,6 +546,7 @@ def get(self, key): 'icecast2': RegexFormat('icecast2', _ICECAST2_LOG_FORMAT), 'elb': RegexFormat('elb', _ELB_LOG_FORMAT, '%Y-%m-%dT%H:%M:%S'), 'nginx_json': JsonFormat('nginx_json'), + 'traefik_json': TraefikJsonFormat('traefik_json'), 'ovh': RegexFormat('ovh', _OVH_FORMAT), 'haproxy': RegexFormat('haproxy', _HAPROXY_FORMAT, '%d/%b/%Y:%H:%M:%S.%f'), 'gandi': RegexFormat('gandi', 
_GANDI_SIMPLE_HOSTING_FORMAT, '%d/%b/%Y:%H:%M:%S') From bef3ace32225086dfcbcdda6db98671082b34a0d Mon Sep 17 00:00:00 2001 From: Seth Linn Date: Fri, 18 Aug 2023 15:38:50 +0200 Subject: [PATCH 3/4] Add Traefik json log support. What was done: 1. Expanded import_logs.py to auto recognize and import Traefik json log files. 2. Modified the NGINX json support, made the autodetect function more specific. It was too generic. All json formatted logs were seen as NGINX logs. 3. Added tests for Traefik log importing. --- .github/workflows/tests.yml | 2 +- import_logs.py | 84 ++++++++++++++++++++++++++----------- tests/logs/traefik_json.log | 1 + tests/test_main.py | 13 ++++++ 4 files changed, 74 insertions(+), 26 deletions(-) create mode 100644 tests/logs/traefik_json.log diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 3a91e7f..4190453 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -22,7 +22,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: [ '3.5', '3.6', '3.7', '3.8', '3.9', '3.10' ] + python-version: ['3.10.12'] # [ '3.5', '3.6', '3.7', '3.8', '3.9', '3.10' ] name: Tests (Python ${{ matrix.python-version }}) steps: - uses: actions/checkout@v2 diff --git a/import_logs.py b/import_logs.py index 0604a94..3fc0a7d 100755 --- a/import_logs.py +++ b/import_logs.py @@ -145,16 +145,22 @@ def check_format(self, file): def check_format_line(self, line): return False -class JsonFormat(BaseFormat): +class NginxJsonFormat(BaseFormat): def __init__(self, name): - super(JsonFormat, self).__init__(name) + super(NginxJsonFormat, self).__init__(name) self.json = None self.date_format = '%Y-%m-%dT%H:%M:%S' def check_format_line(self, line): try: self.json = json.loads(line) - return True + + # Check if it contains these: "idsite", "referrer", and date". + # This is unique to nginx, we can use this to tell it apart from other json log formats. 
+ if "idsite" in self.json and "referrer" in self.json and "date" in self.json: + return True + + return False except: return False @@ -185,7 +191,7 @@ def get(self, key): return self.json[key] except KeyError: raise BaseFormatException() - + def get_all(self,): return self.json @@ -193,13 +199,9 @@ def remove_ignored_groups(self, groups): for group in groups: del self.json[group] -class TraefikJsonFormat(JsonFormat): +class TraefikJsonFormat(BaseFormat): - KEYS_MAPPING = { - # 'event_category': '', - # 'event_action': '', - # 'event_name': '', - # 'query_string': '', + TRAEFIK_KEYS_MAPPING = { 'date': 'time', 'generation_time_milli': 'Duration', 'host': 'RequestHost', @@ -208,32 +210,64 @@ class TraefikJsonFormat(JsonFormat): 'method': 'RequestMethod', 'path': 'RequestPath', 'referrer': 'request_Referer', - 'status': 'OriginStatus', + 'status': 'DownstreamStatus', 'userid': 'ClientUsername', 'user_agent': 'request_User-Agent', } def __init__(self, name): - super(JsonFormat, self).__init__(name) + super(TraefikJsonFormat, self).__init__(name) + self.json = None self.date_format = '%Y-%m-%dT%H:%M:%S' + + def check_format_line(self, line): + try: + self.json = json.loads(line) - def get(self, key): - if key not in self.KEYS_MAPPING: - raise BaseFormatException() + # Check if it contains all of these: "level", "msg", and "time". + # This is unique to Traefik, we can use this to tell it apart from other json log formats. 
+ if "msg" in self.json and "level" in self.json and "time" in self.json: + return True + + return False + except: + return False + + def match(self, line): + try: + self.json = json.loads(line) + return self + except: + self.json = None + return None - if key == 'timezone': - return '00:00' + def get(self, key): value = '' - if self.KEYS_MAPPING[key] in self.json: - value = self.json[self.KEYS_MAPPING[key]] + try: + value = self.json[self.TRAEFIK_KEYS_MAPPING[key]] + + if key == 'generation_time_milli': + value = value / 1000000 + + # Patch date format ISO 8601, example: 2023-08-14T12:25:56+02:00 + if key == 'date': + tz = value[19:] # get the last part + self.json['timezone'] = tz.replace(':', '') + value = value[:19] - if key == 'generation_time_milli': - value = value / 100000 - if key == 'date': - value = str(value).replace('Z', '') + except: + logging.debug("Could not find %s in Traefik log", key) + return '' + + return str(value) + + def get_all(self,): + return self.json - return str(value) + def remove_ignored_groups(self, groups): + for group in groups: + del self.json[group] class RegexFormat(BaseFormat): @@ -545,7 +579,7 @@ def get(self, key): 's3': RegexFormat('s3', _S3_LOG_FORMAT), 'icecast2': RegexFormat('icecast2', _ICECAST2_LOG_FORMAT), 'elb': RegexFormat('elb', _ELB_LOG_FORMAT, '%Y-%m-%dT%H:%M:%S'), - 'nginx_json': JsonFormat('nginx_json'), + 'nginx_json': NginxJsonFormat('nginx_json'), 'traefik_json': TraefikJsonFormat('traefik_json'), 'ovh': RegexFormat('ovh', _OVH_FORMAT), 'haproxy': RegexFormat('haproxy', _HAPROXY_FORMAT, '%d/%b/%Y:%H:%M:%S.%f'), diff --git a/tests/logs/traefik_json.log b/tests/logs/traefik_json.log new file mode 100644 index 0000000..90a43cd --- /dev/null +++ b/tests/logs/traefik_json.log @@ -0,0 +1 @@ 
+{"ClientAddr":"172.20.0.1:52458","ClientHost":"172.0.0.1","ClientPort":"52458","ClientUsername":"-","request_User-Agent":"Prometheus/2.40.5","request_Referer":"www.example.com","DownstreamContentSize":15,"DownstreamStatus":200,"Duration":10002425284,"OriginContentSize":15,"OriginDuration":10002302842,"OriginStatus":504,"Overhead":122442,"RequestAddr":"localhost","RequestContentSize":0,"RequestCount":1,"RequestHost":"localhost","RequestMethod":"GET","RequestPath":"/my/app/auth?id=1234&user=dude","RequestPort":"-","RequestProtocol":"HTTP/1.1","RequestScheme":"http","RetryAttempts":0,"RouterName":"my-router@file","ServiceAddr":"apache:80","ServiceName":"my-service@file","ServiceURL":{"Scheme":"http","Opaque":"","User":null,"Host":"apache:80","Path":"","RawPath":"","OmitHost":false,"ForceQuery":false,"RawQuery":"","Fragment":"","RawFragment":""},"StartLocal":"2023-08-11T11:39:21.773360139Z","StartUTC":"2023-08-11T11:39:21.773360139Z","entryPointName":"http","level":"info","msg":"","time":"2023-08-11T11:41:25Z"} \ No newline at end of file diff --git a/tests/test_main.py b/tests/test_main.py index b7ae020..9d0309a 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -403,6 +403,19 @@ def check_nginx_json_groups(groups): assert groups['user_agent'] == 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17' assert groups['date'] == '2013-10-10T16:52:00+02:00' +def check_traefik_json_groups(groups): + assert groups['time'] == '2023-08-11T11:41:25Z' + assert groups['Duration'] == 10002425284 + assert groups['RequestHost'] == 'localhost' + assert groups['ClientHost'] == '172.0.0.1' + assert groups['DownstreamContentSize'] == 15 + assert groups['RequestMethod'] == 'GET' + assert groups['RequestPath'] == '/my/app/auth?id=1234&user=dude' + assert groups['request_Referer'] == '/my/app/auth?id=1234&user=dude' + assert groups['DownstreamStatus'] == '200' + assert groups['ClientUsername'] == '-' + assert 
groups['request_User-Agent'] == 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17' + def check_icecast2_groups(groups): check_ncsa_extended_groups(groups) From f0447954629768c52a2cff5b8279f6a394d4eb5c Mon Sep 17 00:00:00 2001 From: Seth Linn Date: Wed, 23 Aug 2023 09:52:06 +0200 Subject: [PATCH 4/4] Fixed failing tests and refactored get_all method What was done: * The get_all method was returning the raw json without any key mapping. Fixed that. This meant also changing the names in test_main.py * changed log file from CRLF to LF --- .github/workflows/tests.yml | 2 +- import_logs.py | 11 ++++++++++- tests/logs/traefik_json.log | 2 +- tests/test_main.py | 22 +++++++++++----------- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 4190453..3a91e7f 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -22,7 +22,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.10.12'] # [ '3.5', '3.6', '3.7', '3.8', '3.9', '3.10' ] + python-version: [ '3.5', '3.6', '3.7', '3.8', '3.9', '3.10' ] name: Tests (Python ${{ matrix.python-version }}) steps: - uses: actions/checkout@v2 diff --git a/import_logs.py b/import_logs.py index 3fc0a7d..0cb43b1 100755 --- a/import_logs.py +++ b/import_logs.py @@ -263,7 +263,16 @@ def get(self, key): return str(value) def get_all(self,): - return self.json + modified_json = self.json.copy() + + REVERSED_KEYS_MAPPING = {v: k for k, v in self.TRAEFIK_KEYS_MAPPING.items()} + + for key in self.json: + new_key = REVERSED_KEYS_MAPPING.get(key, key) + if new_key != key: + modified_json[new_key] = modified_json.pop(key) + + return modified_json def remove_ignored_groups(self, groups): for group in groups: diff --git a/tests/logs/traefik_json.log b/tests/logs/traefik_json.log index 90a43cd..c43c297 100644 --- a/tests/logs/traefik_json.log +++ b/tests/logs/traefik_json.log @@ -1 
+1 @@ -{"ClientAddr":"172.20.0.1:52458","ClientHost":"172.0.0.1","ClientPort":"52458","ClientUsername":"-","request_User-Agent":"Prometheus/2.40.5","request_Referer":"www.example.com","DownstreamContentSize":15,"DownstreamStatus":200,"Duration":10002425284,"OriginContentSize":15,"OriginDuration":10002302842,"OriginStatus":504,"Overhead":122442,"RequestAddr":"localhost","RequestContentSize":0,"RequestCount":1,"RequestHost":"localhost","RequestMethod":"GET","RequestPath":"/my/app/auth?id=1234&user=dude","RequestPort":"-","RequestProtocol":"HTTP/1.1","RequestScheme":"http","RetryAttempts":0,"RouterName":"my-router@file","ServiceAddr":"apache:80","ServiceName":"my-service@file","ServiceURL":{"Scheme":"http","Opaque":"","User":null,"Host":"apache:80","Path":"","RawPath":"","OmitHost":false,"ForceQuery":false,"RawQuery":"","Fragment":"","RawFragment":""},"StartLocal":"2023-08-11T11:39:21.773360139Z","StartUTC":"2023-08-11T11:39:21.773360139Z","entryPointName":"http","level":"info","msg":"","time":"2023-08-11T11:41:25Z"} \ No newline at end of file +{"ClientAddr": "172.20.0.1:52458","ClientHost": 
"1.2.3.4","ClientPort":"52458","ClientUsername":"-","request_User-Agent":"Prometheus/2.40.5","request_Referer":"www.example.com","DownstreamContentSize":15,"DownstreamStatus":"200","Duration":10002425284,"OriginContentSize":15,"OriginDuration":10002302842,"OriginStatus":504,"Overhead":122442,"RequestAddr":"localhost","RequestContentSize":0,"RequestCount":1,"RequestHost":"localhost","RequestMethod":"GET","RequestPath":"/my/app/auth?id=1234&user=dude","RequestPort":"-","RequestProtocol":"HTTP/1.1","RequestScheme":"http","RetryAttempts":0,"RouterName":"my-router@file","ServiceAddr":"apache:80","ServiceName":"my-service@file","ServiceURL":{"Scheme":"http","Opaque":"","User":null,"Host":"apache:80","Path":"","RawPath":"","OmitHost":false,"ForceQuery":false,"RawQuery":"","Fragment":"","RawFragment":""},"StartLocal":"2023-08-11T11:39:21.773360139Z","StartUTC":"2023-08-11T11:39:21.773360139Z","entryPointName":"http","level":"info","msg":"","time":"2023-08-11T11:41:25"} diff --git a/tests/test_main.py b/tests/test_main.py index 9d0309a..185d049 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -404,17 +404,17 @@ def check_nginx_json_groups(groups): assert groups['date'] == '2013-10-10T16:52:00+02:00' def check_traefik_json_groups(groups): - assert groups['time'] == '2023-08-11T11:41:25Z' - assert groups['Duration'] == 10002425284 - assert groups['RequestHost'] == 'localhost' - assert groups['ClientHost'] == '172.0.0.1' - assert groups['DownstreamContentSize'] == 15 - assert groups['RequestMethod'] == 'GET' - assert groups['RequestPath'] == '/my/app/auth?id=1234&user=dude' - assert groups['request_Referer'] == '/my/app/auth?id=1234&user=dude' - assert groups['DownstreamStatus'] == '200' - assert groups['ClientUsername'] == '-' - assert groups['request_User-Agent'] == 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17' + assert groups['date'] == '2023-08-11T11:41:25' + assert groups['generation_time_milli'] 
== 10002425284 + assert groups['host'] == 'localhost' + assert groups['ip'] == '1.2.3.4' + assert groups['length'] == 15 + assert groups['method'] == 'GET' + assert groups['path'] == '/my/app/auth?id=1234&user=dude' + assert groups['referrer'] == 'www.example.com' + assert groups['status'] == '200' + assert groups['userid'] == '-' + assert groups['user_agent'] == 'Prometheus/2.40.5' def check_icecast2_groups(groups): check_ncsa_extended_groups(groups)