class TraefikJsonFormat(BaseFormat):
    """Log format for Traefik access logs written as one JSON object per line.

    Callers ask for fields by the lookup keys on the left-hand side of
    TRAEFIK_KEYS_MAPPING ('date', 'ip', 'path', ...); this class translates
    them to the field names Traefik writes into its JSON log lines.
    """

    # Lookup key accepted by get() -> field name inside the Traefik JSON line.
    TRAEFIK_KEYS_MAPPING = {
        'date': 'time',
        'generation_time_milli': 'Duration',
        'host': 'RequestHost',
        'ip': 'ClientHost',
        'length': 'DownstreamContentSize',
        'method': 'RequestMethod',
        'path': 'RequestPath',
        'referrer': 'request_Referer',
        'status': 'DownstreamStatus',
        'userid': 'ClientUsername',
        'user_agent': 'request_User-Agent',
    }

    # Inverse of TRAEFIK_KEYS_MAPPING, built once instead of on every get_all().
    _TRAEFIK_KEYS_REVERSED = {v: k for k, v in TRAEFIK_KEYS_MAPPING.items()}

    # A line is treated as a Traefik line only when all of these are present.
    _SIGNATURE_KEYS = ('msg', 'level', 'time')

    # Length of the "YYYY-MM-DDTHH:MM:SS" prefix that date_format can parse.
    _DATE_LENGTH = 19

    def __init__(self, name):
        super(TraefikJsonFormat, self).__init__(name)
        self.json = None
        self.date_format = '%Y-%m-%dT%H:%M:%S'

    @staticmethod
    def _parse_line(line):
        """Return `line` decoded as a JSON object (dict), or None.

        None is returned both for text that is not valid JSON and for valid
        JSON that is not an object (number, string, list, ...): only an
        object can carry the named fields this format reads.
        """
        try:
            parsed = json.loads(line)
        except (ValueError, TypeError):
            # json.JSONDecodeError is a ValueError; TypeError covers input
            # that is not str/bytes.
            return None
        return parsed if isinstance(parsed, dict) else None

    @staticmethod
    def _normalize_utc_offset(suffix):
        """Convert what follows the seconds of a timestamp into '+HHMM' form.

        Examples: '+02:00' -> '+0200', 'Z' -> '+0000',
        '.773360139Z' -> '+0000', '' -> ''.
        """
        if suffix.startswith('.'):
            # Skip fractional seconds; they are not part of the offset.
            suffix = suffix[1:].lstrip('0123456789')
        if suffix in ('Z', 'z'):
            return '+0000'
        return suffix.replace(':', '')

    def check_format_line(self, line):
        """Return True when `line` looks like a Traefik JSON log line.

        The parsed object (or None when parsing fails) is kept in self.json.
        """
        self.json = self._parse_line(line)
        if self.json is None:
            return False

        # "level", "msg" and "time" together are what tells Traefik apart
        # from other JSON log formats.
        return all(key in self.json for key in self._SIGNATURE_KEYS)

    def match(self, line):
        """Parse `line`; return self when it is a JSON object, else None."""
        self.json = self._parse_line(line)
        return self if self.json is not None else None

    def get(self, key):
        """Return the value for lookup key `key` as a string.

        An empty string is returned (and a debug message logged) when the
        key is unknown, the field is absent from the line, or its value has
        an unexpected type. This method never raises for a missing field.

        Special cases:
          * 'generation_time_milli': the 'Duration' field divided by
            1,000,000 (assumes Traefik reports nanoseconds -- see the
            Traefik access-log documentation).
          * 'date': only the first 19 characters (matching date_format) are
            returned; the UTC offset that follows is stored under
            self.json['timezone'].
          * 'timezone': the normalised UTC offset of the 'time' field, e.g.
            '+0200'. Previously the offset was stored but could never be
            read back through get().
            NOTE(review): confirm callers expect the '+HHMM' form.
        """
        try:
            if key == 'timezone':
                timestamp = self.json[self.TRAEFIK_KEYS_MAPPING['date']]
                return self._normalize_utc_offset(timestamp[self._DATE_LENGTH:])

            value = self.json[self.TRAEFIK_KEYS_MAPPING[key]]

            if key == 'generation_time_milli':
                value = value / 1000000
            elif key == 'date':
                # ISO 8601 timestamp, example: 2023-08-14T12:25:56+02:00
                self.json['timezone'] = self._normalize_utc_offset(
                    value[self._DATE_LENGTH:])
                value = value[:self._DATE_LENGTH]
        except (KeyError, TypeError, AttributeError):
            # KeyError: unknown key or absent field. TypeError/AttributeError:
            # no parsed line yet, or a field of an unexpected type.
            logging.debug("Could not find %s in Traefik log", key)
            return ''

        return str(value)

    def get_all(self):
        """Return a new dict of the parsed line with Traefik names translated.

        Fields listed in TRAEFIK_KEYS_MAPPING appear under their lookup key;
        every other field keeps its original name. On a name clash the
        translated field wins. Returns an empty dict when nothing is parsed.
        """
        parsed = self.json or {}
        reverse = self._TRAEFIK_KEYS_REVERSED

        translated = {k: v for k, v in parsed.items() if k not in reverse}
        translated.update(
            (reverse[k], v) for k, v in parsed.items() if k in reverse)
        return translated

    def remove_ignored_groups(self, groups):
        """Drop the fields named in `groups` from the parsed line.

        Names are translated through TRAEFIK_KEYS_MAPPING first, so the same
        keys accepted by get() work here; names without a mapping are used
        as-is. Fields that are not present are silently skipped.
        """
        if not self.json:
            return
        for group in groups:
            self.json.pop(self.TRAEFIK_KEYS_MAPPING.get(group, group), None)
"1.2.3.4","ClientPort":"52458","ClientUsername":"-","request_User-Agent":"Prometheus/2.40.5","request_Referer":"www.example.com","DownstreamContentSize":15,"DownstreamStatus":"200","Duration":10002425284,"OriginContentSize":15,"OriginDuration":10002302842,"OriginStatus":504,"Overhead":122442,"RequestAddr":"localhost","RequestContentSize":0,"RequestCount":1,"RequestHost":"localhost","RequestMethod":"GET","RequestPath":"/my/app/auth?id=1234&user=dude","RequestPort":"-","RequestProtocol":"HTTP/1.1","RequestScheme":"http","RetryAttempts":0,"RouterName":"my-router@file","ServiceAddr":"apache:80","ServiceName":"my-service@file","ServiceURL":{"Scheme":"http","Opaque":"","User":null,"Host":"apache:80","Path":"","RawPath":"","OmitHost":false,"ForceQuery":false,"RawQuery":"","Fragment":"","RawFragment":""},"StartLocal":"2023-08-11T11:39:21.773360139Z","StartUTC":"2023-08-11T11:39:21.773360139Z","entryPointName":"http","level":"info","msg":"","time":"2023-08-11T11:41:25"} diff --git a/tests/test_main.py b/tests/test_main.py index b7ae020..185d049 100644 --- a/tests/test_main.py +++ b/tests/test_main.py @@ -403,6 +403,19 @@ def check_nginx_json_groups(groups): assert groups['user_agent'] == 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17' assert groups['date'] == '2013-10-10T16:52:00+02:00' +def check_traefik_json_groups(groups): + assert groups['date'] == '2023-08-11T11:41:25' + assert groups['generation_time_milli'] == 10002425284 + assert groups['host'] == 'localhost' + assert groups['ip'] == '1.2.3.4' + assert groups['length'] == 15 + assert groups['method'] == 'GET' + assert groups['path'] == '/my/app/auth?id=1234&user=dude' + assert groups['referrer'] == 'www.example.com' + assert groups['status'] == '200' + assert groups['userid'] == '-' + assert groups['user_agent'] == 'Prometheus/2.40.5' + def check_icecast2_groups(groups): check_ncsa_extended_groups(groups)