From e967778f25044760dfcde200eed94c4f2de29e94 Mon Sep 17 00:00:00 2001 From: Sean Whalen <44679+seanthegeek@users.noreply.github.com> Date: Wed, 4 Sep 2024 16:31:41 -0400 Subject: [PATCH] 8.15.0 - Fix processing of SMTP-TLS reports ([#549](https://github.com/domainaware/parsedmarc/issues/549)), which broke in commit [410663d](https://github.com/domainaware/parsedmarc/commit/410663dbcaba019ca3d3744946348b56a635480b) (PR [#530](https://github.com/domainaware/parsedmarc/pull/530)) - This PR enforced a stricter check for base64-encoded strings, which SMTP TLS reports from Google did not pass - Removing the check introduced its own issue, because some file paths were treated as base64-encoded strings - Create a separate `extract_report_from_file_path()` function for processing reports based on a file path - Remove report extraction based on a file path from `extract_report()` --- CHANGELOG.md | 11 ++++++++++- parsedmarc/__init__.py | 33 +++++++++++++++++++-------------- tests.py | 4 ++-- 3 files changed, 31 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 79378691..0022640d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,15 @@ Changelog ========= +8.15.0 +------ + +- Fix processing of SMTP-TLS reports ([#549](https://github.com/domainaware/parsedmarc/issues/549)), which broke in commit [410663d](https://github.com/domainaware/parsedmarc/commit/410663dbcaba019ca3d3744946348b56a635480b) (PR [#530](https://github.com/domainaware/parsedmarc/pull/530)) + - This PR enforced a stricter check for base64-encoded strings, which SMTP TLS reports from Google did not pass + - Removing the check introduced its own issue, because some file paths were treated as base64-encoded strings +- Create a separate `extract_report_from_file_path()` function for processing reports based on a file path +- Remove report extraction based on a file path from `extract_report()` + 8.14.2 ------ @@ -9,7 +18,7 @@ Changelog 8.14.1 ------ -- Fix processing of 
SMTP-TLS reports (#549) +- Failed attempt to fix processing of SMTP-TLS reports (#549) 8.14.0 ------ diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py index 1ec1c585..bb99ce1b 100644 --- a/parsedmarc/__init__.py +++ b/parsedmarc/__init__.py @@ -34,7 +34,7 @@ from parsedmarc.utils import parse_email from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime -__version__ = "8.14.2" +__version__ = "8.15.0" logger.debug("parsedmarc v{0}".format(__version__)) @@ -585,32 +585,31 @@ def parse_aggregate_report_xml( "Unexpected error: {0}".format(error.__str__())) -def extract_report(input_): +def extract_report(content): """ - Extracts text from a zip or gzip file at the given path, file-like object, + Extracts text from a zip or gzip file, as a base64-encoded string, file-like object, or bytes. Args: - input_: A path to a file, a file like object, or bytes + content: report file as a base64-encoded string, file-like object or bytes. Returns: str: The extracted text """ + file_object = None try: - file_object = None - if isinstance(input_, str): + if isinstance(content, str): try: - input_ = b64decode(input_, validate=True) - file_object = BytesIO(input_) + file_object = BytesIO(b64decode(content)) except binascii.Error: pass if file_object is None: - file_object = open(input_, "rb") - elif type(input_) is bytes: - file_object = BytesIO(input_) + file_object = open(content, "rb") + elif type(content) is bytes: + file_object = BytesIO(content) else: - file_object = input_ + file_object = content header = file_object.read(6) file_object.seek(0) @@ -630,8 +629,6 @@ def extract_report(input_): file_object.close() - except FileNotFoundError: - raise ParserError("File was not found") except UnicodeDecodeError: file_object.close() raise ParserError("File objects must be opened in binary (rb) mode") @@ -642,6 +639,14 @@ def extract_report(input_): return report +def extract_report_from_file_path(file_path): + """Extracts report from a file at the given 
file_path""" + try: + with open(file_path, "rb") as report_file: + return extract_report(report_file.read()) + except FileNotFoundError: + raise ParserError("File was not found") + def parse_aggregate_report_file( _input, diff --git a/tests.py b/tests.py index 75a43142..464e63a3 100644 --- a/tests.py +++ b/tests.py @@ -82,7 +82,7 @@ def testExtractReportGZip(self): print() file = "samples/extract_report/nice-input.xml.gz" print("Testing {0}: " .format(file), end="") - xmlout = parsedmarc.extract_report(file) + xmlout = parsedmarc.extract_report_from_file_path(file) xmlin = open("samples/extract_report/nice-input.xml").read() self.assertTrue(compare_xml(xmlout, xmlin)) print("Passed!") @@ -92,7 +92,7 @@ def testExtractReportZip(self): print() file = "samples/extract_report/nice-input.xml.zip" print("Testing {0}: " .format(file), end="") - xmlout = parsedmarc.extract_report(file) + xmlout = parsedmarc.extract_report_from_file_path(file) print(xmlout) xmlin = minify_xml(open( "samples/extract_report/nice-input.xml").read())