From e967778f25044760dfcde200eed94c4f2de29e94 Mon Sep 17 00:00:00 2001
From: Sean Whalen <44679+seanthegeek@users.noreply.github.com>
Date: Wed, 4 Sep 2024 16:31:41 -0400
Subject: [PATCH] 8.15.0

- Fix processing of SMTP-TLS reports ([#549](https://github.com/domainaware/parsedmarc/issues/549)), which broke in commit [410663d](https://github.com/domainaware/parsedmarc/commit/410663dbcaba019ca3d3744946348b56a635480b) (PR [#530](https://github.com/domainaware/parsedmarc/pull/530))
  - This PR enforced a stricter check for base64-encoded strings, which SMTP TLS reports from Google did not pass
  - Removing the check introduced its own issue, because some file paths were treated as base64-encoded strings
- Create a separate `extract_report_from_file_path()` function for processing reports based on a file path
- Remove report extraction based on a file path from `extract_report()`
---
 CHANGELOG.md           | 11 ++++++++++-
 parsedmarc/__init__.py | 33 +++++++++++++++++++--------------
 tests.py               |  4 ++--
 3 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 79378691..0022640d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,15 @@
 Changelog
 =========
 
+8.15.0
+------
+
+- Fix processing of SMTP-TLS reports ([#549](https://github.com/domainaware/parsedmarc/issues/549)), which broke in commit [410663d](https://github.com/domainaware/parsedmarc/commit/410663dbcaba019ca3d3744946348b56a635480b) (PR [#530](https://github.com/domainaware/parsedmarc/pull/530))
+  - This PR enforced a stricter check for base64-encoded strings, which SMTP TLS reports from Google did not pass
+  - Removing the check introduced its own issue, because some file paths were treated as base64-encoded strings
+- Create a separate `extract_report_from_file_path()` function for processing reports based on a file path
+- Remove report extraction based on a file path from `extract_report()`
+
 8.14.2
 ------
 
@@ -9,7 +18,7 @@ Changelog
 8.14.1
 ------
 
-- Fix processing of SMTP-TLS reports (#549)
+- Failed attempt to fix processing of SMTP-TLS reports (#549)
 
 8.14.0
 ------
diff --git a/parsedmarc/__init__.py b/parsedmarc/__init__.py
index 1ec1c585..bb99ce1b 100644
--- a/parsedmarc/__init__.py
+++ b/parsedmarc/__init__.py
@@ -34,7 +34,7 @@
 from parsedmarc.utils import parse_email
 from parsedmarc.utils import timestamp_to_human, human_timestamp_to_datetime
 
-__version__ = "8.14.2"
+__version__ = "8.15.0"
 
 logger.debug("parsedmarc v{0}".format(__version__))
 
@@ -585,32 +585,31 @@ def parse_aggregate_report_xml(
             "Unexpected error: {0}".format(error.__str__()))
 
 
-def extract_report(input_):
+def extract_report(content):
     """
-    Extracts text from a zip or gzip file at the given path, file-like object,
+    Extracts text from a zip or gzip file, as a base64-encoded string, file-like object,
     or bytes.
 
     Args:
-        input_: A path to a file, a file like object, or bytes
+        content: report file as a base64-encoded string, file-like object or bytes.
 
     Returns:
         str: The extracted text
 
     """
+    file_object = None
     try:
-        file_object = None
-        if isinstance(input_, str):
+        if isinstance(content, str):
             try:
-                input_ = b64decode(input_, validate=True)
-                file_object = BytesIO(input_)
+                file_object = BytesIO(b64decode(content))
             except binascii.Error:
                 pass
             if file_object is None:
-                file_object = open(input_, "rb")
-        elif type(input_) is bytes:
-            file_object = BytesIO(input_)
+                file_object = open(content, "rb")
+        elif type(content) is bytes:
+            file_object = BytesIO(content)
         else:
-            file_object = input_
+            file_object = content
 
         header = file_object.read(6)
         file_object.seek(0)
@@ -630,8 +629,6 @@ def extract_report(input_):
 
         file_object.close()
 
-    except FileNotFoundError:
-        raise ParserError("File was not found")
     except UnicodeDecodeError:
         file_object.close()
         raise ParserError("File objects must be opened in binary (rb) mode")
@@ -642,6 +639,14 @@ def extract_report(input_):
 
     return report
 
+def extract_report_from_file_path(file_path):
+    """Extracts report from a file at the given file_path"""
+    try:
+        with open(file_path, "rb") as report_file:
+            return extract_report(report_file.read())
+    except FileNotFoundError:
+        raise ParserError("File was not found")
+
 
 def parse_aggregate_report_file(
         _input,
diff --git a/tests.py b/tests.py
index 75a43142..464e63a3 100644
--- a/tests.py
+++ b/tests.py
@@ -82,7 +82,7 @@ def testExtractReportGZip(self):
         print()
         file = "samples/extract_report/nice-input.xml.gz"
         print("Testing {0}: " .format(file), end="")
-        xmlout = parsedmarc.extract_report(file)
+        xmlout = parsedmarc.extract_report_from_file_path(file)
         xmlin = open("samples/extract_report/nice-input.xml").read()
         self.assertTrue(compare_xml(xmlout, xmlin))
         print("Passed!")
@@ -92,7 +92,7 @@ def testExtractReportZip(self):
         print()
         file = "samples/extract_report/nice-input.xml.zip"
         print("Testing {0}: " .format(file), end="")
-        xmlout = parsedmarc.extract_report(file)
+        xmlout = parsedmarc.extract_report_from_file_path(file)
         print(xmlout)
         xmlin = minify_xml(open(
             "samples/extract_report/nice-input.xml").read())