Skip to content

Commit

Permalink
Merge pull request #366 from TeamMsgExtractor/next-release
Browse files Browse the repository at this point in the history
Fix for #365 (v0.41.3)
  • Loading branch information
TheElementalOfDestruction authored Jun 11, 2023
2 parents e8403b7 + cfc9682 commit ac77e62
Show file tree
Hide file tree
Showing 5 changed files with 42 additions and 12 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
**v0.41.3**
* [[TeamMsgExtractor #365](https://github.com/TeamMsgExtractor/msg-extractor/issues/365)] Fixed an issue that would cause certain values retrieved from the header to not be decoded properly. The decoding is now applied when those values are retrieved, so the header itself has not been changed.
* Added new property `MessageBase.headerText` which is the text content of the header stream. Adjusted other things to use this instead of trying to retrieve the stream directly in multiple places.
* Added typing to `MessageBase.header`.

**v0.41.2**
* Updated annotations on `MessageBase.save`.
* Added new enum `BodyTypes`.
Expand Down
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,8 @@ your access to the newest major version of extract-msg.
.. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg
:target: LICENSE.txt

.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.41.2-blue.svg
:target: https://pypi.org/project/extract-msg/0.41.2/
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.41.3-blue.svg
:target: https://pypi.org/project/extract-msg/0.41.3/

.. |PyPI2| image:: https://img.shields.io/badge/python-3.8+-brightgreen.svg
:target: https://www.python.org/downloads/release/python-3816/
Expand Down
4 changes: 2 additions & 2 deletions extract_msg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__author__ = 'Destiny Peterson & Matthew Walker'
__date__ = '2023-05-24'
__version__ = '0.41.2'
__date__ = '2023-06-10'
__version__ = '0.41.3'

__all__ = [
# Modules:
Expand Down
25 changes: 17 additions & 8 deletions extract_msg/message_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@
from .structures.report_tag import ReportTag
from .recipient import Recipient
from .utils import (
addNumToDir, addNumToZipDir, createZipOpen, findWk, htmlSanitize,
inputToBytes, inputToString, isEncapsulatedRtf, prepareFilename,
rtfSanitizeHtml, rtfSanitizePlain, validateHtml
addNumToDir, addNumToZipDir, createZipOpen, decodeRfc2047, findWk,
htmlSanitize, inputToBytes, inputToString, isEncapsulatedRtf,
prepareFilename, rtfSanitizeHtml, rtfSanitizePlain, validateHtml
)

from imapclient.imapclient import decode_utf7
Expand Down Expand Up @@ -135,6 +135,7 @@ def _genRecipient(self, recipientType, recipientInt : RecipientType) -> Optional
if self.headerInit():
value = self.header[recipientType]
if value:
value = decodeRfc2047(value)
value = value.replace(',', self.__recipientSeparator)

# If the header had a blank field or didn't have the field, generate
Expand Down Expand Up @@ -792,7 +793,7 @@ def save(self, **kwargs) -> MessageBase:

# If the user has requested the headers for this file, save it now.
if kwargs.get('saveHeader', False):
headerText = self._getStringStream('__substg1.0_007D')
headerText = self.headerText
if not headerText:
headerText = constants.HEADER_FORMAT.format(subject = self.subject, **self.header)

Expand Down Expand Up @@ -1047,15 +1048,15 @@ def detectedBodies(self) -> BodyTypes:
return bodies

@property
def header(self):
def header(self) -> email.message.Message:
"""
Returns the message header, if it exists. Otherwise it will generate
one.
"""
try:
return self._header
except AttributeError:
headerText = self._getStringStream('__substg1.0_007D')
headerText = self.headerText
if headerText:
self._header = EmailParser().parsestr(headerText)
self._header['date'] = self.date
Expand All @@ -1071,6 +1072,7 @@ def header(self):
# TODO find authentication results outside of header
header.add_header('Authentication-Results', None)
self._header = header

return self._header

@property
Expand Down Expand Up @@ -1130,6 +1132,13 @@ def headerFormatProperties(self) -> constants.HEADER_FORMAT_TYPE:
},
}

@functools.cached_property
def headerText(self) -> Optional[str]:
    """
    The raw text of the header stream, if it exists.

    The value is read from the '__substg1.0_007D' string stream via
    ``_getStringStream`` and, because this is a cached property, is only
    looked up on first access.
    """
    # Identifier of the string stream that holds the header text.
    headerStreamName = '__substg1.0_007D'
    return self._getStringStream(headerStreamName)

@property
def htmlBody(self) -> Optional[bytes]:
"""
Expand Down Expand Up @@ -1229,7 +1238,7 @@ def messageId(self) -> Optional[str]:
except AttributeError:
headerResult = None
if self.headerInit():
headerResult = self._header['message-id']
headerResult = self.header['message-id']
if headerResult is not None:
self._messageId = headerResult
else:
Expand Down Expand Up @@ -1329,7 +1338,7 @@ def sender(self) -> Optional[str]:
except AttributeError:
# Check header first
if self.headerInit():
headerResult = self.header['from']
headerResult = decodeRfc2047(self.header['from'])
if headerResult is not None:
self._sender = headerResult
return headerResult
Expand Down
16 changes: 16 additions & 0 deletions extract_msg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import collections
import copy
import datetime
import email.header
import email.message
import email.policy
import glob
Expand Down Expand Up @@ -169,6 +170,21 @@ def _open(name, mode, *args, **kwargs):
return _open


def decodeRfc2047(encoded : str) -> str:
    """
    Decodes text encoded using the method specified in RFC 2047.

    :param encoded: The header value to decode. It may contain any mix of
        RFC 2047 encoded words and plain text, or no encoded words at all.

    :returns: The decoded text, with all parts joined in their original
        order.

    :raises UnicodeDecodeError: If a part's bytes are not valid for its
        declared charset (or for ASCII when no charset is declared).
    :raises LookupError: If a part declares a charset Python does not know.
    """
    # decode_header returns a list of (text, charset) tuples, so each part is
    # decoded on its own and the results are joined together.
    #
    # If the input contains no encoded words at all, decode_header returns the
    # text as a str instead of bytes. In that case the text element must be
    # used directly; using the whole tuple would make the join fail.
    parts = []
    for text, charset in email.header.decode_header(encoded):
        if isinstance(text, bytes):
            # Parts without a declared charset are treated as ASCII.
            text = text.decode(charset or 'ascii')
        parts.append(text)

    return ''.join(parts)


def dictGetCasedKey(_dict : Dict, key : Any) -> Any:
"""
Retrieves the key from the dictionary with the proper casing using a
Expand Down

0 comments on commit ac77e62

Please sign in to comment.