Skip to content

Commit

Permalink
Merge pull request #381 from TeamMsgExtractor/next-release
Browse files Browse the repository at this point in the history
Version 0.43.0
  • Loading branch information
TheElementalOfDestruction authored Aug 3, 2023
2 parents 8fc4480 + 863ae89 commit 8dfff86
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 9 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
**v0.43.0**
* [[TeamMsgExtractor #56](https://github.com/TeamMsgExtractor/msg-extractor/issues/56)] [[TeamMsgExtractor #248](https://github.com/TeamMsgExtractor/msg-extractor/issues/248)] Added new function `MessageBase.asEmailMessage` which will convert the `MessageBase` instance, if possible, to an `email.message.EmailMessage` object. If an embedded MSG file on a `MessageBase` object is of a class that does not have this function, it will simply be attached to the instance as bytes.
* Changed imports in `message_base.py` to help with type checkers.
* Changed from using `email.parser.EmailParser` to `email.parser.HeaderParser` in `MessageBase.header`.
* Changed some of the internal code for `MessageBase.header`. This should improve usage of it, and should not have any noticeable negative changes. You man notice some of the values parse slightly differently, but this effect should be mostly suppressed.

**v0.42.2**
* Fix bug in `AttachmentBase.mimetype` that would cause it to throw an error when accessed. This bug was introduced in `v0.42.0`.

Expand Down
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ your access to the newest major version of extract-msg.
.. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg
:target: LICENSE.txt

.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.42.2-blue.svg
:target: https://pypi.org/project/extract-msg/0.42.2/
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.43.0-blue.svg
:target: https://pypi.org/project/extract-msg/0.43.0/

.. |PyPI2| image:: https://img.shields.io/badge/python-3.8+-brightgreen.svg
:target: https://www.python.org/downloads/release/python-3816/
Expand Down
2 changes: 1 addition & 1 deletion extract_msg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

__author__ = 'Destiny Peterson & Matthew Walker'
__date__ = '2023-08-02'
__version__ = '0.42.2'
__version__ = '0.43.0'

__all__ = [
# Modules:
Expand Down
69 changes: 63 additions & 6 deletions extract_msg/msg_classes/message_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import base64
import datetime
import email.message
import email.utils
import functools
import html
Expand All @@ -20,7 +21,9 @@
import compressed_rtf
import RTFDE

from email.parser import Parser as EmailParser
from email import policy
from email.message import EmailMessage
from email.parser import HeaderParser
from typing import Callable, List, Optional, Union

from .. import constants
Expand All @@ -30,8 +33,8 @@
BodyTypes, DeencapType, ErrorBehavior, RecipientType, SaveType
)
from ..exceptions import (
DataNotFoundError, DeencapMalformedData, DeencapNotEncapsulated,
IncompatibleOptionsError, WKError
ConversionError, DataNotFoundError, DeencapMalformedData,
DeencapNotEncapsulated, IncompatibleOptionsError, WKError
)
from .msg import MSGFile
from ..structures.report_tag import ReportTag
Expand Down Expand Up @@ -155,6 +158,59 @@ def _genRecipient(self, recipientType, recipientInt : RecipientType) -> Optional

return value

def asEmailMessage(self) -> EmailMessage:
"""
Returns an instance of EmailMessage used to represent the contents of
this message.
:raises ConversionError: The function failed to convert one of the
attachments into a form that it could attach, and the attachment
data type was not None.
"""
ret = EmailMessage()

# Copy the headers.
for key, value in self.header.items():
ret[key] = value

# Attach the body to the EmailMessage instance.
if self.htmlBody:
ret.set_content(self.body, subtype = 'html', cte = 'quoted-printable')
elif self.body:
ret.set_content(self.body, cte = 'quoted-printable')

# Process attachments.
for att in self.attachments:
if att.dataType:
if hasattr(att.dataType, 'asEmailMessage'):
# Replace the extension with '.eml'.
filename = att.getFilename()
if filename.lower().endswith('.msg'):
filename = filename[:-4] + '.eml'
ret.add_attachment(
att.data.asEmailMessage(),
filename = filename,
cid = att.contentId)
else:
if issubclass(att.dataType, bytes):
data = att.data
elif issubclass(att.dataType, MSGFile):
if hasattr(att.dataType, 'asBytes'):
data = att.asBytes
else:
data = att.data.exportBytes()
else:
raise ConversionError(f'Could not find a suitable method to attach attachment data type "{att.dataType}".')
mime = att.mimetype or 'application/octet-stream'
mainType, subType = mime.split('/')[0], mime.split('/')[-1]
ret.add_attachment(data,
maintype = mainType,
subtype = subType,
filename = att.getFilename(),
cid = att.contentId)

return ret

def deencapsulateBody(self, rtfBody : bytes, bodyType : DeencapType) -> Optional[Union[bytes, str]]:
"""
A function to deencapsulate the specified body from the rtfBody. Returns
Expand Down Expand Up @@ -1009,11 +1065,12 @@ def header(self) -> email.message.Message:
"""
headerText = self.headerText
if headerText:
header = EmailParser().parsestr(headerText)
header['date'] = self.date
header = HeaderParser(policy = policy.default).parsestr(headerText)
del header['Date']
header['Date'] = self.date
else:
logger.info('Header is empty or was not found. Header will be generated from other streams.')
header = EmailParser().parsestr('')
header = HeaderParser(policy = policy.default).parsestr('')
header.add_header('Date', self.date)
header.add_header('From', self.sender)
header.add_header('To', self.to)
Expand Down

0 comments on commit 8dfff86

Please sign in to comment.