Skip to content

Commit

Permalink
Merge pull request #428 from TeamMsgExtractor/next-release
Browse files Browse the repository at this point in the history
Version 0.49.0
  • Loading branch information
TheElementalOfDestruction authored Aug 21, 2024
2 parents 1e3bf80 + 30a28d8 commit aeea79b
Show file tree
Hide file tree
Showing 10 changed files with 264 additions and 59 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
**v0.49.0**
* [[TeamMsgExtractor #427](https://github.com/TeamMsgExtractor/msg-extractor/issues/427)] Adjusted code for converting time stamps to create null dates for any time stamp beyond a certain point. The point was determined to be close to the existing null dates.
* [[TeamMsgExtractor #425](https://github.com/TeamMsgExtractor/msg-extractor/issues/425)] Added basic support for custom attachments that are Windows Metafiles.
* Changed tolerance of bitmap custom attachment handler to allow for attachments with only a `CONTENTS` stream. This change was made after seeing an example of a file that only had a `CONTENTS` stream and no other streams for the custom data. The code now also tries to create default values for things previously determined from those other streams.
* Fixed an issue in `tryGetMimetype` where the code didn't properly check if the data type was bytes (it only checked if it had a type).
* Corrected some exports.
* Added new `ErrorBehavior` value `CUSTOM_ATTACH_TOLERANT` to allow skipping checks for unused data that is normally validated.

**v0.48.7**
* [[TeamMsgExtractor #420](https://github.com/TeamMsgExtractor/msg-extractor/issues/420)] Fixed typo introduced in last version.

Expand Down
4 changes: 2 additions & 2 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,8 @@ your access to the newest major version of extract-msg.
.. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg
:target: LICENSE.txt

.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.48.7-blue.svg
:target: https://pypi.org/project/extract-msg/0.48.7/
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.49.0-blue.svg
:target: https://pypi.org/project/extract-msg/0.49.0/

.. |PyPI2| image:: https://img.shields.io/badge/python-3.8+-brightgreen.svg
:target: https://www.python.org/downloads/release/python-3810/
Expand Down
4 changes: 2 additions & 2 deletions extract_msg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__author__ = 'Destiny Peterson & Matthew Walker'
__date__ = '2024-07-07'
__version__ = '0.48.7'
__date__ = '2024-08-21'
__version__ = '0.49.0'

__all__ = [
# Modules:
Expand Down
2 changes: 2 additions & 0 deletions extract_msg/attachments/custom_att_handler/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
'CustomAttachmentHandler',
'LinkedObjectAttachment',
'OutlookImageDIB',
'OutlookImageMetafile',

# Functions.
'getHandler',
Expand Down Expand Up @@ -55,6 +56,7 @@ def registerHandler(handler: Type[CustomAttachmentHandler]) -> None:
# Import built-in handler modules. They will all automatically register their
# respective handler(s).
from .outlook_image_dib import OutlookImageDIB
from .outlook_image_meta import OutlookImageMetafile
from .lnk_obj_att import LinkedObjectAttachment


Expand Down
93 changes: 55 additions & 38 deletions extract_msg/attachments/custom_att_handler/outlook_image_dib.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@


__all__ = [
'OutlookImage',
'OutlookImageDIB',
]


Expand All @@ -12,7 +12,7 @@

from . import registerHandler
from .custom_handler import CustomAttachmentHandler
from ...enums import DVAspect, InsecureFeatures
from ...enums import DVAspect, ErrorBehavior, InsecureFeatures
from ...exceptions import DependencyError, SecurityError


Expand All @@ -31,45 +31,60 @@ class OutlookImageDIB(CustomAttachmentHandler):

def __init__(self, attachment: AttachmentBase):
super().__init__(attachment)
# First we need to get the mailstream.
stream = self.getStream('\x03MailStream')
if not stream:
raise ValueError('MailStream could not be found.')
if len(stream) != 12:
raise ValueError('MailStream is the wrong length.')
# Next get the bitmap data.
# First, get the mandatory bitmap data.
self.__data = self.getStream('CONTENTS')
if not self.__data:
raise ValueError('Bitmap data could not be read for Outlook signature.')
# Get the OLE data.
oleStream = self.getStream('\x01Ole')
if not oleStream:
raise ValueError('OLE stream could not be found.')

# While I have only seen this stream be one length, it could in theory
# be more than one length. So long as it is *at least* 20 bytes, we
# call it valid.
if len(oleStream) < 20:
raise ValueError('OLE stream is too short.')

# Unpack and verify the OLE stream.
vals = _ST_OLE.unpack(oleStream[:20])
# Check the version magic.
if vals[0] != 0x2000001:
raise ValueError('OLE stream has wrong version magic.')
# Check the reserved bytes.
if vals[3] != 0:
raise ValueError('OLE stream has non-zero reserved int.')

# Unpack the mailstream and create the HTML tag.
vals = _ST_MAILSTREAM.unpack(stream)
self.__dvaspect = DVAspect(vals[0])
self.__x = vals[1]
self.__y = vals[2]

# Next we need to get the mailstream.
stream = self.getStream('\x03MailStream')
if stream:
if len(stream) != 12:
raise ValueError('MailStream is the wrong length.')

# Unpack the mailstream.
vals = _ST_MAILSTREAM.unpack(stream)
self.__dvaspect = DVAspect(vals[0])
self.__x = vals[1]
self.__y = vals[2]
else:
#raise ValueError('MailStream could not be found.')
# Create default values.
self.__dvaspect = DVAspect.CONTENT
# TODO figure out what the default values for these should actually
# be.
self.__x = 0
self.__y = 0

# This is done regardless of default values or not.
# Convert to twips for RTF.
self.__xtwips = int(round(self.__x / 1.7639))
self.__ytwips = int(round(self.__y / 1.7639))

# Check the error behavior to see if we should even do this check.
if ErrorBehavior.CUSTOM_ATTACH_TOLERANT not in attachment.msg.errorBehavior:
# Get the OLE data.
oleStream = self.getStream('\x01Ole')
if oleStream:
# While I have only seen this stream be one length, it could in
# theory be more than one length. So long as it is *at least* 20
# bytes, we call it valid.
if len(oleStream) < 20:
raise ValueError('OLE stream is too short.')
# Unpack and verify the OLE stream.
vals = _ST_OLE.unpack(oleStream[:20])
# Check the version magic.
if vals[0] != 0x2000001:
raise ValueError('OLE stream has wrong version magic.')
# Check the reserved bytes.
if vals[3] != 0:
raise ValueError('OLE stream has non-zero reserved int.')
else:
#raise ValueError('OLE stream could not be found.')
# If the stream is there we validate it, so here we just leave
# it alone since nothing is actually stored.
pass

@classmethod
def isCorrectHandler(cls, attachment: AttachmentBase) -> bool:
if attachment.clsid != '00000316-0000-0000-C000-000000000046':
Expand All @@ -78,10 +93,12 @@ def isCorrectHandler(cls, attachment: AttachmentBase) -> bool:
# Check for the required streams.
if not attachment.exists('__substg1.0_3701000D/CONTENTS'):
return False
if not attachment.exists('__substg1.0_3701000D/\x01Ole'):
return False
if not attachment.exists('__substg1.0_3701000D/\x03MailStream'):
return False
# These streams were previously considered mandatory, but are now
# tentatively optional.
#if not attachment.exists('__substg1.0_3701000D/\x01Ole'):
# return False
#if not attachment.exists('__substg1.0_3701000D/\x03MailStream'):
# return False

return True

Expand Down
158 changes: 158 additions & 0 deletions extract_msg/attachments/custom_att_handler/outlook_image_meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
from __future__ import annotations


__all__ = [
'OutlookImageMetafile',
]


import struct

from typing import Optional, TYPE_CHECKING

from . import registerHandler
from .custom_handler import CustomAttachmentHandler
from ...enums import DVAspect, ErrorBehavior, InsecureFeatures
from ...exceptions import DependencyError, SecurityError


if TYPE_CHECKING:
from ..attachment_base import AttachmentBase

# Layout of the first 20 bytes of the '\x01Ole' stream: five little-endian
# unsigned 32-bit integers. The handler only inspects index 0 (the version
# magic, expected to be 0x2000001) and index 3 (a reserved value, expected to
# be 0).
_ST_OLE = struct.Struct('<IIIII')
# Layout of the 12-byte '\x03MailStream' stream: three little-endian unsigned
# 32-bit integers, read as (DVAspect value, x extent, y extent).
_ST_MAILSTREAM = struct.Struct('<III')


class OutlookImageMetafile(CustomAttachmentHandler):
    """
    Custom handler for a special attachment type, a Windows Metafile stored in
    a way special to Outlook.

    Only the ``CONTENTS`` stream is mandatory. The ``\\x03MailStream`` and
    ``\\x01Ole`` streams are optional: when the former is missing, default
    values are used, and when the latter is missing, its validation is
    skipped.
    """

    def __init__(self, attachment: AttachmentBase):
        """
        :param attachment: The attachment whose custom data is being handled.

        :raises ValueError: The ``CONTENTS`` stream is missing or empty, the
            MailStream has the wrong length, or the OLE stream failed
            validation.
        """
        super().__init__(attachment)
        # First, get the mandatory metafile data.
        self.__data = self.getStream('CONTENTS')
        if not self.__data:
            raise ValueError('Metafile data could not be read for Outlook image.')

        # Next we need to get the mailstream.
        stream = self.getStream('\x03MailStream')
        if stream:
            if len(stream) != 12:
                raise ValueError('MailStream is the wrong length.')

            # Unpack the mailstream.
            vals = _ST_MAILSTREAM.unpack(stream)
            self.__dvaspect = DVAspect(vals[0])
            self.__x = vals[1]
            self.__y = vals[2]
        else:
            # A missing MailStream is tolerated, so create default values.
            self.__dvaspect = DVAspect.CONTENT
            # TODO figure out what the default values for these should actually
            # be.
            self.__x = 0
            self.__y = 0

        # This is done regardless of default values or not.
        # Convert to twips for RTF.
        # NOTE(review): the 1.7639 divisor suggests the stored extents are in
        # units of 0.01 mm -- confirm.
        self.__xtwips = int(round(self.__x / 1.7639))
        self.__ytwips = int(round(self.__y / 1.7639))

        # Check the error behavior to see if we should even do this check.
        # Nothing from the OLE stream is stored, so it is only validated.
        if ErrorBehavior.CUSTOM_ATTACH_TOLERANT not in attachment.msg.errorBehavior:
            # Get the OLE data. If the stream is absent there is nothing to
            # validate, so it is simply left alone.
            oleStream = self.getStream('\x01Ole')
            if oleStream:
                # While I have only seen this stream be one length, it could in
                # theory be more than one length. So long as it is *at least* 20
                # bytes, we call it valid.
                if len(oleStream) < 20:
                    raise ValueError('OLE stream is too short.')
                # Unpack and verify the OLE stream.
                vals = _ST_OLE.unpack(oleStream[:20])
                # Check the version magic.
                if vals[0] != 0x2000001:
                    raise ValueError('OLE stream has wrong version magic.')
                # Check the reserved bytes.
                if vals[3] != 0:
                    raise ValueError('OLE stream has non-zero reserved int.')

    @classmethod
    def isCorrectHandler(cls, attachment: AttachmentBase) -> bool:
        """
        Checks if this handler is the correct one for the attachment.

        The attachment must have the expected CLSID and a ``CONTENTS`` stream.
        The ``\\x01Ole`` and ``\\x03MailStream`` streams were previously
        considered mandatory, but are now tentatively optional and are not
        checked here.
        """
        if attachment.clsid != '00000315-0000-0000-C000-000000000046':
            return False

        # Check for the required stream.
        if not attachment.exists('__substg1.0_3701000D/CONTENTS'):
            return False

        return True

    def generateRtf(self) -> Optional[bytes]:
        """
        Generates the RTF to inject in place of the \\objattph tag.

        If this function should do nothing, returns ``None``.

        :raises DependencyError: PIL or Pillow could not be found.
        :raises SecurityError: The insecure feature ``PIL_IMAGE_PARSING`` is
            not enabled on the parent MSG file.
        """
        if InsecureFeatures.PIL_IMAGE_PARSING not in self.attachment.msg.insecureFeatures:
            raise SecurityError('Generating the RTF for a custom attachment requires the insecure feature PIL_IMAGE_PARSING.')

        try:
            import PIL.Image
        except ImportError as err:
            raise DependencyError('PIL or Pillow is required for inserting an Outlook Image into the body.') from err

        # First, convert the metafile into a PNG so we can insert it into the
        # body.
        import io

        # Note, use self.data instead of self.__data to allow support for
        # extensions.
        # NOTE(review): Pillow documents rendering of WMF files as only being
        # available on Windows, so this may raise on other platforms --
        # confirm the desired behavior.
        with PIL.Image.open(io.BytesIO(self.data)) as img:
            out = io.BytesIO()
            img.save(out, 'PNG')
            # Read the dimensions while the image is still open.
            width = img.width
            height = img.height

        hexData = out.getvalue().hex()

        inject = '{\\*\\shppict\n{\\pict\\picscalex100\\picscaley100'
        inject += f'\\picw{width}\\pich{height}'
        inject += f'\\picwgoal{self.__xtwips}\\pichgoal{self.__ytwips}\n'
        inject += '\\pngblip ' + hexData + '}}'

        return inject.encode()

    @property
    def data(self) -> bytes:
        """
        The raw bytes of the ``CONTENTS`` stream.
        """
        return self.__data

    @property
    def name(self) -> str:
        """
        The name to use for the data, always ending in ``.wmf``.
        """
        # Try to get the name from the attachment. If that fails, name it based
        # on the number.
        if not (name := self.attachment.name):
            name = f'attachment {int(self.attachment.dir[-8:], 16)}'
        return name + '.wmf'

    @property
    def obj(self) -> bytes:
        """
        The object representing the data, which is the same as :attr:`data`.
        """
        return self.data



# Register the handler as a side effect of importing this module. The package
# __init__ imports the built-in handler modules so that they register
# themselves automatically.
registerHandler(OutlookImageMetafile)
31 changes: 17 additions & 14 deletions extract_msg/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -666,8 +666,10 @@ class ErrorBehavior(enum.IntFlag):
* THROW: Throw the exception regardless of type.
* ATTACH_NOT_IMPLEMENTED: Silence the exception for NotImplementedError.
* ATTACH_BROKEN: Silence the exception for broken attachments.
* ATTACH_SUPPRESS_ALL: Silence the exception for NotImplementedError and for
broken attachments.
* CUSTOM_ATTACH_TOLERANT: Makes custom attachments more tolerant for
data that is validated but not used.
* ATTACH_SUPPRESS_ALL: Silence the exception for NotImplementedError, for
broken attachments, and for custom attachment issues.
* RTFDE_MALFORMED: Silences errors about malformed RTF data.
* RTFDE_UNKNOWN_ERROR: Silences errors from RTFDE that are not normal.
* RTFDE: Silences all errors from RTFDE.
Expand All @@ -679,22 +681,23 @@ class ErrorBehavior(enum.IntFlag):
simply be dropped.
* SUPPRESS_ALL: Silences all of the above.
"""
THROW = 0b000000
THROW = 0b00000000
# Attachments.
ATTACH_NOT_IMPLEMENTED = 0b000001
ATTACH_BROKEN = 0b000010
ATTACH_SUPPRESS_ALL = 0b000011
ATTACH_NOT_IMPLEMENTED = 0b00000001
ATTACH_BROKEN = 0b00000010
CUSTOM_ATTACH_TOLERANT = 0b00000100
ATTACH_SUPPRESS_ALL = 0b00000111
# RTFDE.
RTFDE_MALFORMED = 0b000100
RTFDE_UNKNOWN_ERROR = 0b001000
RTFDE = 0b001100
RTFDE_MALFORMED = 0b00001000
RTFDE_UNKNOWN_ERROR = 0b00010000
RTFDE = 0b00011000
# General.
STANDARDS_VIOLATION = 0b010000
OLE_DEFECT_INCORRECT = 0b100000
# Named Properties
NAMED_NAME_STREAM = 0b1000000
STANDARDS_VIOLATION = 0b00100000
OLE_DEFECT_INCORRECT = 0b01000000
# Named Properties.
NAMED_NAME_STREAM = 0b10000000

SUPPRESS_ALL = 0b1111111
SUPPRESS_ALL = 0b111111111111



Expand Down
Loading

0 comments on commit aeea79b

Please sign in to comment.