Skip to content

Commit

Permalink
Merge pull request #68 from Syncurity/master
Browse files Browse the repository at this point in the history
v0.22
  • Loading branch information
TheElementalOfDestruction authored Jan 22, 2019
2 parents dba0f38 + afeb2b8 commit 8cc8d97
Show file tree
Hide file tree
Showing 19 changed files with 727 additions and 170 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
**v0.22.0**
* [[Syncurity #30](https://github.com/Syncurity/msg-extractor/issues/30)] Added `--validate` option.
* [[Syncurity #24](https://github.com/Syncurity/msg-extractor/issues/24)] Moved all dev code into its own scripts. Use `--dev` to use from the command line.
* [[mattgwwalker #67](https://github.com/Syncurity/msg-extractor/issues/67)] Added compatibility module to enforce unicode os functions.
* Added new function to `Message` class: `Message.sExists`. This function checks if a string stream exists. Its input should be formatted identically to that of `Message._getStringStream`.
* Added new function to `Message` class: `Message.fix_path`. This function will add the proper prefix to the path (if the `prefix` parameter is true) and adjust the path to be a string rather than a list or tuple.
* Added new function to `utils.py`: `get_full_class_name`. This function returns a string containing the module name and the class name of any instance of any class. It is returned in the format of `{module}.{class}`.
* Added a sort of alias of `Message._getStream`, `Message._getStringStream`, `Message.Exists`, and `Message.sExists` to `Attachment` and `Recipient`. These functions run inside the associated attachment directory or recipient directory, respectively.
* Added a fix to an issue introduced in an earlier version caused by accidentally deleting a letter in the code.

**v0.21.0**
* [[Syncurity #7](https://github.com/Syncurity/msg-extractor/issues/7)] Changed debug code to use logging module.
* [[Syncurity #26](https://github.com/Syncurity/msg-extractor/issues/26)] Fixed Attachment class using wrong properties file location in embedded msg files.
Expand Down
5 changes: 2 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,6 @@ Here is a list of things that are currently on our todo list:
* Tests (ie. unittest)
* Finish writing a usage guide
* Improve the intelligence of the saving functions
* Create a Pypi package
* Provide a way to save attachments and messages into a custom location under a custom name
* Implement better property handling that will convert each type into a python equivalent if possible
* Implement handling of named properties
Expand All @@ -161,8 +160,8 @@ Here is a list of things that are currently on our todo list:

.. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg
:target: LICENSE.txt
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.21.0-blue.svg
:target: https://pypi.org/project/extract-msg/0.21.0/
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.22.0-blue.svg
:target: https://pypi.org/project/extract-msg/0.22.0/
.. |PyPI1| image:: https://img.shields.io/badge/python-2.7+-brightgreen.svg
:target: https://www.python.org/downloads/release/python-2715/
.. |PyPI2| image:: https://img.shields.io/badge/python-3.6+-brightgreen.svg
Expand Down
2 changes: 1 addition & 1 deletion extract_msg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@

__author__ = 'Matthew Walker & The Elemental of Creation'
__date__ = '2018-12-05'
__version__ = '0.21.0'
__version__ = '0.22.0'

from extract_msg import constants
from extract_msg.attachment import Attachment
Expand Down
47 changes: 33 additions & 14 deletions extract_msg/__main__.py
Original file line number Diff line number Diff line change
@@ -1,32 +1,51 @@
import logging
import os
import sys
import traceback

from extract_msg import __doc__, utils
from extract_msg.compat import os_ as os
from extract_msg.message import Message

if __name__ == '__main__':
# Setup logging to stdout, indicate running from cli
CLI_LOGGING = 'extract_msg_cli'

args = utils.get_command_args()
args = utils.get_command_args(sys.argv[1:])
level = logging.INFO if args.verbose else logging.WARNING
utils.setup_logging(args.config_path, level, args.log, args.file_logging)
currentdir = os.getcwd() # Store this just in case the paths that have been given are relative
currentdir = os.getcwdu() # Store this just in case the paths that have been given are relative
if args.out_path:
if not os.path.exists(args.out_path):
os.makedirs(args.out_path)
out = args.out_path
else:
out = currentdir
if args.dev:
import extract_msg.dev
extract_msg.dev.main(args, sys.argv[1:])
elif args.validate:
import json
import pprint
import time

for x in args.msgs:
try:
with Message(x[0]) as msg:
#Right here we should still be in the path in currentdir
os.chdir(out)
msg.save(toJson = args.json, useFileName = args.use_filename, ContentId = args.cid)
except Exception as e:
print("Error with file '" + filename + "': " +
traceback.format_exc())
os.chdir(currentdir)
from extract_msg import validation

val_results = {x[0]: validation.validate(x[0]) for x in args.msgs}
filename = 'validation {}.json'.format(int(time.time()))
print('Validation Results:')
pprint.pprint(val_results)
print('These results have been saved to {}'.format(filename))
with open(filename, 'w') as fil:
fil.write(json.dumps(val_results))
utils.get_input('Press enter to exit...')
else:
utils.setup_logging(args.config_path, level, args.log, args.file_logging)
for x in args.msgs:
try:
with Message(x[0]) as msg:
# Right here we should still be in the path in currentdir
os.chdir(out)
msg.save(toJson = args.json, useFileName = args.use_filename, ContentId = args.cid)
except Exception as e:
print("Error with file '" + x[0] + "': " +
traceback.format_exc())
os.chdir(currentdir)
62 changes: 37 additions & 25 deletions extract_msg/attachment.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,47 +26,59 @@ def __init__(self, msg, dir_):
object.__init__(self)
self.__msg = msg
self.__dir = dir_
self.__props = Properties(
self.msg._getStream([self.__dir, '__properties_version1.0']),
self.__props = Properties(self._getStream('__properties_version1.0'),
constants.TYPE_ATTACHMENT)
# Get long filename
self.__longFilename = msg._getStringStream([dir_, '__substg1.0_3707'])
self.__longFilename = self._getStringStream('__substg1.0_3707')

# Get short filename
self.__shortFilename = msg._getStringStream([dir_, '__substg1.0_3704'])
self.__shortFilename = self._getStringStream('__substg1.0_3704')

# Get Content-ID
self.__cid = msg._getStringStream([dir_, '__substg1.0_3712'])
self.__cid = self._getStringStream('__substg1.0_3712')

# Get attachment data
if msg.Exists([dir_, '__substg1.0_37010102']):
if self.Exists('__substg1.0_37010102'):
self.__type = 'data'
self.__data = msg._getStream([dir_, '__substg1.0_37010102'])
elif msg.Exists([dir_, '__substg1.0_3701000D']):
if (self.props['37050003'].value & 0x7) != 0x5:
if not debug:
raise NotImplementedError(
'Current version of extract_msg does not support extraction of containers that are not embeded msg files.')
# TODO add implementation
else:
# DEBUG
logger.debug('Debugging is true, ignoring NotImplementedError and printing debug info...')
logger.debug('dir_ = {}'.format(dir_))
logger.debug('Writing properties stream to output:')
logger.debug('--------Start-Properties-Stream--------\n' +
properHex(self.props.stream) +
'\n---------End-Properties-Stream---------')
logger.debug('Writing directory contents to output:')
logger.debug('--------Start-Directory-Content--------')
logger.debug('\n'.join([repr(x) for x in msg.listDir(True, True)]))
logger.debug('---------End-Directory-Content---------')
self.__data = self._getStream('__substg1.0_37010102')
elif self.Exists('__substg1.0_3701000D'):
if (self.__props['37050003'].value & 0x7) != 0x5:
raise NotImplementedError(
'Current version of extract_msg does not support extraction of containers that are not embedded msg files.')
# TODO add implementation
else:
self.__prefix = msg.prefixList + [dir_, '__substg1.0_3701000D']
self.__type = 'msg'
self.__data = msg.__class__(self.msg.path, self.__prefix, self.__class__)
else:
# TODO Handling for special attacment types (like 0x00000007)
raise TypeError('Unknown attachment type.')

def _getStream(self, filename):
return self.__msg._getStream([self.__dir, filename])

def _getStringStream(self, filename):
"""
Gets a string representation of the requested filename.
Checks for both ASCII and Unicode representations and returns
a value if possible. If there are both ASCII and Unicode
versions, then :param prefer: specifies which will be
returned.
"""
return self.__msg._getStringStream([self.__dir, filename])

def Exists(self, filename):
    """
    Reports whether the stream named `filename` exists inside the
    attachment folder, as determined by the parent message.
    """
    stream_path = [self.__dir, filename]
    return self.__msg.Exists(stream_path)

def sExists(self, filename):
    """
    Reports whether the string stream named `filename` exists inside
    the attachment folder, as determined by the parent message.
    """
    stream_path = [self.__dir, filename]
    return self.__msg.sExists(stream_path)

def save(self, contentId=False, json=False, useFileName=False, raw=False, customPath=None, customFilename=None):
# Check if the user has specified a custom filename
filename = None
Expand Down
Empty file added extract_msg/compat/__init__.py
Empty file.
9 changes: 9 additions & 0 deletions extract_msg/compat/os_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"""
Compatibility module to ensure that certain functions exist across python versions
"""

# Re-export the public names of the standard `os` module so this module
# can be used as a drop-in replacement (`from ... import os_ as os`).
from os import *
import sys

# Python 3 has no `os.getcwdu`; its `os.getcwd` already returns a unicode
# string, so publish it under the Python 2 name as well.
if sys.version_info.major >= 3:
    getcwdu = getcwd
5 changes: 3 additions & 2 deletions extract_msg/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@
NEEDS_ARG = [
'--out-name',
]
MAINDOC = "extract_msg:\n\tExtracts emails and attachments saved in Microsoft Outlook's .msg files.\n\nhttps://github.com/mattgwwalker/msg-extractor"
MAINDOC = "extract_msg:\n\tExtracts emails and attachments saved in Microsoft Outlook's .msg files.\n\n" \
"https://github.com/mattgwwalker/msg-extractor"

# Define pre-compiled structs to make unpacking slightly faster
# General structs
Expand Down Expand Up @@ -142,7 +143,7 @@
# This property information was sourced from
# http://www.fileformat.info/format/outlookmsg/index.htm
# on 2013-07-22.
# It was extened by The Elemental of Creation on 2018-10-12
# It was extended by The Elemental of Creation on 2018-10-12
PROPERTIES = {
'00010102': 'Template data',
'0002000B': 'Alternate recipient allowed',
Expand Down
67 changes: 67 additions & 0 deletions extract_msg/dev.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
"""
Module for collecting data to be sent to the developer.
"""

# NOTE: Order of tasks:
# 1. Check for exceptions:
# * Check the entire process for exceptions raised by a specific file and log them. If none occur,
# log something like "No exceptions were detected."
# 2. Run the file through the developer versions of the classes


import logging

from extract_msg import dev_classes
from extract_msg import utils
from extract_msg.compat import os_ as os
from extract_msg.message import Message


# Module-level logger. The NullHandler is attached so that this module
# stays silent unless logging has been configured elsewhere.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


def setup_dev_logger(default_path=None, logfile = None, env_key='EXTRACT_MSG_LOG_CFG'):
    """
    Configures logging for developer mode.

    Forwards to `utils.setup_logging` with the level fixed at 5, which
    is the level every developer-mode message in this module is logged
    at, and with `True` as the fourth argument (presumably the file
    logging switch -- confirm against `utils.setup_logging`).

    :param default_path: passed through as the first argument.
    :param logfile: passed through as the third argument.
    :param env_key: passed through as the fifth argument.
    """
    developer_level = 5
    utils.setup_logging(default_path, developer_level, logfile, True, env_key)


def main(args, argv):
    """
    Runs developer mode over every msg file named on the command line.

    Please only run this from the command line. Attempting to use this
    otherwise is likely to fail.

    For each file, a normal save is attempted first and any exception
    it raises is logged. The file is then opened with the developer
    version of the Message class. All output is logged at level 5.

    :param args: the class instance returned by
        `extract_msg.utils.get_command_args`.
    :param argv: the list of arguments that were the input to the
        aforementioned function. It is only written to the log.
    """
    setup_dev_logger(args.config_path, args.log)
    currentdir = os.getcwdu()  # Store this just in case the paths that have been given are relative
    if args.out_path:
        if not os.path.exists(args.out_path):
            os.makedirs(args.out_path)
        out = args.out_path
    else:
        out = currentdir
    logger.log(5, 'ARGV: {}'.format(argv))
    for msg_args in args.msgs:
        msg_path = msg_args[0]
        logger.log(5, '---- RUNNING DEVELOPER MODE ON FILE {} ----'.format(msg_path))
        logger.log(5, 'EXCEPTION CHECK:')
        try:
            with Message(msg_path) as msg:
                # Right here we should still be in the path in currentdir
                os.chdir(out)
                msg.save(toJson=args.json, useFileName=args.use_filename, ContentId=args.cid)
        except Exception as e:
            logger.exception(e)
        else:
            logger.log(5, 'No exceptions raised.')
        finally:
            # Always return to the starting directory so that the next
            # (possibly relative) msg path is resolved correctly.
            os.chdir(currentdir)
        logger.log(5, 'DEVELOPER CLASS OUTPUT:')
        dev_classes.Message(msg_path)
        logger.log(5, '---- END OF DEVELOPER LOG ----')
    # Report the file of the last root handler that writes to one. Handlers
    # without a `baseFilename` attribute are skipped.
    logpath = None
    for handler in logging.root.handlers:
        logpath = getattr(handler, 'baseFilename', logpath)
    print('Logging complete. Log has been saved to {}'.format(logpath))
2 changes: 2 additions & 0 deletions extract_msg/dev_classes/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
from extract_msg.dev_classes.attachment import Attachment
from extract_msg.dev_classes.message import Message
84 changes: 84 additions & 0 deletions extract_msg/dev_classes/attachment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import logging

from extract_msg import constants
from extract_msg.properties import Properties
from extract_msg.utils import properHex

# Module-level logger. The NullHandler is attached so that this module
# stays silent unless logging has been configured elsewhere.
logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())


class Attachment(object):
    """
    Developer version of the `extract_msg.attachment.Attachment` class.

    When the attachment is a container whose type is not handled, the
    details are written to the log at level 5 and no data is extracted.
    """

    # Defaults used when nothing could be extracted (the unsupported
    # container case). They make `type` and `data` read as None instead
    # of raising AttributeError.
    __type = None
    __data = None

    def __init__(self, msg, dir_):
        """
        :param msg: the Message instance that the attachment belongs to.
        :param dir_: the directory inside the msg file where the attachment is located.

        Raises TypeError if neither of the two known attachment data
        streams exists in `dir_`.
        """
        object.__init__(self)
        self.__msg = msg
        self.__dir = dir_
        self.__props = Properties(
            msg._getStream([self.__dir, '__properties_version1.0']),
            constants.TYPE_ATTACHMENT)

        # Get attachment data
        if msg.Exists([dir_, '__substg1.0_37010102']):
            self.__type = 'data'
            self.__data = msg._getStream([dir_, '__substg1.0_37010102'])
        elif msg.Exists([dir_, '__substg1.0_3701000D']):
            # Only a value of 0x5 in the low three bits of property
            # 37050003 is handled (as an embedded msg file). Anything
            # else is logged and leaves `type` and `data` as None.
            if (self.__props['37050003'].value & 0x7) != 0x5:
                logger.log(5, 'Printing details of NotImplementedError...')
                logger.log(5, 'dir_ = {}'.format(dir_))
                logger.log(5, 'Writing properties stream to output:')
                logger.log(5, '--------Start-Properties-Stream--------\n' +
                           properHex(self.__props.stream) +
                           '\n---------End-Properties-Stream---------')
                logger.log(5, 'Writing directory contents to output:')
                logger.log(5, '--------Start-Directory-Content--------\n' +
                           '\n'.join([repr(x) for x in msg.listDir(True, True)]))
                logger.log(5, '---------End-Directory-Content---------')
                logger.log(5, 'End of NotImplementedError details')
            else:
                self.__prefix = msg.prefixList + [dir_, '__substg1.0_3701000D']
                self.__type = 'msg'
                self.__data = msg.__class__(msg.path, self.__prefix)
        else:
            raise TypeError('Unknown attachment type.')

    @property
    def data(self):
        """
        Returns the attachment data, or None if none was extracted.
        """
        return self.__data

    @property
    def dir(self):
        """
        Returns the directory inside the msg file where the attachment is located.
        """
        return self.__dir

    @property
    def msg(self):
        """
        Returns the Message instance the attachment belongs to.
        """
        return self.__msg

    @property
    def props(self):
        """
        Returns the Properties instance of the attachment.
        """
        return self.__props

    @property
    def type(self):
        """
        Returns the type of the data ('data' or 'msg'), or None if no
        data was extracted.
        """
        return self.__type
Loading

0 comments on commit 8cc8d97

Please sign in to comment.