Skip to content

Commit

Permalink
Merge pull request #405 from TeamMsgExtractor/next-release
Browse files Browse the repository at this point in the history
Version 0.48.1
  • Loading branch information
TheElementalOfDestruction authored Mar 8, 2024
2 parents e2b67d7 + af5fa57 commit 235a27c
Show file tree
Hide file tree
Showing 9 changed files with 119 additions and 30 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
**v0.48.1**
* Added an option (`-s`, `--stdin`) to the command line to take an MSG file from stdin. This allows the user to pipe the MSG data from another program directly instead of having to write a middleman that uses the `extract-msg` library directly or having to write the file to the disk first.
* Changed main function to allow for manual argument list to be passed to it.
* Added attributes to `AttachmentBase` for creation and modification time. These can be accessed through `createdAt` or `creationTime` and `lastModificationTime` or `modifiedAt`.
* Changed `OleWriter` tests to output the name of the test file being processed if an error occurs.
* Added tests for some command line stuff.

**v0.48.0**
* Adjusted error handling for named properties to handle critical streams being missing and to allow suppression of those errors.
* Adjusted error handling for named properties to allow silencing of errors caused by invalid references to the name stream. If `ErrorBehavior.NAMED_NAME_STREAM` is provided to the `MSGFile` instance, a warning will be logged and that entry will simply be dropped.
Expand Down
7 changes: 4 additions & 3 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ refer to the usage information provided from the program's help dialog:

usage: extract_msg [-h] [--use-content-id] [--json] [--file-logging] [-v] [--log LOG] [--config CONFIGPATH] [--out OUTPATH] [--use-filename] [--dump-stdout] [--html] [--pdf] [--wk-path WKPATH] [--wk-options [WKOPTIONS ...]]
[--prepared-html] [--charset CHARSET] [--raw] [--rtf] [--allow-fallback] [--skip-body-not-found] [--zip ZIP] [--save-header] [--attachments-only] [--skip-hidden] [--no-folders] [--skip-embedded] [--extract-embedded]
[--overwrite-existing] [--skip-not-implemented] [--out-name OUTNAME | --glob] [--ignore-rtfde] [--progress]
[--overwrite-existing] [--skip-not-implemented] [--out-name OUTNAME | --glob] [--ignore-rtfde] [--progress] [-s]
msg [msg ...]

extract_msg: Extracts emails and attachments saved in Microsoft Outlook's .msg files. https://github.com/TeamMsgExtractor/msg-extractor
Expand Down Expand Up @@ -107,6 +107,7 @@ refer to the usage information provided from the program's help dialog:
--glob, --wildcard Interpret all paths as having wildcards. Incompatible with --out-name.
--ignore-rtfde Ignores all errors thrown from RTFDE when trying to save. Useful for allowing fallback to continue when an exception happens.
--progress Shows what file the program is currently working on during its progress.
-s, --stdin Read file from stdin (only works with one file at a time).

**To use this in your own script**, start by using:

Expand Down Expand Up @@ -259,8 +260,8 @@ your access to the newest major version of extract-msg.
.. |License: GPL v3| image:: https://img.shields.io/badge/License-GPLv3-blue.svg
:target: LICENSE.txt

.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.48.0-blue.svg
:target: https://pypi.org/project/extract-msg/0.48.0/
.. |PyPI3| image:: https://img.shields.io/badge/pypi-0.48.1-blue.svg
:target: https://pypi.org/project/extract-msg/0.48.1/

.. |PyPI2| image:: https://img.shields.io/badge/python-3.8+-brightgreen.svg
:target: https://www.python.org/downloads/release/python-3810/
Expand Down
4 changes: 2 additions & 2 deletions extract_msg/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__author__ = 'Destiny Peterson & Matthew Walker'
__date__ = '2024-02-28'
__version__ = '0.48.0'
__date__ = '2024-03-08'
__version__ = '0.48.1'

__all__ = [
# Modules:
Expand Down
7 changes: 4 additions & 3 deletions extract_msg/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,13 @@

from extract_msg import __doc__, openMsg, utils
from extract_msg.enums import ErrorBehavior
from typing import List


def main() -> None:
def main(argv : List[str]) -> None:
# Setup logging to stdout, indicate running from cli
CLI_LOGGING = 'extract_msg_cli'
args = utils.getCommandArgs(sys.argv[1:])
args = utils.getCommandArgs(argv[1:])

# Determine where to save the files to.
currentDir = os.getcwd() # Store this in case the path changes.
Expand Down Expand Up @@ -111,4 +112,4 @@ def strSanitize(inp):
_zip.close()

if __name__ == '__main__':
main()
main(sys.argv)
63 changes: 47 additions & 16 deletions extract_msg/attachments/attachment_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,7 +473,7 @@ def save(self, **kwargs) -> SAVE_TYPE:
the first item specifies what the second value will be.
"""

@functools.cached_property
@cached_property
def attachmentEncoding(self) -> Optional[bytes]:
"""
The encoding information about the attachment object.
Expand All @@ -483,7 +483,7 @@ def attachmentEncoding(self) -> Optional[bytes]:
"""
return self.getStream('__substg1.0_37020102')

@functools.cached_property
@cached_property
def additionalInformation(self) -> Optional[str]:
"""
The additional information about the attachment.
Expand All @@ -495,7 +495,7 @@ def additionalInformation(self) -> Optional[str]:
"""
return self.getStringStream('__substg1.0_370F')

@functools.cached_property
@cached_property
def cid(self) -> Optional[str]:
"""
Returns the Content ID of the attachment, if it exists.
Expand Down Expand Up @@ -526,8 +526,25 @@ def clsid(self) -> str:

@property
def contentId(self) -> Optional[str]:
"""
Alias of :attr:`cid`.
"""
return self.cid

@property
def createdAt(self) -> Optional[datetime.datetime]:
"""
Alias of :attr:`creationTime`.
"""
return self.creationTime

@cached_property
def creationTime(self) -> Optional[datetime.datetime]:
"""
The time the attachment was created.
"""
return self.getPropertyVal('30070040')

@property
@abc.abstractmethod
def data(self) -> Optional[object]:
Expand All @@ -537,7 +554,7 @@ def data(self) -> Optional[object]:
Returns ``None`` if there is no data to save.
"""

@functools.cached_property
@cached_property
def dataType(self) -> Optional[Type[object]]:
"""
The class that the data type will use, if it can be retrieved.
Expand All @@ -560,14 +577,14 @@ def dir(self) -> str:
"""
return self.__dir

@functools.cached_property
@cached_property
def displayName(self) -> Optional[str]:
"""
Returns the display name of the folder.
"""
return self.getStringStream('__substg1.0_3001')

@functools.cached_property
@cached_property
def exceptionReplaceTime(self) -> Optional[datetime.datetime]:
"""
The original date and time at which the instance in the recurrence
Expand All @@ -577,48 +594,62 @@ def exceptionReplaceTime(self) -> Optional[datetime.datetime]:
"""
return self.getPropertyVal('7FF90040')

@functools.cached_property
@cached_property
def extension(self) -> Optional[str]:
"""
The reported extension for the file.
"""
return self.getStringStream('__substg1.0_3703')

@functools.cached_property
@cached_property
def hidden(self) -> bool:
"""
Indicates whether an Attachment object is hidden from the end user.
"""
return bool(self.getPropertyVal('7FFE000B'))

@functools.cached_property
@cached_property
def isAttachmentContactPhoto(self) -> bool:
"""
Whether the attachment is a contact photo for a Contact object.
"""
return bool(self.getPropertyVal('7FFF000B'))

@functools.cached_property
@cached_property
def lastModificationTime(self) -> Optional[datetime.datetime]:
"""
The last time the attachment was modified.
"""
return self.getPropertyVal('30080040')

@cached_property
def longFilename(self) -> Optional[str]:
"""
Returns the long file name of the attachment, if it exists.
"""
return self.getStringStream('__substg1.0_3707')

@functools.cached_property
@cached_property
def longPathname(self) -> Optional[str]:
"""
The fully qualified path and file name with extension.
"""
return self.getStringStream('__substg1.0_370D')

@functools.cached_property
@cached_property
def mimetype(self) -> Optional[str]:
"""
The content-type mime header of the attachment, if specified.
"""
return tryGetMimetype(self, self.getStringStream('__substg1.0_370E'))

@property
def modifiedAt(self) -> Optional[datetime.datetime]:
"""
Alias of :attr:`lastModificationTime`.
"""
return self.lastModificationTime

@property
def msg(self) -> MSGFile:
"""
Expand All @@ -631,7 +662,7 @@ def msg(self) -> MSGFile:
raise ReferenceError('The MSGFile for this Attachment instance has been garbage collected.')
return msg

@functools.cached_property
@cached_property
def name(self) -> Optional[str]:
"""
The best name available for the file.
Expand All @@ -650,7 +681,7 @@ def namedProperties(self) -> NamedProperties:
"""
return self.__namedProperties

@functools.cached_property
@cached_property
def payloadClass(self) -> Optional[str]:
"""
The class name of an object that can display the contents of the
Expand All @@ -665,7 +696,7 @@ def props(self) -> PropertiesStore:
"""
return self.__props

@functools.cached_property
@cached_property
def renderingPosition(self) -> Optional[int]:
"""
The offset, in rendered characters, to use when rendering the attachment
Expand All @@ -676,7 +707,7 @@ def renderingPosition(self) -> Optional[int]:
"""
return self.getPropertyVal('370B0003')

@property
@cached_property
def shortFilename(self) -> Optional[str]:
"""
The short file name of the attachment, if it exists.
Expand Down
15 changes: 13 additions & 2 deletions extract_msg/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
import pathlib
import shutil
import struct
import sys
import weakref
import zipfile

Expand Down Expand Up @@ -359,6 +360,7 @@ def getCommandArgs(args: Sequence[str]) -> argparse.Namespace:
parser = argparse.ArgumentParser(description = constants.MAINDOC, prog = 'extract_msg')
outFormat = parser.add_mutually_exclusive_group()
inputFormat = parser.add_mutually_exclusive_group()
inputType = parser.add_mutually_exclusive_group(required = True)
# --use-content-id, --cid
parser.add_argument('--use-content-id', '--cid', dest='cid', action='store_true',
help='Save attachments by their Content ID, if they have one. Useful when working with the HTML body.')
Expand All @@ -368,7 +370,7 @@ def getCommandArgs(args: Sequence[str]) -> argparse.Namespace:
# --file-logging
parser.add_argument('--file-logging', dest='fileLogging', action='store_true',
help='Enables file logging. Implies --verbose level 1.')
# --verbose
# -v, --verbose
parser.add_argument('-v', '--verbose', dest='verbose', action='count', default=0,
help='Turns on console logging. Specify more than once for higher verbosity.')
# --log PATH
Expand Down Expand Up @@ -455,12 +457,19 @@ def getCommandArgs(args: Sequence[str]) -> argparse.Namespace:
# --progress
parser.add_argument('--progress', dest='progress', action='store_true',
help='Shows what file the program is currently working on during it\'s progress.')
# -s, --stdin
inputType.add_argument('-s', '--stdin', dest='stdin', action='store_true',
help='Read file from stdin (only works with one file at a time).')
# [MSG files]
parser.add_argument('msgs', metavar='msg', nargs='+',
inputType.add_argument('msgs', metavar='msg', nargs='*', default=[],
help='An MSG file to be parsed.')

options = parser.parse_args(args)

if options.stdin:
# Read the MSG file from stdin and shove it into the msgs list.
options.msgs.append(sys.stdin.buffer.read())

if options.outName and options.noFolders:
raise IncompatibleOptionsError('--out-name is not compatible with --no-folders.')

Expand Down Expand Up @@ -502,6 +511,8 @@ def getCommandArgs(args: Sequence[str]) -> argparse.Namespace:
if options.glob:
if options.outName:
raise IncompatibleOptionsError('--out-name is not supported when using wildcards.')
if options.stdin:
raise IncompatibleOptionsError('--stdin is not supported with using wildcards.')
fileLists = []
for path in options.msgs:
fileLists += glob.glob(path)
Expand Down
4 changes: 3 additions & 1 deletion extract_msg_tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
__all__ = [
'AttachmentTests',
'CommandLineTests',
'OleWriterEditingTests',
'OleWriterExportTests',
'PropTests',
'ValidationTests',
]

from .validation_tests import ValidationTests
from .attachment_tests import AttachmentTests
from .cmd_line_tests import CommandLineTests
from .ole_writer_tests import OleWriterEditingTests, OleWriterExportTests
from .prop_tests import PropTests
from .validation_tests import ValidationTests
34 changes: 34 additions & 0 deletions extract_msg_tests/cmd_line_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
__all__ = [
'CommandLineTests',
]


import pathlib
import subprocess
import sys
import unittest

from .constants import TEST_FILE_DIR, USER_TEST_DIR


class CommandLineTests(unittest.TestCase):
    """
    Tests covering the ``extract_msg`` command line interface.
    """

    def testStdin(self, testFileDir = TEST_FILE_DIR):
        """
        Checks that feeding an MSG file through stdin (``-s``) produces the
        same stdout and stderr as pointing the command line at that same file
        on the disk.

        :param testFileDir: Directory to search (non-recursively) for ``*.msg``
            files to run the comparison on.
        """
        baseCommand = [sys.executable, '-m', 'extract_msg']

        for msgPath in testFileDir.glob('*.msg'):
            # Reference run: let the program open the file from the disk. An
            # empty input still attaches a pipe to stdin and closes it right
            # away, so the child never inherits the test runner's stdin.
            fromDisk = subprocess.run(
                baseCommand + ['--dump-stdout', str(msgPath)],
                input = b'',
                stdout = subprocess.PIPE,
                stderr = subprocess.PIPE,
            )

            # Second run: pipe the raw bytes of the same file into stdin.
            fromStdin = subprocess.run(
                baseCommand + ['-s', '--dump-stdout'],
                input = msgPath.read_bytes(),
                stdout = subprocess.PIPE,
                stderr = subprocess.PIPE,
            )

            # Both input methods must give identical output. The subtest makes
            # a failure report which file it happened on.
            with self.subTest(msgPath):
                self.assertEqual(fromDisk.stdout, fromStdin.stdout)
                self.assertEqual(fromDisk.stderr, fromStdin.stderr)

    @unittest.skipIf(USER_TEST_DIR is None, 'User test files not defined.')
    def testUserStdin(self):
        """
        Runs :meth:`testStdin` against the user-defined test directory.
        """
        self.testStdin(USER_TEST_DIR)
8 changes: 5 additions & 3 deletions extract_msg_tests/ole_writer_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,11 @@ def testExportExamples(self, testFileDir = TEST_FILE_DIR):
with open(exportResultFile, 'rb') as f:
exportResult = f.read()

# We use two assertions to give better error messages.
self.assertCountEqual(exportResult, exportedBytes, 'Exported data is wrong size.')
self.assertEqual(exportedBytes, exportResult, 'Exported data is incorrect.')
# Use a subtest to print the file name.
with self.subTest(str(testFileDir / exportResultFile.name)):
# We use two assertions to give better error messages.
self.assertCountEqual(exportResult, exportedBytes, 'Exported data is wrong size.')
self.assertEqual(exportedBytes, exportResult, 'Exported data is incorrect.')

@unittest.skipIf(USER_TEST_DIR is None, 'User test files not defined.')
@unittest.skipIf(USER_TEST_DIR is not None and not (USER_TEST_DIR / 'export-results').exists(), 'User export tests not defined.')
Expand Down

0 comments on commit 235a27c

Please sign in to comment.