Skip to content

Commit

Permalink
Merge pull request #18 from elifesciences/develop
Browse files Browse the repository at this point in the history
Release version 0.5.0 with logging enhancements
  • Loading branch information
gnott authored Aug 20, 2021
2 parents 4483b9e + 5544984 commit 523b870
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 20 deletions.
13 changes: 7 additions & 6 deletions elifecleaner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,20 @@
import logging


__version__ = "0.4.0"
__version__ = "0.5.0"


LOGGER = logging.getLogger(__name__)
# A NullHandler gives the package logger a handler of its own, so nothing is
# emitted by default until the application opts in via configure_logging().
LOGGER.addHandler(logging.NullHandler())


def configure_logging(filename, level=logging.INFO, format_string=None):
    """Configure the package logger to write to a file.

    Args:
        filename: path of the log file the new handler writes to.
        level: logging level applied to the package logger
            (defaults to ``logging.INFO``).
        format_string: ``logging.Formatter`` format for each record; when
            falsy, a default of level, logger name, module, function name
            and message is used.

    Returns:
        The ``logging.FileHandler`` that was attached, so the caller can
        later detach it with ``LOGGER.removeHandler``.
    """
    if not format_string:
        format_string = "%(levelname)s %(name)s:%(module)s:%(funcName)s: %(message)s"
    handler = logging.FileHandler(filename)
    handler.setFormatter(logging.Formatter(format_string))
    LOGGER.addHandler(handler)
    LOGGER.setLevel(level)
    return handler
22 changes: 18 additions & 4 deletions elifecleaner/parse.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import re
from collections import OrderedDict
from xml.etree import ElementTree
Expand All @@ -14,21 +15,34 @@ def check_ejp_zip(zip_file, tmp_dir):
root = parse_article_xml(xml_asset[1])
files = file_list(root)
figures = figure_list(files, asset_file_name_map)
zip_file_name = zip_file.split(os.sep)[-1]
# check for multiple page PDF figures
for pdf in [pdf for pdf in figures if pdf.get("pages") and pdf.get("pages") > 1]:
LOGGER.warning("multiple page PDF figure file: %s", pdf.get("file_name"))
LOGGER.warning(
"%s multiple page PDF figure file: %s", zip_file_name, pdf.get("file_name")
)
# check for missing files
missing_files = find_missing_files(files, asset_file_name_map)
for missing_file in missing_files:
LOGGER.warning("zip does not contain a file in the manifest: %s" % missing_file)
LOGGER.warning(
"%s does not contain a file in the manifest: %s",
zip_file_name,
missing_file,
)
# check for file not listed in the manifest
extra_files = find_extra_files(files, asset_file_name_map)
for extra_file in extra_files:
LOGGER.warning("file not listed in the manifest: %s" % extra_file)
LOGGER.warning(
"%s has file not listed in the manifest: %s", zip_file_name, extra_file
)
# check for out of sequence files by name
missing_files_by_name = find_missing_files_by_name(files)
for missing_file in missing_files_by_name:
LOGGER.warning("file misisng from expected numeric sequence: %s" % missing_file)
LOGGER.warning(
"%s has file misisng from expected numeric sequence: %s",
zip_file_name,
missing_file,
)

return True

Expand Down
25 changes: 15 additions & 10 deletions tests/test_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,26 @@
from xml.etree import ElementTree
from mock import patch
import wand
from elifecleaner import configure_logging, parse, zip_lib
from elifecleaner import LOGGER, configure_logging, parse, zip_lib
from tests.helpers import delete_files_in_folder, read_fixture


class TestParse(unittest.TestCase):
def setUp(self):
    """Point logging at a per-test log file and remember the handler."""
    self.temp_dir = "tests/tmp"
    self.log_file = os.path.join(self.temp_dir, "test.log")
    # Configure logging exactly once and keep the returned handler so
    # tearDown can detach it; a second call would attach a duplicate handler.
    self.log_handler = configure_logging(self.log_file)

def tearDown(self):
    """Detach and close the test log handler, then empty the temp folder."""
    LOGGER.removeHandler(self.log_handler)
    # Close the handler so the log file is released before the folder
    # contents are deleted.
    self.log_handler.close()
    delete_files_in_folder(self.temp_dir, filter_out=[".keepme"])

def test_check_ejp_zip(self):
zip_file = "tests/test_data/30-01-2019-RA-eLife-45644.zip"
zip_file_name = zip_file.split(os.sep)[-1]
warning_prefix = (
"WARNING elifecleaner:parse:check_ejp_zip: multiple page PDF figure file:"
)
"WARNING elifecleaner:parse:check_ejp_zip: %s multiple page PDF figure file:"
) % zip_file_name
expected = [
"%s 30-01-2019-RA-eLife-45644/Appendix 1figure 10.pdf\n" % warning_prefix,
"%s 30-01-2019-RA-eLife-45644/Appendix 1figure 11.pdf\n" % warning_prefix,
Expand All @@ -39,6 +41,7 @@ def test_check_ejp_zip_missing_file(self):
zip_file = "tests/test_data/08-11-2020-FA-eLife-64719.zip"
# remove a file from a copy of the zip file for testing
test_zip_file_name = os.path.join(self.temp_dir, "test_missing_file.zip")
zip_file_name = test_zip_file_name.split(os.sep)[-1]

remove_files = ["08-11-2020-FA-eLife-64719/eLife64719_figure2_classB.png"]
with zipfile.ZipFile(zip_file, "r") as input_zipfile:
Expand All @@ -49,8 +52,8 @@ def test_check_ejp_zip_missing_file(self):
zip_info, input_zipfile.read(zip_info.filename)
)

warning_prefix = "WARNING elifecleaner:parse:check_ejp_zip:"
missing_file_prefix = "zip does not contain a file in the manifest:"
warning_prefix = "WARNING elifecleaner:parse:check_ejp_zip: %s" % zip_file_name
missing_file_prefix = "does not contain a file in the manifest:"
expected = [
"%s %s eLife64719_figure2_classB.png\n"
% (warning_prefix, missing_file_prefix),
Expand All @@ -68,6 +71,7 @@ def test_check_ejp_zip_extra_file(self):
zip_file = "tests/test_data/08-11-2020-FA-eLife-64719.zip"
# alter the manifest XML in the zip file for testing
test_zip_file_name = os.path.join(self.temp_dir, "test_missing_file.zip")
zip_file_name = test_zip_file_name.split(os.sep)[-1]

xml_file_name = "08-11-2020-FA-eLife-64719/08-11-2020-FA-eLife-64719.xml"

Expand All @@ -86,8 +90,8 @@ def test_check_ejp_zip_extra_file(self):
zip_info, input_zipfile.read(zip_info.filename)
)

warning_prefix = "WARNING elifecleaner:parse:check_ejp_zip:"
extra_file_prefix = "file not listed in the manifest:"
warning_prefix = "WARNING elifecleaner:parse:check_ejp_zip: %s" % zip_file_name
extra_file_prefix = "has file not listed in the manifest:"
expected = [
"%s %s 08-11-2020-FA-eLife-64719.pdf\n"
% (warning_prefix, extra_file_prefix),
Expand All @@ -110,6 +114,7 @@ def test_check_ejp_zip_missing_file_by_name(self):
zip_file = "tests/test_data/08-11-2020-FA-eLife-64719.zip"
# alter the manifest XML in the zip file for testing
test_zip_file_name = os.path.join(self.temp_dir, "test_missing_file.zip")
zip_file_name = test_zip_file_name.split(os.sep)[-1]

xml_file_name = "08-11-2020-FA-eLife-64719/08-11-2020-FA-eLife-64719.xml"

Expand All @@ -129,8 +134,8 @@ def test_check_ejp_zip_missing_file_by_name(self):
zip_info, input_zipfile.read(zip_info.filename)
)

warning_prefix = "WARNING elifecleaner:parse:check_ejp_zip:"
extra_file_prefix = "file misisng from expected numeric sequence:"
warning_prefix = "WARNING elifecleaner:parse:check_ejp_zip: %s" % zip_file_name
extra_file_prefix = "has file misisng from expected numeric sequence:"
expected = [
"%s %s Figure 2\n" % (warning_prefix, extra_file_prefix),
]
Expand Down

0 comments on commit 523b870

Please sign in to comment.