Skip to content

Commit

Permalink
Merge pull request #186 from elifesciences/develop
Browse files Browse the repository at this point in the history
PR for version 0.77.0 release
  • Loading branch information
gnott authored Jan 29, 2025
2 parents e6f890d + 2e52e50 commit b997fac
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 1 deletion.
2 changes: 1 addition & 1 deletion elifecleaner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging


__version__ = "0.76.0"
__version__ = "0.77.0"


LOGGER = logging.getLogger(__name__)
Expand Down
12 changes: 12 additions & 0 deletions elifecleaner/equation.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,18 @@ def equation_inline_graphic_hrefs(sub_article_root, identifier):
return href_list


def formula_graphic_hrefs(sub_article_root, identifier):
    "collect the xlink:href value of each graphic tag directly inside a disp-formula tag"
    # identifier is accepted but not used by this function
    # only the body tag is needed, the first returned part is ignored
    _, body = block.sub_article_tag_parts(sub_article_root)
    if body is None:
        return []
    # one href lookup per matching graphic tag, in the order findall returns them
    hrefs = (
        utils.xlink_href(graphic)
        for graphic in body.findall(".//disp-formula/graphic")
    )
    # falsy href values are left out of the result
    return [href for href in hrefs if href]


def inline_equation_inline_graphic_hrefs(sub_article_root, identifier):
"get inline-graphic xlink:href values to be inline-formula"
body_tag = block.sub_article_tag_parts(sub_article_root)[1]
Expand Down
12 changes: 12 additions & 0 deletions elifecleaner/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,18 @@ def table_inline_graphic_hrefs(sub_article_root, identifier):
return href_list


def table_graphic_hrefs(sub_article_root, identifier):
    "collect the xlink:href value of each graphic tag directly inside a table-wrap tag"
    # identifier is accepted but not used by this function
    # only the body tag is needed, the first returned part is ignored
    _, body = block.sub_article_tag_parts(sub_article_root)
    if body is None:
        return []
    # one href lookup per matching graphic tag, in the order findall returns them
    hrefs = (
        utils.xlink_href(graphic)
        for graphic in body.findall(".//table-wrap/graphic")
    )
    # falsy href values are left out of the result
    return [href for href in hrefs if href]


def transform_table_group(body_tag, table_index, table_group, sub_article_id):
"transform one set of p tags into table-wrap tags as specified in the table_group dict"
inline_graphic_p_tag = body_tag[table_group.get("inline_graphic_index")]
Expand Down
34 changes: 34 additions & 0 deletions tests/test_equation.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,40 @@ def test_equation_inline(self):
self.assertEqual(result, expected)


class TestFormulaGraphicHrefs(unittest.TestCase):
    "check the href list returned by equation.formula_graphic_hrefs()"

    def test_formula_graphic_hrefs(self):
        "a graphic tag inside a disp-formula tag contributes its xlink:href value"
        sub_article_xml = (
            b'<sub-article id="sa1" xmlns:xlink="http://www.w3.org/1999/xlink"><body>'
            b"<p>Following is a display formula:</p>\n"
            b'<disp-formula id="sa1equ1">\n'
            b'<graphic mimetype="image" mime-subtype="jpg"'
            b' xlink:href="elife-sa1-equ1.jpg"/>\n'
            b"</disp-formula>\n"
            b"</body></sub-article>"
        )
        sub_article_root = ElementTree.fromstring(sub_article_xml)
        self.assertEqual(
            equation.formula_graphic_hrefs(sub_article_root, "test.zip"),
            ["elife-sa1-equ1.jpg"],
        )

    def test_graphic_hrefs_no_match(self):
        "a body without any graphic tag gives an empty list"
        sub_article_xml = (
            b'<sub-article id="sa1" xmlns:xlink="http://www.w3.org/1999/xlink">'
            b"<body><p/></body>"
            b"</sub-article>"
        )
        sub_article_root = ElementTree.fromstring(sub_article_xml)
        self.assertEqual(
            equation.formula_graphic_hrefs(sub_article_root, "test.zip"),
            [],
        )


class TestInlineEquationInlineGraphicHrefs(unittest.TestCase):
"tests for inline_equation_inline_graphic_hrefs()"

Expand Down
38 changes: 38 additions & 0 deletions tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,44 @@ def test_table_inline_graphic_hrefs(self):
self.assertEqual(result, expected)


class TestTableGraphicHrefs(unittest.TestCase):
    "check the href list returned by table.table_graphic_hrefs()"

    def test_table_graphic_hrefs(self):
        "a graphic tag inside a table-wrap tag contributes its xlink:href value"
        sub_article_xml = (
            b'<sub-article id="sa1" xmlns:xlink="http://www.w3.org/1999/xlink"><body>'
            b'<table-wrap id="sa1table1">\n'
            b"<label>Review table 1.</label>\n"
            b"<caption>\n"
            b"<title>Caption title.</title>\n"
            b"<p>Caption paragraph.</p>\n"
            b"</caption>\n"
            b'<graphic mimetype="image" mime-subtype="jpg"'
            b' xlink:href="elife-95901-sa1-table1.jpg"/>\n'
            b"</table-wrap>\n"
            b"</body></sub-article>"
        )
        sub_article_root = ElementTree.fromstring(sub_article_xml)
        self.assertEqual(
            table.table_graphic_hrefs(sub_article_root, "test.zip"),
            ["elife-95901-sa1-table1.jpg"],
        )

    def test_graphic_hrefs_no_match(self):
        "a body without any graphic tag gives an empty list"
        sub_article_xml = (
            b'<sub-article id="sa1" xmlns:xlink="http://www.w3.org/1999/xlink">'
            b"<body><p/></body>"
            b"</sub-article>"
        )
        sub_article_root = ElementTree.fromstring(sub_article_xml)
        self.assertEqual(
            table.table_graphic_hrefs(sub_article_root, "test.zip"),
            [],
        )


class TestTransformTable(unittest.TestCase):
"tests for table.transform_table()"

Expand Down

0 comments on commit b997fac

Please sign in to comment.