Skip to content

Commit

Permalink
Merge pull request #190 from elifesciences/develop
Browse files Browse the repository at this point in the history
PR for version 0.79.0 release
  • Loading branch information
gnott authored Mar 5, 2025
2 parents 1dbf67b + 8f7c261 commit d1723a5
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 23 deletions.
2 changes: 1 addition & 1 deletion elifecleaner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging


__version__ = "0.78.0"
__version__ = "0.79.0"


LOGGER = logging.getLogger(__name__)
Expand Down
47 changes: 25 additions & 22 deletions elifecleaner/equation.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,9 @@ def transform_equations(sub_article_root, identifier):
def disp_formula_tag_index_groups(body_tag, identifier):
"find p tags which have inline-graphic tags to convert to disp-formula"
index_groups = []
tag_id_index = 0
tag_id_index = 1
for tag_index, parent_tag in enumerate(body_tag.iterfind("*")):
# count the inline-graphic tags to get an id value
if parent_tag.findall("inline-graphic") or (
parent_tag.tag == "disp-formula" and parent_tag.findall("graphic")
):
tag_id_index += 1
if block.is_p_inline_graphic(
tag=parent_tag,
sub_article_id=None,
Expand All @@ -48,6 +44,12 @@ def disp_formula_tag_index_groups(body_tag, identifier):
"tag_index": tag_id_index,
}
index_groups.append(detail)
tag_id_index += 1
elif parent_tag.findall("inline-graphic") or (
parent_tag.tag == "disp-formula" and parent_tag.findall("graphic")
):
# count the inline-graphic tags to get an id value
tag_id_index += len(parent_tag.findall("inline-graphic"))
return index_groups


Expand Down Expand Up @@ -140,14 +142,8 @@ def transform_inline_equations(sub_article_root, identifier):
def inline_formula_tag_index_groups(body_tag, identifier):
"find p tags which have inline-graphic tags to convert to inline-formula"
index_groups = []
tag_id_index = 0
tag_id_index = 1
for tag_index, parent_tag in enumerate(body_tag.iterfind("*")):
# count the inline-graphic tags to get an id value
if parent_tag.findall("inline-graphic") or (
parent_tag.tag == "disp-formula" and parent_tag.findall("graphic")
):
tag_id_index += 1

if parent_tag.find("inline-graphic") is not None and not (
block.is_p_inline_graphic(
tag=parent_tag,
Expand All @@ -156,21 +152,28 @@ def inline_formula_tag_index_groups(body_tag, identifier):
identifier=identifier,
)
):
detail = {
"label_index": None,
"caption_index": None,
"inline_graphic_index": tag_index,
"tag_index": tag_id_index,
}

index_groups.append(detail)
for inline_graphic_tag in parent_tag.findall("inline-graphic"):
detail = {
"label_index": None,
"caption_index": None,
"inline_graphic_index": tag_index,
"tag_index": tag_id_index,
}

index_groups.append(detail)
tag_id_index += 1
elif parent_tag.findall("inline-graphic") or (
parent_tag.tag == "disp-formula" and parent_tag.findall("graphic")
):
# count the inline-graphic tags to get an id value
tag_id_index += len(parent_tag.findall("inline-graphic"))

return index_groups


def transform_inline_formulas(body_tag, index_groups, sub_article_id):
"transform p tags in the body_tag to table-wrap tags as listed in table_index_groups"
for group in reversed(index_groups):
"transform inline-graphic tags to inline-formula tags as listed in index_groups"
for group in index_groups:
index = group.get("tag_index")
transform_inline_formula(body_tag, index, group, sub_article_id)

Expand Down
149 changes: 149 additions & 0 deletions tests/test_equation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,29 @@ def test_graphic_hrefs_no_match(self):
result = equation.inline_formula_graphic_hrefs(tag, identifier)
self.assertEqual(result, expected)

def test_multiple(self):
    "hrefs are collected from more than one inline-formula inline-graphic tag"
    # a single p tag holding two inline-formula tags, each wrapping an inline-graphic
    sub_article_xml = (
        '<sub-article xmlns:xlink="http://www.w3.org/1999/xlink" id="sa1">'
        "<body>"
        "<p>An inline equation"
        ' <inline-formula id="sa1equ1">'
        '<inline-graphic xlink:href="elife-sa1-equ1.jpg" />'
        "</inline-formula>"
        " and another inline equation"
        ' <inline-formula id="sa1equ2">'
        '<inline-graphic xlink:href="elife-sa1-equ2.jpg" />'
        "</inline-formula>"
        ".</p>"
        "</body>"
        "</sub-article>"
    )
    sub_article_root = ElementTree.fromstring(sub_article_xml)
    hrefs = equation.inline_formula_graphic_hrefs(sub_article_root, "test.zip")
    # both href values are expected, in document order
    self.assertEqual(hrefs, ["elife-sa1-equ1.jpg", "elife-sa1-equ2.jpg"])


class TestTransformEquations(unittest.TestCase):
"tests for transform_equations()"
Expand Down Expand Up @@ -102,6 +125,31 @@ def test_disp_formula(self):
# assert
self.assertEqual(result, expected)

def test_multiple(self):
    "a body mixing inline equations and a display formula yields one index group"
    body_xml = (
        '<body xmlns:xlink="http://www.w3.org/1999/xlink">'
        "<p>An inline equation"
        ' <inline-graphic xlink:href="elife-inf1.jpg"/>'
        ' and another inline equation <inline-graphic xlink:href="elife-inf2.jpg"/>.</p>'
        "<p>Following is a display formula:</p>"
        '<p><inline-graphic xlink:href="elife-inf2.jpg"/></p>'
        "</body>"
    )
    body_tag = ElementTree.fromstring(body_xml)
    # the expected group points at the third child of body (index 2) and
    # carries tag_index 3
    disp_formula_group = {
        "label_index": None,
        "caption_index": None,
        "inline_graphic_index": 2,
        "tag_index": 3,
    }
    groups = equation.disp_formula_tag_index_groups(
        body_tag, "10.7554/eLife.95901.1"
    )
    self.assertEqual(groups, [disp_formula_group])


class TestTransformDispFormulas(unittest.TestCase):
"tests for transform_disp_formulas()"
Expand Down Expand Up @@ -326,6 +374,39 @@ def test_transform_inline_equations(self):
# assert
self.assertEqual(ElementTree.tostring(result).decode("utf8"), expected)

def test_transform_multiple(self):
    "two inline-graphic tags in one p tag are each converted to inline-formula"
    # register namespaces before parsing and serialising the XML
    xmlio.register_xmlns()
    input_xml = (
        '<sub-article id="sa1" xmlns:xlink="http://www.w3.org/1999/xlink">'
        "<body>"
        "<p>An inline equation"
        ' <inline-graphic xlink:href="elife-inf1.jpg"/>'
        ' and another inline equation <inline-graphic xlink:href="elife-inf2.jpg"/>.</p>'
        "</body>"
        "</sub-article>"
    )
    expected_xml = (
        '<sub-article xmlns:xlink="http://www.w3.org/1999/xlink" id="sa1">'
        "<body>"
        "<p>An inline equation"
        ' <inline-formula id="sa1equ1">'
        '<inline-graphic xlink:href="elife-sa1-equ1.jpg" />'
        "</inline-formula>"
        " and another inline equation"
        ' <inline-formula id="sa1equ2">'
        '<inline-graphic xlink:href="elife-sa1-equ2.jpg" />'
        "</inline-formula>"
        ".</p>"
        "</body>"
        "</sub-article>"
    )
    sub_article_root = ElementTree.fromstring(input_xml)
    transformed_root = equation.transform_inline_equations(
        sub_article_root, "10.7554/eLife.95901.1"
    )
    # compare the serialised output with the expected XML string
    output_xml = ElementTree.tostring(transformed_root).decode("utf8")
    self.assertEqual(output_xml, expected_xml)


class TestInlineFormulaTagIndexGroups(unittest.TestCase):
"tests for inline_formula_tag_index_groups()"
Expand Down Expand Up @@ -354,6 +435,35 @@ def test_inline_formula(self):
# assert
self.assertEqual(result, expected)

def test_multiple(self):
    "each inline-graphic tag in a single p tag gets its own index group"
    body_xml = (
        '<body xmlns:xlink="http://www.w3.org/1999/xlink">'
        "<p>An inline equation"
        ' <inline-graphic xlink:href="elife-inf1.jpg"/>'
        ' and another inline equation <inline-graphic xlink:href="elife-inf2.jpg"/>.</p>'
        "</body>"
    )
    body_tag = ElementTree.fromstring(body_xml)
    # both groups refer to the same p tag (index 0), with tag_index 1 then 2
    expected_groups = [
        {
            "label_index": None,
            "caption_index": None,
            "inline_graphic_index": 0,
            "tag_index": tag_id_index,
        }
        for tag_id_index in (1, 2)
    ]
    groups = equation.inline_formula_tag_index_groups(
        body_tag, "10.7554/eLife.95901.1"
    )
    self.assertEqual(groups, expected_groups)


class TestTransformInlineFormulas(unittest.TestCase):
"tests for transform_inline_formulas()"
Expand Down Expand Up @@ -395,6 +505,45 @@ def test_transform_inline_formulas(self):
# assert
self.assertEqual(ElementTree.tostring(body_tag).decode("utf8"), expected)

def test_multiple(self):
    "two index groups for one p tag produce two inline-formula tags"
    # register namespaces before parsing and serialising the XML
    xmlio.register_xmlns()
    body_xml = (
        '<body xmlns:xlink="http://www.w3.org/1999/xlink">'
        "<p>An inline equation"
        ' <inline-graphic xlink:href="elife-inf1.jpg"/>'
        ' and another inline equation <inline-graphic xlink:href="elife-inf2.jpg"/>.</p>'
        "</body>"
    )
    expected_xml = (
        '<body xmlns:xlink="http://www.w3.org/1999/xlink">'
        "<p>An inline equation"
        ' <inline-formula id="sa1equ1">'
        '<inline-graphic xlink:href="elife-sa1-equ1.jpg" />'
        "</inline-formula>"
        " and another inline equation"
        ' <inline-formula id="sa1equ2">'
        '<inline-graphic xlink:href="elife-sa1-equ2.jpg" />'
        "</inline-formula>"
        ".</p>"
        "</body>"
    )
    body_tag = ElementTree.fromstring(body_xml)
    # both groups point at the first body child, with tag_index 1 then 2
    groups = [
        {"inline_graphic_index": 0, "tag_index": tag_id_index}
        for tag_id_index in (1, 2)
    ]
    # body_tag is modified in place, the return value is not used
    equation.transform_inline_formulas(body_tag, groups, "sa1")
    output_xml = ElementTree.tostring(body_tag).decode("utf8")
    self.assertEqual(output_xml, expected_xml)


class TestTransformInlineFormula(unittest.TestCase):
"tests for transform_inline_formula()"
Expand Down

0 comments on commit d1723a5

Please sign in to comment.