Skip to content

Commit

Permalink
Merge pull request #178 from elifesciences/develop
Browse files Browse the repository at this point in the history
PR for version 0.74.0 release
  • Loading branch information
gnott authored Nov 28, 2024
2 parents 7bc2681 + a060467 commit 9236bd4
Show file tree
Hide file tree
Showing 8 changed files with 79 additions and 26 deletions.
2 changes: 1 addition & 1 deletion elifecleaner/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging


__version__ = "0.73.0"
__version__ = "0.74.0"


LOGGER = logging.getLogger(__name__)
Expand Down
13 changes: 13 additions & 0 deletions elifecleaner/block.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,19 @@ def tag_index_groups(body_tag, sub_article_id, block_type, identifier):
return fig_index_groups


def graphic_href_list(body_tag, index_groups):
    "from the index_groups gather the xlink:href value of each inline graphic in body_tag"
    hrefs = []
    for index_group in index_groups:
        graphic_index = index_group.get("inline_graphic_index")
        # NOTE(review): this truthiness test also skips an index of 0 - confirm
        # an inline graphic can never be the first child of body_tag
        if not graphic_index:
            continue
        # the index points at the child of body_tag which holds the inline graphic
        graphic_tag = inline_graphic_tag_from_tag(body_tag[graphic_index])
        href = utils.xlink_href(graphic_tag)
        # only non-empty href values are collected
        if href:
            hrefs.append(href)
    return hrefs


def title_paragraph_content(string_list):
"from list of strings repair inline formatting tags and split into title and paragraph"
# check for nested inline formatting tags
Expand Down
16 changes: 3 additions & 13 deletions elifecleaner/fig.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,6 @@
from elifecleaner import block, utils


def inf_file_identifier(inf_file_name):
    "final hyphen-separated part of an inline graphic file name, e.g. inf1 in elife-70493-inf1.png"
    # drop the file extension, which is the text after the final full stop
    name_without_extension = inf_file_name.rsplit(".", 1)[0]
    # the identifier is whatever follows the final hyphen
    name_parts = name_without_extension.rsplit("-", 1)
    return name_parts[-1]


def fig_file_name_identifier(sub_article_id, fig_index):
    "build the part of a fig file name which is unique to the fig, e.g. sa1-fig1"
    # both values are converted to strings and joined around the -fig marker
    name_parts = (str(sub_article_id), str(fig_index))
    return "-fig".join(name_parts)
Expand All @@ -20,7 +15,8 @@ def fig_id(sub_article_id, fig_index):
def fig_file_name(inf_file_name, sub_article_id, fig_index):
"from inf file name create a new fig file name"
return inf_file_name.replace(
inf_file_identifier(inf_file_name), "%s-fig%s" % (sub_article_id, fig_index)
utils.inf_file_identifier(inf_file_name),
"%s-fig%s" % (sub_article_id, fig_index),
)


Expand All @@ -45,13 +41,7 @@ def inline_graphic_hrefs(sub_article_root, identifier):
if body_tag is not None:
# match paragraphs with fig data in them and record the tag indexes
fig_index_groups = fig_tag_index_groups(body_tag, sub_article_id, identifier)
for group in fig_index_groups:
if group.get("inline_graphic_index"):
inline_graphic_p = body_tag[group.get("inline_graphic_index")]
inline_graphic_tag = block.inline_graphic_tag_from_tag(inline_graphic_p)
image_href = utils.xlink_href(inline_graphic_tag)
if image_href:
href_list.append(image_href)
href_list = block.graphic_href_list(body_tag, fig_index_groups)
return href_list


Expand Down
25 changes: 24 additions & 1 deletion elifecleaner/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,32 @@ def table_wrap_id(sub_article_id, table_index):
return "%stable%s" % (sub_article_id, table_index)


def table_file_name(inf_file_name, sub_article_id, table_index):
    """from inf file name create a new table file name

    The inline graphic identifier (e.g. inf1 in elife-70493-inf1.png) is
    swapped for a value made from sub_article_id and table_index,
    e.g. elife-70493-sa1-table1.png

    Only the identifier found at the end of the name, before the file
    extension, is substituted, so the same text appearing earlier in the
    name or in the extension is left as it is.

    inf_file_name: string file name of the inline graphic
    sub_article_id: value to use as the first part of the new identifier
    table_index: value to use after "-table" in the new identifier

    Returns the new file name as a string.
    """
    identifier = utils.inf_file_identifier(inf_file_name)
    new_identifier = "%s-table%s" % (sub_article_id, table_index)
    # the identifier is expected to be the tail of the name once the
    # file extension (text after the final full stop) is removed
    stem = inf_file_name.rsplit(".", 1)[0]
    if not stem.endswith(identifier):
        # the identifier is not where it is expected to be,
        # fall back to a plain substitution
        return inf_file_name.replace(identifier, new_identifier)
    prefix = stem[: len(stem) - len(identifier)]
    extension = inf_file_name[len(stem) :]
    return prefix + new_identifier + extension


def table_tag_index_groups(body_tag, sub_article_id, identifier):
    "find the groups of tags in body_tag which are to be converted to a table-wrap"
    # the matching is delegated to the block module using the table block type
    block_type = "table"
    return block.tag_index_groups(body_tag, sub_article_id, block_type, identifier)


def table_inline_graphic_hrefs(sub_article_root, identifier):
    "list the href values of inline-graphic tags found in table tag groups"
    sub_article_id, body_tag = block.sub_article_tag_parts(sub_article_root)
    # with no body tag there is nothing to search
    if body_tag is None:
        return []
    # find the paragraphs holding table data, recorded as groups of tag indexes
    index_groups = table_tag_index_groups(body_tag, sub_article_id, identifier)
    return block.graphic_href_list(body_tag, index_groups)


def transform_table_group(body_tag, table_index, table_group, sub_article_id):
"transform one set of p tags into table-wrap tags as specified in the table_group dict"
inline_graphic_p_tag = body_tag[table_group.get("inline_graphic_index")]
Expand All @@ -30,8 +51,10 @@ def transform_table_group(body_tag, table_index, table_group, sub_article_id):
inline_graphic_p_tag, body_tag, table_group.get("caption_index")
)

# rename the image file
new_file_name = table_file_name(image_href, sub_article_id, table_index)

# graphic tag
new_file_name = image_href
block.set_graphic_tag(inline_graphic_p_tag, image_href, new_file_name)

# convert inline-graphic p tag to a table-wrap tag
Expand Down
5 changes: 5 additions & 0 deletions elifecleaner/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ def file_extension(file_name):
return file_name.rsplit(".", 1)[-1]


def inf_file_identifier(inf_file_name):
    "part of an inline graphic file name after its last hyphen, e.g. inf1 in elife-70493-inf1.png"
    # remove the extension only if the name contains a full stop
    head, dot, _extension = inf_file_name.rpartition(".")
    stem = head if dot else inf_file_name
    # the text after the last hyphen, or the whole stem if it has no hyphen
    return stem.rpartition("-")[2]


# match ascii characters from decimal 0 to 31, as hexadecimal character entity strings
# e.g. &#x00; or &#x1F;
CONTROL_CHARACTER_ENTITY_MATCH_PATTERN = r"&#x0{0,2}[0-1][0-9A-Fa-f];"
Expand Down
10 changes: 0 additions & 10 deletions tests/test_fig.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,6 @@
)


class TestInfFileIdentifier(unittest.TestCase):
    "tests for fig.inf_file_identifier()"

    def test_inf_file_identifer(self):
        "the identifier is taken from the end of an inline-graphic file name"
        result = fig.inf_file_identifier("elife-70493-inf1.png")
        self.assertEqual(result, "inf1")


class TestFigFileNameIdentifer(unittest.TestCase):
"tests for fig.fig_file_name_identifier()"

Expand Down
24 changes: 23 additions & 1 deletion tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,28 @@ def table_sub_article_xml_fixture():
)


class TestTableInlineGraphicHrefs(unittest.TestCase):
    "tests for table.table_inline_graphic_hrefs()"

    def test_table_inline_graphic_hrefs(self):
        "list of xlink:href values of the inline-graphic tags to be converted to table-wrap"
        # the first inline-graphic follows a table label paragraph, the second
        # follows an ordinary paragraph and is not expected in the result
        xml_parts = [
            b'<sub-article id="sa1" xmlns:xlink="http://www.w3.org/1999/xlink">',
            b"<body>",
            b"<p><bold>Review table 1.</bold></p>",
            b'<p><inline-graphic xlink:href="elife-70493-inf1.png"/></p>',
            b"<p>Next paragraph is not an inline-graphic href.</p>",
            b'<p><inline-graphic xlink:href="elife-70493-inf2.png"/></p>',
            b"</body>",
            b"</sub-article>",
        ]
        sub_article_root = ElementTree.fromstring(b"".join(xml_parts))
        hrefs = table.table_inline_graphic_hrefs(sub_article_root, "test.zip")
        self.assertEqual(hrefs, ["elife-70493-inf1.png"])


class TestTransformTable(unittest.TestCase):
"tests for table.transform_table()"

Expand Down Expand Up @@ -77,7 +99,7 @@ def test_referee_report(self):
"<title>Table title.</title>"
"<p>This is the caption for this table that describes what it contains.</p>"
"</caption>"
'<graphic mimetype="image" mime-subtype="png" xlink:href="elife-70493-inf1.png" />'
'<graphic mimetype="image" mime-subtype="png" xlink:href="elife-70493-sa1-table1.png" />'
"</table-wrap>"
"<p>Another paragraph with an inline graphic "
'<inline-graphic xlink:href="elife-70493-inf2.jpg" />'
Expand Down
10 changes: 10 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ def test_file_extension(self):
)


class TestInfFileIdentifier(unittest.TestCase):
    "tests for utils.inf_file_identifier()"

    def test_inf_file_identifer(self):
        "the identifier is taken from the end of an inline-graphic file name"
        result = utils.inf_file_identifier("elife-70493-inf1.png")
        self.assertEqual(result, "inf1")


class TestMatchControlCharacterEntities(unittest.TestCase):
def test_match_control_character_entities(self):
self.assertEqual([], utils.match_control_character_entities(""))
Expand Down

0 comments on commit 9236bd4

Please sign in to comment.