Skip to content

Commit

Permalink
feat: prevent processing errors from empty XML tags
Browse files Browse the repository at this point in the history
Create helper function to remove empty XML tags to avoid unexpected
behavior when processing elements like keywords in
`workbook.get_description`.
  • Loading branch information
clnsmth authored Aug 31, 2024
1 parent 87a66d0 commit 574970e
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/spinneret/utilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from os import environ
from json import load
from lxml import etree


def load_configuration(config_file: str) -> None:
Expand All @@ -18,3 +19,14 @@ def load_configuration(config_file: str) -> None:
config = load(config)
for key, value in config.items():
environ[key] = value


def delete_empty_tags(xml: etree._ElementTree) -> etree._ElementTree:
    """Delete empty tags from an XML tree, in place.

    An element is treated as empty when it has no child nodes at all: no
    text, child elements, comments or processing instructions. Attributes
    are not child nodes, so an element carrying only attributes is removed
    as well.

    The empty elements are collected once, up front. A parent that only
    becomes empty because its children were removed is left in place.

    :param xml: The XML tree to be cleaned. It is modified in place.
    :returns: The same tree object, with the empty elements removed.
    """
    for element in xml.xpath(".//*[not(node())]"):
        parent = element.getparent()
        # lxml stores the text that follows an element in ``element.tail``
        # and ``remove()`` drops it together with the element. Re-attach it
        # to the preceding node so surrounding (mixed-content) text survives.
        if element.tail:
            previous = element.getprevious()
            if previous is not None:
                previous.tail = (previous.tail or "") + element.tail
            else:
                parent.text = (parent.text or "") + element.tail
        parent.remove(element)
    return xml
25 changes: 25 additions & 0 deletions tests/test_utilities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Test utilities module"""

from lxml import etree
from spinneret import datasets
from spinneret.utilities import delete_empty_tags


def test_delete_empty_tags():
    """Check that delete_empty_tags strips an empty element from an EML tree"""

    # Load the example EML document shipped with the package
    eml_path = datasets.get_example_eml_dir() + "/" + "edi.3.9.xml"
    tree = etree.parse(eml_path)

    # Inject a childless, textless element under the root and confirm
    # that it is actually present before cleaning
    root = tree.getroot()
    placeholder = etree.Element("empty_tag")
    root.append(placeholder)
    matches_before = tree.xpath(".//empty_tag")
    assert len(matches_before) == 1

    # Clean the tree; the function hands back the tree it was given
    tree = delete_empty_tags(tree)

    # The injected element must be gone afterwards
    matches_after = tree.xpath(".//empty_tag")
    assert len(matches_after) == 0

0 comments on commit 574970e

Please sign in to comment.