diff --git a/src/spinneret/workbook.py b/src/spinneret/workbook.py index f140a43..6d6c8ab 100644 --- a/src/spinneret/workbook.py +++ b/src/spinneret/workbook.py @@ -161,11 +161,17 @@ def get_description(element: etree._Element) -> str: if element.tag in "dataset": # Add abstract and keywords, they are descriptive of the entire dataset abstract = element.xpath("./abstract") - abstract = etree.tostring(abstract[0], encoding="utf-8", method="text") - abstract = abstract.decode("utf-8").strip() + if len(abstract) != 0: # abstract is optional + abstract = etree.tostring(abstract[0], encoding="utf-8", method="text") + abstract = abstract.decode("utf-8").strip() + else: + abstract = "" keywords = element.xpath(".//keyword") - keywords = [k.text for k in keywords] - description = abstract + " " + " ".join(keywords) + if len(keywords) != 0: # keywords are optional + keywords = [k.text for k in keywords] + else: + keywords = "" + description = abstract + " ".join(keywords) elif element.tag in entities: description = element.findtext(".//entityName") elif element.tag in "attribute": diff --git a/tests/edi.3.9_annotation_workbook.tsv b/tests/edi.3.9_annotation_workbook.tsv index fbc465e..6e2192a 100644 --- a/tests/edi.3.9_annotation_workbook.tsv +++ b/tests/edi.3.9_annotation_workbook.tsv @@ -1,5 +1,5 @@ package_id url element element_id element_xpath context description subject predicate predicate_id object object_id author date comment -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 dataset ccbc2d88-fff7-471c-a123-4f30f533b707 /eml:eml/dataset edi.3.9 "The Santa Barbara Channel Marine Biodiversity Observation Network +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 dataset 87feede4-13bc-412c-9d81-40545eb54e22 /eml:eml/dataset edi.3.9 "The Santa Barbara Channel Marine Biodiversity Observation Network (SBCMBON) tracks long-term patterns in species abundance and diversity. This dataset contains cover of kelp forest sessile invertebrates, understory macroalgae, and substrate types by @@ -76,22 +76,22 @@ edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 da Kevin Lafferty Klafferty@usgs.gov - Mike Kenner mkenner@ucsc.edu Population Abundance BasisofRecord: HumanObservation Occurrence: OrganismQuantity Taxon: ScientificName algae invertebrate random point contact Santa Barbara Channel Marine BON uniform point contact" dataset -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 dataTable 74397dfa-151e-4eb8-818b-c9d63ead8272 /eml:eml/dataset/dataTable dataset SBCMBON kelp forest integrated benthic cover biological survey SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 6a2d4c50-e984-4dc0-91b5-4dd5a5ab989f /eml:eml/dataset/dataTable/attributeList/attribute[1] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Source project for this data data_source -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 874b9da5-7494-44ef-ba52-149220062520 /eml:eml/dataset/dataTable/attributeList/attribute[2] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Sampling method sample_method -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute d2e4f6af-b556-4a0a-97e8-67a551590a92 /eml:eml/dataset/dataTable/attributeList/attribute[3] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Date of survey date -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute f5304e0a-caa5-4f1d-b686-039722d3cda3 /eml:eml/dataset/dataTable/attributeList/attribute[4] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv ID of a site, assigned by each project site_id -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute b534c98f-3d20-46ba-8d2c-4c188418d48f /eml:eml/dataset/dataTable/attributeList/attribute[5] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Identifier for the subsite,one level below site subsite_id -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 28d7b009-3d4b-4535-943b-647cfdaae22d /eml:eml/dataset/dataTable/attributeList/attribute[6] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Identifier for the transect transect_id -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 5daece56-6088-44c1-8434-ec8dba727b80 /eml:eml/dataset/dataTable/attributeList/attribute[7] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Identifier for the replicate replicate_id -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 790a95e1-22f2-4dac-8b36-7e0114f26bc4 /eml:eml/dataset/dataTable/attributeList/attribute[8] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Code assigned by SBC MBON for this taxon from this data source (project) proj_taxon_id -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 1370ae52-41da-49d1-92dd-d5077500cfd9 /eml:eml/dataset/dataTable/attributeList/attribute[9] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Number of total points counted on a UPC or RPC survey points -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute d9ac0022-c157-416d-9f93-509f629273a1 /eml:eml/dataset/dataTable/attributeList/attribute[10] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Number of organisms counted count -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 7de66f26-b649-459f-ad93-122d7eda3b1b /eml:eml/dataset/dataTable/attributeList/attribute[11] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Taxon code assigned by an authoritative source auth_taxon_id -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute f8eb86e2-f02a-4ff9-bd9a-c0b810c312db /eml:eml/dataset/dataTable/attributeList/attribute[12] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Name of the athority or registry assigning the Authoritative Taxon Code auth_name -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 686a51da-c990-45fe-a8bb-94fc5e52d41b /eml:eml/dataset/dataTable/attributeList/attribute[13] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Taxon name, usually species binomial or other taxon name taxon_name -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 65cbc590-943a-4656-824b-e449b28cca01 /eml:eml/dataset/dataTable/attributeList/attribute[14] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv The site, as named by each project site_name -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 93fa3786-fde3-453e-8dd8-85f892ce6191 /eml:eml/dataset/dataTable/attributeList/attribute[15] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Survey region within a site subsite_name -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 3bc6f4fc-40c1-47c5-96c7-5a6b2b6d649b /eml:eml/dataset/dataTable/attributeList/attribute[16] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Site latitude latitude -edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 18301dfd-766b-43d8-ae27-31ae32cd4828 /eml:eml/dataset/dataTable/attributeList/attribute[17] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Site longitude longitude + Mike Kenner mkenner@ucsc.eduPopulation Abundance BasisofRecord: HumanObservation Occurrence: OrganismQuantity Taxon: ScientificName algae invertebrate random point contact Santa Barbara Channel Marine BON uniform point contact" dataset +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 dataTable 8b1d8a06-a062-485f-95aa-e0e08a0dcaf2 /eml:eml/dataset/dataTable dataset SBCMBON kelp forest integrated benthic cover biological survey SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute d81ab758-0831-411a-984f-a76d6d6f5582 /eml:eml/dataset/dataTable/attributeList/attribute[1] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Source project for this data data_source +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 66c97223-4317-4c91-8177-bbd83ed0f847 /eml:eml/dataset/dataTable/attributeList/attribute[2] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Sampling method sample_method +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 42df9151-9914-486b-9f4f-694e84aeaf72 /eml:eml/dataset/dataTable/attributeList/attribute[3] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Date of survey date +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 552f7665-87ab-4d16-857d-00325b25d39e /eml:eml/dataset/dataTable/attributeList/attribute[4] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv ID of a site, assigned by each project site_id +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 38abf50a-1f6e-494f-9537-c3f3b60f5269 /eml:eml/dataset/dataTable/attributeList/attribute[5] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Identifier for the subsite,one level below site subsite_id +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute a9b7494f-4185-4779-8543-a55917fa6841 /eml:eml/dataset/dataTable/attributeList/attribute[6] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Identifier for the transect transect_id +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 9c4664f8-e21c-4208-a6e2-2eb44deb594e /eml:eml/dataset/dataTable/attributeList/attribute[7] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Identifier for the replicate replicate_id +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 69fcc003-992a-4c37-9132-44608515bda7 /eml:eml/dataset/dataTable/attributeList/attribute[8] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Code assigned by SBC MBON for this taxon from this data source (project) proj_taxon_id +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 1dabc36c-12be-4ac1-9ccd-7eb72d1643a0 /eml:eml/dataset/dataTable/attributeList/attribute[9] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Number of total points counted on a UPC or RPC survey points +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute b6561c2f-b615-4bea-b81f-d1bb35f02601 /eml:eml/dataset/dataTable/attributeList/attribute[10] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Number of organisms counted count +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute db6d969e-bb57-4cd5-a94d-e6c45234741b /eml:eml/dataset/dataTable/attributeList/attribute[11] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Taxon code assigned by an authoritative source auth_taxon_id +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 2fde667d-f9a1-499c-b4a8-425e9cda9663 /eml:eml/dataset/dataTable/attributeList/attribute[12] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Name of the athority or registry assigning the Authoritative Taxon Code auth_name +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 2a204613-bb44-4229-90a5-e027739a3c9f /eml:eml/dataset/dataTable/attributeList/attribute[13] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Taxon name, usually species binomial or other taxon name taxon_name +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 856bb74d-6331-43cd-908b-2e12666b9a11 /eml:eml/dataset/dataTable/attributeList/attribute[14] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv The site, as named by each project site_name +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 51b73b52-a8b0-4a54-8f1f-306e67e9e8e3 /eml:eml/dataset/dataTable/attributeList/attribute[15] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Survey region within a site subsite_name +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute c505ba1b-531e-401c-a19e-84cdfa76a4f3 /eml:eml/dataset/dataTable/attributeList/attribute[16] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Site latitude latitude +edi.3.9 https://portal.edirepository.org/nis/metadataviewer?packageid=edi.3.9 attribute 9c3341b5-2dd8-4fb6-863e-dce86c05128b /eml:eml/dataset/dataTable/attributeList/attribute[17] SBCMBON_kelp_forest_integrated_benthic_cover_20210120.csv Site longitude longitude diff --git a/tests/test_workbook.py b/tests/test_workbook.py index 2b08197..f004276 100644 --- a/tests/test_workbook.py +++ b/tests/test_workbook.py @@ -54,3 +54,22 @@ def test_get_description(): description = get_description(element) assert isinstance(description, str) assert len(description) > 0 + + +def test_get_description_handles_missing_element(): + """Test that the get_description function returns an empty string when the + optional elements are missing""" + + # Read test file + eml_file = datasets.get_example_eml_dir() + "/" + "edi.3.9.xml" + eml = etree.parse(eml_file) + + # Remove abstract and keywordSet elements from dataset + element = eml.xpath(".//dataset")[0] + element.remove(element.find("abstract")) + for kw in element.findall(".//keywordSet"): + element.remove(kw) + + # Test element with missing abstract + description = get_description(element) + assert description == ""