contrib(names): split xml and orcid transformers

inveniosoftware · Jan 21, 2022 · 88b85e1 · 88b85e1
1 parent 34a844b
commit 88b85e1
Show file tree

Hide file tree

Showing 3 changed files with 58 additions and 23 deletions.
diff --git a/invenio_vocabularies/contrib/names/datastreams.py b/invenio_vocabularies/contrib/names/datastreams.py
@@ -1,14 +1,13 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2021 CERN.
+# Copyright (C) 2021-2022 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
 # details.
 
 """Names datastreams, transformers, writers and readers."""
 
-import requests
 from invenio_access.permissions import system_identity
 from invenio_pidstore.errors import PIDDoesNotExistError
 from invenio_records.dictutils import dict_lookup
@@ -17,7 +16,7 @@
 from ...datastreams import StreamEntry
 from ...datastreams.errors import TransformerError, WriterError
 from ...datastreams.readers import SimpleHTTPReader
-from ...datastreams.transformers import XMLTransformer
+from ...datastreams.transformers import BaseTransformer
 from ...datastreams.writers import ServiceWriter
 
 
@@ -34,18 +33,12 @@ def __init__(self, *args, test_mode=True, **kwargs):
         super().__init__(origin, *args, **kwargs)
 
 
-class OrcidXMLTransformer(XMLTransformer):
-    """ORCiD XML Transfomer."""
+class OrcidTransformer(BaseTransformer):
+    """Transforms an ORCiD record into a names record."""
 
     def apply(self, stream_entry, **kwargs):
         """Applies the transformation to the stream entry."""
-        xml_tree = self._xml_to_etree(stream_entry.entry)
-        researcher = self._etree_to_dict(xml_tree)
-        record = researcher["html"]["body"].get("record")
-
-        if not record:
-            raise TransformerError(f"Record not found in ORCiD entry.")
-
+        record = stream_entry.entry
         person = record["person"]
         orcid_id = record["orcid-identifier"]["uri"]
 
@@ -132,7 +125,7 @@ def write(self, stream_entry, *args, **kwargs):
 
 
 VOCABULARIES_DATASTREAM_TRANSFORMERS = {
-    "orcid-xml": OrcidXMLTransformer,
+    "orcid": OrcidTransformer,
 }
 """ORCiD Data Streams transformers."""
 
@@ -151,7 +144,8 @@ def write(self, stream_entry, *args, **kwargs):
         }
     },
     "transformers": [
-        {"type": "orcid-xml"}
+        {"type": "xml"},
+        {"type": "orcid"}
     ],
     "writers": [{
         "type": "names-service",

diff --git a/tests/contrib/names/test_names_datastreams.py b/tests/contrib/names/test_names_datastreams.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2021 CERN.
+# Copyright (C) 2021-2022 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -16,7 +16,7 @@
 
 from invenio_vocabularies.contrib.names.api import Name
 from invenio_vocabularies.contrib.names.datastreams import \
-    NamesServiceWriter, OrcidHTTPReader, OrcidXMLTransformer
+    NamesServiceWriter, OrcidHTTPReader, OrcidTransformer
 from invenio_vocabularies.contrib.names.services import NamesService, \
     NamesServiceConfig
 from invenio_vocabularies.datastreams import StreamEntry
@@ -122,9 +122,44 @@ def bytes_xml_entry():
     return StreamEntry(XML_ENTRY_DATA)
 
 
-def test_orcid_xml_transformer(bytes_xml_entry, expected_from_xml):
-    transformer = OrcidXMLTransformer()
-    assert expected_from_xml == transformer.apply(bytes_xml_entry).entry
+@pytest.fixture(scope="module")
+def dict_xml_entry():
+    return StreamEntry({
+        'orcid-identifier': {
+            'uri': 'https://orcid.org/0000-0001-8135-3489',
+            'path': '0000-0001-8135-3489',
+            'host': 'orcid.org'
+        },
+        'person': {
+            'name': {
+                'given-names': 'Lars Holm',
+                'family-name': 'Nielsen',
+                '@visibility': 'public',
+                '@path': '0000-0001-8135-3489'
+            },
+            'external-identifiers': {
+                '@path': '/0000-0001-8135-3489/external-identifiers'
+            },
+            '@path': '/0000-0001-8135-3489/person'
+        },
+        'activities-summary': {
+            'employments': {
+                'affiliation-group': {
+                    'employment-summary': {
+                        'organization': {'name': 'CERN'}
+                    }
+                },
+                '@path': '/0000-0001-8135-3489/employments'
+            },
+            '@path': '/0000-0001-8135-3489/activities'
+        },
+        '@path': '/0000-0001-8135-3489'
+    })
+
+
+def test_orcid_transformer(dict_xml_entry, expected_from_xml):
+    transformer = OrcidTransformer()
+    assert expected_from_xml == transformer.apply(dict_xml_entry).entry
 
 
 class MockResponse():

diff --git a/tests/test_cli.py b/tests/test_cli.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright (C) 2021 CERN.
+# Copyright (C) 2021-2022 CERN.
 #
 # Invenio-Vocabularies is free software; you can redistribute it and/or
 # modify it under the terms of the MIT License; see LICENSE file for more
@@ -19,9 +19,13 @@
 
 from invenio_vocabularies.cli import _process_vocab, get_config_for_ds, \
     vocabularies
+from invenio_vocabularies.config import VOCABULARIES_DATASTREAM_TRANSFORMERS, \
+    VOCABULARIES_DATASTREAM_WRITERS
 from invenio_vocabularies.contrib.names.api import Name
 from invenio_vocabularies.contrib.names.datastreams import \
-    NamesServiceWriter, OrcidXMLTransformer
+    VOCABULARIES_DATASTREAM_TRANSFORMERS as NAMES_TRANSFORMERS
+from invenio_vocabularies.contrib.names.datastreams import \
+    VOCABULARIES_DATASTREAM_WRITERS as NAMES_WRITERS
 from invenio_vocabularies.contrib.names.services import NamesService, \
     NamesServiceConfig
 
@@ -61,10 +65,12 @@ def base_app(base_app, names_service):
 def app_config(app_config):
     """Mimic an instance's configuration."""
     app_config["VOCABULARIES_DATASTREAM_TRANSFORMERS"] = {
-        "orcid-xml": OrcidXMLTransformer
+        **VOCABULARIES_DATASTREAM_TRANSFORMERS,
+        **NAMES_TRANSFORMERS,
     }
     app_config["VOCABULARIES_DATASTREAM_WRITERS"] = {
-        "names-service": NamesServiceWriter
+        **VOCABULARIES_DATASTREAM_WRITERS,
+        **NAMES_WRITERS,
     }
 
     return app_config