Skip to content

Commit

Permalink
contrib(names): split xml and orcid transformers
Browse files Browse the repository at this point in the history
  • Loading branch information
Pablo Panero authored and slint committed Jan 21, 2022
1 parent 34a844b commit 88b85e1
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 23 deletions.
22 changes: 8 additions & 14 deletions invenio_vocabularies/contrib/names/datastreams.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2021 CERN.
# Copyright (C) 2021-2022 CERN.
#
# Invenio-Vocabularies is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
# details.

"""Names datastreams, transformers, writers and readers."""

import requests
from invenio_access.permissions import system_identity
from invenio_pidstore.errors import PIDDoesNotExistError
from invenio_records.dictutils import dict_lookup
Expand All @@ -17,7 +16,7 @@
from ...datastreams import StreamEntry
from ...datastreams.errors import TransformerError, WriterError
from ...datastreams.readers import SimpleHTTPReader
from ...datastreams.transformers import XMLTransformer
from ...datastreams.transformers import BaseTransformer
from ...datastreams.writers import ServiceWriter


Expand All @@ -34,18 +33,12 @@ def __init__(self, *args, test_mode=True, **kwargs):
super().__init__(origin, *args, **kwargs)


class OrcidXMLTransformer(XMLTransformer):
"""ORCiD XML Transfomer."""
class OrcidTransformer(BaseTransformer):
"""Transforms an ORCiD record into a names record."""

def apply(self, stream_entry, **kwargs):
"""Applies the transformation to the stream entry."""
xml_tree = self._xml_to_etree(stream_entry.entry)
researcher = self._etree_to_dict(xml_tree)
record = researcher["html"]["body"].get("record")

if not record:
raise TransformerError(f"Record not found in ORCiD entry.")

record = stream_entry.entry
person = record["person"]
orcid_id = record["orcid-identifier"]["uri"]

Expand Down Expand Up @@ -132,7 +125,7 @@ def write(self, stream_entry, *args, **kwargs):


VOCABULARIES_DATASTREAM_TRANSFORMERS = {
"orcid-xml": OrcidXMLTransformer,
"orcid": OrcidTransformer,
}
"""ORCiD Data Streams transformers."""

Expand All @@ -151,7 +144,8 @@ def write(self, stream_entry, *args, **kwargs):
}
},
"transformers": [
{"type": "orcid-xml"}
{"type": "xml"},
{"type": "orcid"}
],
"writers": [{
"type": "names-service",
Expand Down
45 changes: 40 additions & 5 deletions tests/contrib/names/test_names_datastreams.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2021 CERN.
# Copyright (C) 2021-2022 CERN.
#
# Invenio-Vocabularies is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand All @@ -16,7 +16,7 @@

from invenio_vocabularies.contrib.names.api import Name
from invenio_vocabularies.contrib.names.datastreams import \
NamesServiceWriter, OrcidHTTPReader, OrcidXMLTransformer
NamesServiceWriter, OrcidHTTPReader, OrcidTransformer
from invenio_vocabularies.contrib.names.services import NamesService, \
NamesServiceConfig
from invenio_vocabularies.datastreams import StreamEntry
Expand Down Expand Up @@ -122,9 +122,44 @@ def bytes_xml_entry():
return StreamEntry(XML_ENTRY_DATA)


def test_orcid_xml_transformer(bytes_xml_entry, expected_from_xml):
transformer = OrcidXMLTransformer()
assert expected_from_xml == transformer.apply(bytes_xml_entry).entry
@pytest.fixture(scope="module")
def dict_xml_entry():
    """Stream entry wrapping an ORCiD record expressed as a plain dict.

    The record is assembled section by section (identifier, person,
    activities) and then wrapped in a ``StreamEntry``.
    """
    identifier = {
        'uri': 'https://orcid.org/0000-0001-8135-3489',
        'path': '0000-0001-8135-3489',
        'host': 'orcid.org'
    }

    person = {
        'name': {
            'given-names': 'Lars Holm',
            'family-name': 'Nielsen',
            '@visibility': 'public',
            '@path': '0000-0001-8135-3489'
        },
        'external-identifiers': {
            '@path': '/0000-0001-8135-3489/external-identifiers'
        },
        '@path': '/0000-0001-8135-3489/person'
    }

    employments = {
        'affiliation-group': {
            'employment-summary': {
                'organization': {'name': 'CERN'}
            }
        },
        '@path': '/0000-0001-8135-3489/employments'
    }

    activities = {
        'employments': employments,
        '@path': '/0000-0001-8135-3489/activities'
    }

    record = {
        'orcid-identifier': identifier,
        'person': person,
        'activities-summary': activities,
        '@path': '/0000-0001-8135-3489'
    }
    return StreamEntry(record)


def test_orcid_transformer(dict_xml_entry, expected_from_xml):
    """Applying the ORCiD transformer to a dict entry gives the expected entry."""
    transformed = OrcidTransformer().apply(dict_xml_entry)
    assert expected_from_xml == transformed.entry


class MockResponse():
Expand Down
14 changes: 10 additions & 4 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2021 CERN.
# Copyright (C) 2021-2022 CERN.
#
# Invenio-Vocabularies is free software; you can redistribute it and/or
# modify it under the terms of the MIT License; see LICENSE file for more
Expand All @@ -19,9 +19,13 @@

from invenio_vocabularies.cli import _process_vocab, get_config_for_ds, \
vocabularies
from invenio_vocabularies.config import VOCABULARIES_DATASTREAM_TRANSFORMERS, \
VOCABULARIES_DATASTREAM_WRITERS
from invenio_vocabularies.contrib.names.api import Name
from invenio_vocabularies.contrib.names.datastreams import \
NamesServiceWriter, OrcidXMLTransformer
VOCABULARIES_DATASTREAM_TRANSFORMERS as NAMES_TRANSFORMERS
from invenio_vocabularies.contrib.names.datastreams import \
VOCABULARIES_DATASTREAM_WRITERS as NAMES_WRITERS
from invenio_vocabularies.contrib.names.services import NamesService, \
NamesServiceConfig

Expand Down Expand Up @@ -61,10 +65,12 @@ def base_app(base_app, names_service):
def app_config(app_config):
"""Mimic an instance's configuration."""
app_config["VOCABULARIES_DATASTREAM_TRANSFORMERS"] = {
"orcid-xml": OrcidXMLTransformer
**VOCABULARIES_DATASTREAM_TRANSFORMERS,
**NAMES_TRANSFORMERS,
}
app_config["VOCABULARIES_DATASTREAM_WRITERS"] = {
"names-service": NamesServiceWriter
**VOCABULARIES_DATASTREAM_WRITERS,
**NAMES_WRITERS,
}

return app_config
Expand Down

0 comments on commit 88b85e1

Please sign in to comment.