awards: refactor and formatting

inveniosoftware · Sep 30, 2024 · 1512b3f
1 parent 5e835b9
commit 1512b3f
Show file tree

Hide file tree

Showing 13 changed files with 87 additions and 45 deletions.
diff --git a/invenio_vocabularies/contrib/affiliations/datastreams.py b/invenio_vocabularies/contrib/affiliations/datastreams.py
@@ -53,7 +53,6 @@ class OpenAIREOrganizationTransformer(BaseTransformer):
     def apply(self, stream_entry, **kwargs):
         """Applies the transformation to the stream entry."""
         record = stream_entry.entry
-
         organization = {"openaire_id": record["id"]}
 
         for pid in record["pid"]:

diff --git a/invenio_vocabularies/contrib/awards/datastreams.py b/invenio_vocabularies/contrib/awards/datastreams.py
@@ -11,6 +11,7 @@
 import io
 
 import requests
+from flask import current_app
 from invenio_access.permissions import system_identity
 from invenio_i18n import lazy_gettext as _
 
@@ -160,16 +161,19 @@ def apply(self, stream_entry, **kwargs):
         award = {}
 
         # Here `id` is the project ID, which will be used to attach the update to the existing project.
-        award["id"] = f"00k4n6c32::{record['id']}"
+        award["id"] = (
+            f"{current_app.config['VOCABULARIES_AWARDS_EC_ROR_ID']}::{record['id']}"
+        )
 
-        categories = record["relations"]["categories"]["category"]
+        categories = record.get("relations", {}).get("categories", {}).get("category")
         if isinstance(categories, dict):
             categories = [categories]
 
         award["subjects"] = [
-            {"id": f"euroscivoc:{category['code'].split('/')[-1]}"}
+            {"id": f"euroscivoc:{vocab_id}"}
             for category in categories
             if category.get("@classification") == "euroSciVoc"
+            and (vocab_id := category["code"].split("/")[-1]).isdigit()
         ]
 
         organizations = record["relations"]["associations"]["organization"]
@@ -187,7 +191,6 @@ def apply(self, stream_entry, **kwargs):
                 continue
 
             organization_data = {
-                # TODO: Here the legal name is uppercase.
                 "organization": organization["legalname"],
             }
 
@@ -216,7 +219,9 @@ def __init__(self, *args, **kwargs):
         service_or_name = kwargs.pop("service_or_name", "awards")
         # Here we only update and we do not insert, since CORDIS data is used to augment existing awards
         # (with subjects and organizations information) and is not used to create new awards.
-        super().__init__(service_or_name=service_or_name, insert=False, *args, **kwargs)
+        super().__init__(
+            service_or_name=service_or_name, insert=False, update=True, *args, **kwargs
+        )
 
     def _entry_id(self, entry):
         """Get the id from an entry."""

diff --git a/invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json b/invenio_vocabularies/contrib/awards/jsonschemas/awards/award-v1.0.0.json
@@ -74,8 +74,7 @@
           },
           "uniqueItems": true
         }
-      },
-      "uniqueItems": true
+      }
     },
     "organizations": {
       "description": "Award's organizations.",

diff --git a/invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json b/invenio_vocabularies/contrib/awards/mappings/os-v1/awards/award-v1.0.0.json
@@ -76,6 +76,12 @@
             "type": "object",
             "dynamic": "true"
           },
+          "subject": {
+            "type": "keyword"
+          },
+          "scheme": {
+            "type": "keyword"
+          },
           "identifiers": {
             "properties": {
               "identifier": {

diff --git a/invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json b/invenio_vocabularies/contrib/awards/mappings/os-v2/awards/award-v1.0.0.json
@@ -76,6 +76,12 @@
             "type": "object",
             "dynamic": "true"
           },
+          "subject": {
+            "type": "keyword"
+          },
+          "scheme": {
+            "type": "keyword"
+          },
           "identifiers": {
             "properties": {
               "identifier": {

diff --git a/invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json b/invenio_vocabularies/contrib/awards/mappings/v7/awards/award-v1.0.0.json
@@ -76,6 +76,12 @@
             "type": "object",
             "dynamic": "true"
           },
+          "subject": {
+            "type": "keyword"
+          },
+          "scheme": {
+            "type": "keyword"
+          },
           "identifiers": {
             "properties": {
               "identifier": {

diff --git a/invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py b/invenio_vocabularies/contrib/subjects/euroscivoc/datastreams.py
@@ -85,7 +85,7 @@ def _get_notation(self, subject, rdf_graph):
     def _get_labels(self, subject, rdf_graph):
         """Extract prefLabel and altLabel languages for a subject."""
         labels = {
-            label.language: label.value
+            label.language: label.value.capitalize()
             for _, _, label in rdf_graph.triples(
                 (subject, self.SKOS_CORE.prefLabel, None)
             )
@@ -94,7 +94,7 @@ def _get_labels(self, subject, rdf_graph):
             for _, _, label in rdf_graph.triples(
                 (subject, self.SKOS_CORE.altLabel, None)
             ):
-                labels.setdefault(label.language, label.value)
+                labels.setdefault(label.language, label.value.capitalize())
         return labels
 
     def _find_parents(self, subject, rdf_graph):

diff --git a/invenio_vocabularies/datastreams/writers.py b/invenio_vocabularies/datastreams/writers.py
@@ -89,8 +89,6 @@ def _do_update(self, entry):
         current = self._resolve(vocab_id)
 
         updated = dict(current.to_dict(), **entry)
-        # TODO: Try to use _record instead of to_dict()
-        # updated = dict(current._record, **entry)
 
         return StreamEntry(self._service.update(self._identity, vocab_id, updated))
 

diff --git a/invenio_vocabularies/factories.py b/invenio_vocabularies/factories.py
@@ -27,8 +27,6 @@
 from .contrib.names.datastreams import DATASTREAM_CONFIG as names_ds_config
 from .contrib.subjects.datastreams import DATASTREAM_CONFIG as subjects_ds_config
 
-# from .contrib.projects.datastreams import DATASTREAM_CONFIG as projects_ds_config
-
 
 class VocabularyConfig:
     """Vocabulary Config Factory."""

diff --git a/tests/contrib/affiliations/conftest.py b/tests/contrib/affiliations/conftest.py
@@ -33,6 +33,23 @@ def affiliation_full_data():
     }
 
 
+@pytest.fixture(scope="function")
+def affiliation_openaire_data():
+    """Affiliation data carrying a PIC identifier (for the OpenAIRE writer test)."""
+    return {
+        "acronym": "CERN",
+        "id": "01ggx4157",
+        "identifiers": [{"identifier": "999988133", "scheme": "pic"}],
+        "name": "Test affiliation",
+        "title": {"en": "Test affiliation", "es": "Afiliacion de test"},
+        "country": "CH",
+        "country_name": "Switzerland",
+        "location_name": "Geneva",
+        "status": "active",
+        "types": ["facility", "funder"],
+    }
+
+
 @pytest.fixture(scope="function")
 def openaire_affiliation_full_data():
     """Full OpenAIRE affiliation data."""

diff --git a/tests/contrib/affiliations/test_affiliations_datastreams.py b/tests/contrib/affiliations/test_affiliations_datastreams.py
@@ -51,11 +51,13 @@ def expected_from_ror_json():
 
 
 def test_ror_transformer(app, dict_ror_entry, expected_from_ror_json):
+    """Test RORTransformer to ensure it transforms ROR entries correctly."""
     transformer = RORTransformer(vocab_schemes=affiliation_schemes)
     assert expected_from_ror_json == transformer.apply(dict_ror_entry).entry
 
 
 def test_affiliations_service_writer_create(app, search_clear, affiliation_full_data):
+    """Test AffiliationsServiceWriter for creating a new affiliation."""
     writer = AffiliationsServiceWriter()
     affiliation_rec = writer.write(StreamEntry(affiliation_full_data))
     affiliation_dict = affiliation_rec.entry.to_dict()
@@ -68,6 +70,7 @@ def test_affiliations_service_writer_create(app, search_clear, affiliation_full_
 def test_affiliations_service_writer_duplicate(
     app, search_clear, affiliation_full_data
 ):
+    """Test AffiliationsServiceWriter for handling duplicate entries."""
     writer = AffiliationsServiceWriter()
     affiliation_rec = writer.write(stream_entry=StreamEntry(affiliation_full_data))
     Affiliation.index.refresh()  # refresh index to make changes live
@@ -84,6 +87,7 @@ def test_affiliations_service_writer_duplicate(
 def test_affiliations_service_writer_update_existing(
     app, search_clear, affiliation_full_data, service
 ):
+    """Test updating an existing affiliation using AffiliationsServiceWriter."""
     # create vocabulary
     writer = AffiliationsServiceWriter(update=True)
     orig_affiliation_rec = writer.write(stream_entry=StreamEntry(affiliation_full_data))
@@ -107,6 +111,7 @@ def test_affiliations_service_writer_update_existing(
 def test_affiliations_service_writer_update_non_existing(
     app, search_clear, affiliation_full_data, service
 ):
+    """Test creating a new affiliation when updating a non-existing entry."""
     # vocabulary item not created, call update directly
     updated_affiliation = deepcopy(affiliation_full_data)
     updated_affiliation["name"] = "New name"
@@ -165,6 +170,7 @@ def expected_from_openaire_json():
 def test_openaire_organization_transformer(
     app, dict_openaire_organization_entry, expected_from_openaire_json
 ):
+    """Test OpenAIREOrganizationTransformer for transforming OpenAIRE entries."""
     transformer = OpenAIREOrganizationTransformer()
     assert (
         expected_from_openaire_json
@@ -173,11 +179,16 @@ def test_openaire_organization_transformer(
 
 
 def test_openaire_affiliations_service_writer(
-    app, search_clear, affiliation_full_data, openaire_affiliation_full_data, service
+    app,
+    search_clear,
+    affiliation_openaire_data,
+    openaire_affiliation_full_data,
+    service,
 ):
+    """Test writing and updating an OpenAIRE affiliation entry."""
     # create vocabulary with original service writer
     orig_writer = AffiliationsServiceWriter()
-    orig_affiliation_rec = orig_writer.write(StreamEntry(affiliation_full_data))
+    orig_affiliation_rec = orig_writer.write(StreamEntry(affiliation_openaire_data))
     orig_affiliation_dict = orig_affiliation_rec.entry.to_dict()
     Affiliation.index.refresh()  # refresh index to make changes live
 
@@ -193,10 +204,6 @@ def test_openaire_affiliations_service_writer(
     # updating fields changing from one update to the other
     orig_affiliation_dict["revision_id"] = affiliation_dict["revision_id"]
     orig_affiliation_dict["updated"] = affiliation_dict["updated"]
-    # Adding the extra identifier coming from OpenAIRE
-    orig_affiliation_dict["identifiers"].extend(
-        openaire_affiliation_full_data["identifiers"]
-    )
 
     assert dict(orig_affiliation_dict) == affiliation_dict
 
@@ -207,6 +214,7 @@ def test_openaire_affiliations_service_writer(
 def test_openaire_affiliations_service_writer_non_openorgs(
     app, openaire_affiliation_full_data
 ):
+    """Test error handling for non-OpenOrgs ID in OpenAIRE writer."""
     writer = OpenAIREAffiliationsServiceWriter()
 
     updated_openaire_affiliation = deepcopy(openaire_affiliation_full_data)
@@ -226,6 +234,7 @@ def test_openaire_affiliations_service_writer_non_openorgs(
 def test_openaire_affiliations_service_writer_no_id(
     app, openaire_affiliation_full_data
 ):
+    """Test error handling when missing ID in OpenAIRE writer."""
     writer = OpenAIREAffiliationsServiceWriter()
 
     updated_openaire_affiliation = deepcopy(openaire_affiliation_full_data)
@@ -241,6 +250,7 @@ def test_openaire_affiliations_service_writer_no_id(
 def test_openaire_affiliations_service_writer_no_alternative_identifiers(
     app, openaire_affiliation_full_data
 ):
+    """Test error handling when missing alternative identifiers in OpenAIRE writer."""
     writer = OpenAIREAffiliationsServiceWriter()
 
     updated_openaire_affiliation = deepcopy(openaire_affiliation_full_data)