Skip to content

Commit

Permalink
Affiliation loaded first from DB and after from ORCID (#236)
Browse files Browse the repository at this point in the history
* Update affiliation.py

* take from db first / #235

* duplicate - logging
  • Loading branch information
eliselavy authored Dec 30, 2024
1 parent bd4206e commit 5f54ce8
Show file tree
Hide file tree
Showing 3 changed files with 176 additions and 93 deletions.
4 changes: 2 additions & 2 deletions jdh/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,10 @@
'level': get_env_variable('DJANGO_LOG_LEVEL', 'DEBUG'),
'propagate': True,
},
'jdhseo.views': {
'jdhseo': {
'handlers': ['console'],
'level': get_env_variable('DJANGO_LOG_LEVEL', 'DEBUG'),
'propagate': False,
'propagate': True,
},
},
'formatters': {
Expand Down
220 changes: 164 additions & 56 deletions jdhapi/utils/affiliation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,25 @@
import pycountry
from jdhseo.utils import get_affiliation
from jdhapi.models import Author

logger = logging.getLogger(__name__)


def get_authors(article_authors, affiliations):
"""
Extracts and returns a list of authors with their respective affiliations.
Args:
article_authors (list): A list of author objects, where each object contains
attributes like 'firstname', 'lastname', and 'orcid'.
affiliations (list): A list of affiliation dictionaries, where each dictionary
contains 'aff_id', 'aff_pub_id', and 'authors_link' which
is a list of author last names linked to the affiliation.
Returns:
list: A list of dictionaries, where each dictionary represents an author with
their 'given_names', 'surname', 'orcid', 'aff_id', and 'aff_pub_id'.
"""
authors = []
for author in article_authors:
for affiliation in affiliations:
Expand All @@ -18,91 +33,162 @@ def get_authors(article_authors, affiliations):
"surname": author.lastname,
"orcid": author.orcid,
"aff_id": affiliation["aff_id"],
"aff_pub_id": affiliation["aff_pub_id"]
"aff_pub_id": affiliation["aff_pub_id"],
}
authors.append(contrib)
# logger.debug(f'authors {authors}')
return authors

def get_affiliation_from_orcid(orcid_url, affiliation):
    """
    Retrieve the affiliation details from ORCID.

    Args:
        orcid_url (str): The ORCID URL of the author. May be None or empty
            when the author has no ORCID, in which case nothing is looked up.
        affiliation (str): The institution affiliation of the author.

    Returns:
        dict or None: A dictionary with the keys "institution", "city",
        "country" and "country_name" if ORCID yields a usable city/country
        pair, otherwise None.
    """
    ORCID_URL = "https://orcid.org/"
    # Authors without an ORCID cannot be looked up; return before calling
    # str.partition on None.
    if not orcid_url:
        return None

    logger.debug(f"ORCID URL: {orcid_url}")
    orcid = orcid_url.partition(ORCID_URL)[2]
    city_country = get_affiliation(orcid)
    if not city_country:
        logger.debug(f"No affiliation found in ORCID for author {orcid}")
        return None

    # NOTE(review): assumes get_affiliation returns "<city> - <alpha-2 code>"
    # - confirm against jdhseo.utils. Splitting on the LAST hyphen keeps
    # hyphenated city names (e.g. "Esch-sur-Alzette") intact.
    city, separator, country = city_country.rpartition("-")
    city = city.strip()
    country = country.strip()

    country_entry = None
    if separator:
        try:
            country_entry = pycountry.countries.get(alpha_2=country)
        except LookupError:
            # Older pycountry releases raise instead of returning None.
            country_entry = None

    if country_entry is None:
        # Unusable ORCID data is treated as "not found" rather than crashing
        # on `.name` of a missing country.
        logger.warning(
            f"Unrecognised city/country '{city_country}' from ORCID for author {orcid}"
        )
        return None

    logger.debug(f"Affiliation found in ORCID for author {orcid}")
    return {
        "institution": affiliation,
        "city": city,
        "country": country,
        "country_name": country_entry.name,
    }

def check_database_for_affiliation(author_id):
    """
    Look up an author's affiliation in the database.

    Args:
        author_id (int): The ID of the author.

    Returns:
        dict or None: A dictionary with the keys "institution", "city",
        "country" and "country_name" when the author exists and has both a
        city and a country stored; None when the author does not exist or
        either field is empty.
    """
    try:
        author = Author.objects.get(id=author_id)
    except Author.DoesNotExist:
        logger.debug(f"Author with ID {author_id} does not exist in the database.")
        return None

    has_location = author.city and author.country
    if not has_location:
        logger.debug(
            f"City and country not found in DB for author {author.lastname}"
        )
        return None

    logger.debug(
        f"Affiliation found in DB for author {author.lastname}"
    )
    return {
        "institution": author.affiliation,
        "city": author.city,
        "country": author.country,
        "country_name": author.country.name,
    }



def get_affiliation_json_one(author_id, orcid_url, affiliation):
    """
    Retrieve the affiliation details for an author.

    The database is consulted first; ORCID is only contacted when the
    database has no city/country for the author and an ORCID URL is
    available. When neither source answers, a placeholder affiliation is
    returned.

    Args:
        author_id (int): The ID of the author in the database.
        orcid_url (str): The ORCID URL of the author (may be None or empty).
        affiliation (str): The institution affiliation of the author.

    Returns:
        dict: A dictionary with the keys "institution", "city", "country"
        and "country_name". The last three are "NOT FOUND" when no source
        provided them.
    """
    logger.debug('START get_affiliation_json_one')

    # 1. Prefer what is already stored in the database.
    db_affiliation = check_database_for_affiliation(author_id)
    if db_affiliation:
        return db_affiliation

    # 2. Fall back to ORCID, but only when the author actually has one.
    if orcid_url:
        orcid_affiliation = get_affiliation_from_orcid(orcid_url, affiliation)
        if orcid_affiliation:
            return orcid_affiliation

    # 3. Nothing found anywhere: return the placeholder affiliation.
    logger.debug('END get_affiliation_json_one - default_affiliation')
    return {
        "institution": affiliation,
        "city": "NOT FOUND",
        "country": "NOT FOUND",
        "country_name": "NOT FOUND",
    }


def get_aff_pub_id(publisher_id, aff_id):
    """
    Build the formatted affiliation publisher ID.

    The publisher ID is lower-cased and the affiliation ID is appended after
    the literal "_aff_00" (the "00" is a fixed prefix, not zero-padding).
    Example: ("jdh-2021-1006", 1) -> "j_jdh-2021-1006_aff_001".

    Args:
        publisher_id (str): The ID of the publisher.
        aff_id (int): The affiliation ID.

    Returns:
        str: A string in the format "j_<publisher_id>_aff_00<aff_id>".
    """
    normalized_publisher = publisher_id.lower()
    return f"j_{normalized_publisher}_aff_00{aff_id!s}"


def get_affiliation_json(authors, publisher_id):
"""
Generate a list of affiliation JSON objects for a given list of authors and a publisher ID.
Args:
authors (list): A list of author objects. Each author object should have the attributes 'id', 'orcid', 'affiliation', and 'lastname'.
publisher_id (str): The ID of the publisher.
Returns:
list: A list of dictionaries, each representing an affiliation. Each dictionary contains the following keys:
- aff_id (int): The affiliation ID.
- authors_link (list): A list of author last names associated with the affiliation.
- aff_pub_id (str): The formatted affiliation publisher ID.
- institution (str): The institution name (from the author's affiliation).
"""
affiliations = []
i = 1
for author in authors:
affiliation_one = get_affiliation_json_one(author.id, author.orcid, author.affiliation)
affiliation_one = get_affiliation_json_one(
author.id, author.orcid, author.affiliation
)
if len(affiliations) == 0:
affiliation_one["aff_id"] = i
affiliation_one["authors_link"] = [author.lastname]
# format j_publisherId_aff_00sup Ex: j_jdh-2021-1006_aff_001
affiliation_one["aff_pub_id"] = get_aff_pub_id(publisher_id, affiliation_one["aff_id"])
affiliation_one["aff_pub_id"] = get_aff_pub_id(
publisher_id, affiliation_one["aff_id"]
)
affiliations.append(affiliation_one)
else:
# need to check if already exist
result = next(
(item for item in affiliations if item['institution'] == affiliation_one['institution']),
{}
(
item
for item in affiliations
if item["institution"] == affiliation_one["institution"]
),
{},
)
if result:
# need to add author
Expand All @@ -111,16 +197,38 @@ def get_affiliation_json(authors, publisher_id):
i += 1
affiliation_one["aff_id"] = i
affiliation_one["authors_link"] = [author.lastname]
affiliation_one["aff_pub_id"] = get_aff_pub_id(publisher_id, affiliation_one["aff_id"])
affiliation_one["aff_pub_id"] = get_aff_pub_id(
publisher_id, affiliation_one["aff_id"]
)
affiliations.append(affiliation_one)
# logger.info(f'affiliations: {affiliations}')
return affiliations


# method to check if affiliation is default_affiliation or not
def is_default_affiliation(affiliations):
    """
    Check whether any affiliation in the list still carries placeholder values.

    An affiliation counts as "default" when its "city", "country" or
    "country_name" equals "NOT FOUND".

    Args:
        affiliations (list of dict): Affiliation dictionaries, each containing
            the keys "city", "country" and "country_name".

    Returns:
        bool: True if at least one affiliation has "NOT FOUND" for "city",
        "country" or "country_name"; False otherwise (including for an empty
        list).
    """
    placeholder = "NOT FOUND"
    return any(
        affiliation["city"] == placeholder
        or affiliation["country"] == placeholder
        or affiliation["country_name"] == placeholder
        for affiliation in affiliations
    )
45 changes: 10 additions & 35 deletions jdhseo/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,15 @@ def ArticleDetail(request, pid):
remote_url = urllib.parse.urljoin(
settings.JDHSEO_PROXY_HOST, notebook_url)
try:
res = requests.get(remote_url)
res = requests.get(remote_url, timeout=10)
# add NB paragraphs to context
context.update({'nb': parseJupyterNotebook(res.json(), contact_orcid)})
try:
notebook_data = res.json()
context.update({'nb': parseJupyterNotebook(notebook_data, contact_orcid)})
except ValueError as e:
logger.error(f'Error parsing JSON for article pk={article.pk} notebook remote_url={remote_url}')
logger.exception(e)
raise Http404(f'Error parsing JSON for article pk={article.pk} notebook remote_url={remote_url}')
except Exception as e:
logger.error(
f'Error occurred on article pk={article.pk}'
Expand Down Expand Up @@ -142,40 +148,9 @@ def ArticleXmlDG(request, pid):
}
keywords.append(keyword)
if 'title' in article.data:
articleTitle = html.fromstring(marko.convert(article.data['title'][0])).text_content()
article_title = html.fromstring(marko.convert(article.data['title'][0])).text_content()
context = {
'articleXml': ArticleXml(article.abstract.authors.all(), articleTitle, article.doi, keywords, article.publication_date, article.copyright_type, article.issue, pid),
'journal_publisher_id': 'jdh',
'journal_code': 'jdh',
'doi_code': 'jdh',
'issn': '2747-5271',
}
except Article.DoesNotExist:
raise Http404("Article does not exist")
return render(request, 'jdhseo/article_dg.xml', context, content_type='text/xml; charset=utf-8')


def ArticleXmlDG(request, pid):
try:
article = Article.objects.get(
abstract__pid=pid,
status=Article.Status.PUBLISHED)

nbauthors = article.abstract.authors.count()
logger.debug(f'Nb Authors(count={nbauthors}) for article {pid}')
logger.debug(f'Belongs to issue {article.issue}')
keywords = []
if 'keywords' in article.data:
array_keys = article.data['keywords'][0].replace(';', ',').split(',')
for item in array_keys:
keyword = {
"keyword": item,
}
keywords.append(keyword)
if 'title' in article.data:
articleTitle = html.fromstring(marko.convert(article.data['title'][0])).text_content()
context = {
'articleXml': ArticleXml(article.abstract.authors.all(), articleTitle, article.doi, keywords, article.publication_date, article.copyright_type, article.issue, pid),
'articleXml': ArticleXml(article.abstract.authors.all(), article_title, article.doi, keywords, article.publication_date, article.copyright_type, article.issue, pid),
'journal_publisher_id': 'jdh',
'journal_code': 'jdh',
'doi_code': 'jdh',
Expand Down

0 comments on commit 5f54ce8

Please sign in to comment.