Refactor author and affiliation handling in pdf generation to use same logic as for the xml #235
C2DH · Jan 2, 2025 · 9fe05d9 · 9fe05d9
1 parent 1c2eec3
commit 9fe05d9
Show file tree

Hide file tree

Showing 4 changed files with 50 additions and 18 deletions.
diff --git a/jdhapi/utils/publication_date.py b/jdhapi/utils/publication_date.py
@@ -41,7 +41,6 @@ def get_order_publication(pid, issue_pid):
         if check_if_editorial(pid):
             return "1"
         else:
-            logger.info("not an editorial")
             seq = "UNDEFINED"
             articles = Article.objects.filter(status=Article.Status.PUBLISHED, issue__pid=issue_pid).exclude(tags__name="editorial").order_by('publication_date').values("abstract__pid", 'abstract__title')
             # Start index from 2 because editorials are given the position 1

diff --git a/jdhseo/templates/jdhseo/article_detail.html b/jdhseo/templates/jdhseo/article_detail.html
@@ -237,7 +237,7 @@
     <div class="contributors mb5">
       {% for t in nb.contributor %}
         <div class="contributor mr10">
-        {{ t }}
+          {{ t|safe }}
         </div>
       {% endfor %}
     </div>

diff --git a/jdhseo/utils.py b/jdhseo/utils.py
@@ -82,13 +82,10 @@ def getReferencesFromJupyterNotebook(notebook):
     return references, sorted(bibliography, key=lambda x: re.sub('[^A-Za-z]+', '', x).lower()), inline_references_table
 
 
-def parseJupyterNotebook(notebook, contact_orcid):
-    logger.info(f'CONTACT ORCID: {contact_orcid}')
-    affiliation = get_affiliation(contact_orcid)
+def parseJupyterNotebook(notebook, merged_authors_affiliations):
     cells = notebook.get('cells')
     title = []
     abstract = []
-    contributor = []
     disclaimer = []
     paragraphs = []
     collaborators = []
@@ -100,6 +97,18 @@ def formatInlineCitations(m):
         if parsed_ref is None:
             return f'{m[1]}'
         return parsed_ref
+
+    # Build contributor array based on merged_authors_affiliations
+    contributor = []
+    for author in merged_authors_affiliations:
+        contributor_html = (
+            f'<h3>{author["given_names"]} {author["surname"]} '
+            f'<a href="{author["orcid"]}">'
+            f'<img src="https://orcid.org/sites/default/files/images/orcid_16x16.png" alt="orcid" /></a></h3>\n'
+            f'<p>{author["institution"]}, {author["city"]}, {author["country_name"]}</p>\n'
+        )
+        contributor.append(contributor_html)
+
     num = 0
     for cell in cells:
         # check cell metadata
@@ -108,17 +117,12 @@ def formatInlineCitations(m):
         source = re.sub(
             r'<cite\s+data-cite=.([/\dA-Z]+).>([^<]*)</cite>',
             formatInlineCitations, source)
-        if 'hidden' in tags:
+        if 'hidden' in tags or 'contributor' in tags:
             continue
         if 'title' in tags:
             title.append(marko.convert(source))
         elif 'abstract' in tags:
             abstract.append(marko.convert(source))
-        elif 'contributor' in tags:
-            if affiliation:
-                contributor.append(marko.convert(source + ' - ' + affiliation))
-            else:
-                contributor.append(marko.convert(source))
         elif 'disclaimer' in tags:
             disclaimer.append(marko.convert(source))
         elif 'collaborators' in tags:
@@ -132,7 +136,7 @@ def formatInlineCitations(m):
             elif cell.get('cell_type') == 'code':
                 num = num + 1
                 paragraphs.append({"numCode": num, "code": marko.convert(source)})
-
+    logger.info(f"contributors {contributor}")
     return {
         'title': title,
         'title_plain': strip_tags(''.join(title)).strip(),
@@ -229,4 +233,32 @@ def get_education_affiliation(orcid, api_url, headers):
                     return f"{last['address']['city']} - {last['address']['country']}"
     return None
 
+def merge_authors_affiliations(authors, affiliations):
+    """
+    Pair each author with the first affiliation whose 'aff_id' equals the author's.
+
+    Args:
+        authors (list): Author dicts; 'aff_id', 'given_names', 'surname' and 'orcid' are read.
+        affiliations (list): Affiliation dicts; 'aff_id', 'institution', 'city', 'country' and 'country_name' are read.
+
+    Returns:
+        list: One merged dict per matched author, in author order; authors with no matching affiliation are left out.
+    """
+    merged_list = []
+    for author in authors:
+        for affiliation in affiliations:
+            if author['aff_id'] == affiliation['aff_id']:
+                merged_info = {
+                    'given_names': author['given_names'],
+                    'surname': author['surname'],
+                    'orcid': author['orcid'],
+                    'institution': affiliation['institution'],
+                    'city': affiliation['city'],
+                    'country': affiliation['country'],
+                    'country_name': affiliation['country_name']
+                }
+                merged_list.append(merged_info)
+                break  # stop at the first matching affiliation so each author yields at most one entry
+    return merged_list
+
 
diff --git a/jdhseo/views.py b/jdhseo/views.py
@@ -10,7 +10,7 @@
 from django.shortcuts import render
 from jdhapi.models import Article, Issue, Author
 from django.conf import settings
-from .utils import parseJupyterNotebook, generate_qrcode
+from .utils import parseJupyterNotebook, generate_qrcode, merge_authors_affiliations
 from .utils import getPlainMetadataFromArticle
 from django.http import HttpResponse
 from jdhapi.utils.article_xml import ArticleXml
@@ -64,9 +64,10 @@ def ArticleDetail(request, pid):
             pid=pid
         )
         authors = article_xml.authors
-        logger.info(f"authors {authors}")
         affiliations = article_xml.affiliations
-        logger.info(f"affiliations {affiliations}")
+        merged_authors_affiliations = merge_authors_affiliations(authors, affiliations)
+        logger.info(f"merged_authors_affiliations {merged_authors_affiliations}")
+
     except Http404 as e:
         raise Http404(f"Error initializing ArticleXml: {str(e)}")
     # fill the context for the template file.
@@ -81,7 +82,7 @@ def ArticleDetail(request, pid):
         'published_date': published_date,
         'keywords': array_keys,
         'authors': authors,
-        'affiliations': affiliations
+        'authors_affiliations': merged_authors_affiliations
     }
     # check if it is a github url
     context.update({'proxy': 'github', 'host': settings.JDHSEO_PROXY_HOST})
@@ -96,7 +97,7 @@ def ArticleDetail(request, pid):
             # add NB paragraphs to context
             try:
                 notebook_data = res.json()
-                context.update({'nb': parseJupyterNotebook(notebook_data, contact_orcid)})
+                context.update({'nb': parseJupyterNotebook(notebook_data, merged_authors_affiliations)})
             except ValueError as e:
                 logger.error(f'Error parsing JSON for article pk={article.pk} notebook remote_url={remote_url}')
                 logger.exception(e)