From b342ba163a6a716b3dc8cb744db5b76ec85b60ae Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Thu, 22 Aug 2024 12:19:17 -0300 Subject: [PATCH 01/15] Cria testes para modelo ArticleFormat --- article/tests.py | 106 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 1 deletion(-) diff --git a/article/tests.py b/article/tests.py index f97d8943..d5e9803f 100755 --- a/article/tests.py +++ b/article/tests.py @@ -1,11 +1,13 @@ from freezegun import freeze_time from django.test import TestCase +from django.core.files.uploadedfile import SimpleUploadedFile from django_test_migrations.migrator import Migrator from datetime import datetime from django.utils.timezone import make_aware -from article.models import Article +from article.models import Article, ArticleFormat from article.tasks import remove_duplicate_articles +from core.users.models import User class TestArticleMigration(TestCase): @@ -58,3 +60,105 @@ def test_remove_duplicates_for_multiple_pids(self): self.assertEqual(Article.objects.filter(pid_v3="pid3").count(), 1) self.assertEqual(Article.objects.get(pid_v3="pid2").created, make_aware(datetime(2022, 6, 3))) self.assertEqual(Article.objects.get(pid_v3="pid3").created, make_aware(datetime(2022, 6, 14))) + + +class ArticleFormatModelTest(TestCase): + def setUp(self): + self.user = User.objects.create( + name="admin", + ) + self.article = Article.objects.create( + pid_v3="P3swRmPHQfy37r9xRbLCw8G", + sps_pkg_name="0001-3714-rm-30-04-299", + ) + + self.test_file = SimpleUploadedFile("test_file.xml", b"Test", content_type="application/xml") + self.test_file2 = SimpleUploadedFile("test_file.xml", b"Test2", content_type="application/xml") + self.article_format = ArticleFormat.objects.create( + article=self.article, + format_name='pmc', + version=1, + file=self.test_file, + valid=True, + status="S", + ) + + def test_get_method(self): + article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) + self.assertEqual(article_format.article, self.article) + self.assertEqual(article_format.format_name, 'pmc') + self.assertEqual(article_format.version, 1) + + def test_create_classmethod(self): + article_format = ArticleFormat.create( + user=self.user, + article=self.article, + format_name='pubmed', + version=1 + ) + self.assertEqual(article_format.article, self.article) + self.assertEqual(article_format.format_name, 'pubmed') + self.assertEqual(article_format.version, 1) + + def test_get_method_raises_value_error(self): + with self.assertRaises(ValueError) as context: + ArticleFormat.get(self.article, format_name='pubmed') + + self.assertEqual(str(context.exception), "ArticleFormat.get requires article and format_name and version") + + def test_create_or_update_classmethod(self): + article_format = ArticleFormat.create_or_update( + user=self.user, + article=self.article, + format_name="pmc", + version=1, + ) + self.assertEqual(article_format.article, self.article) + self.assertEqual(article_format.format_name, 'pmc') + self.assertEqual(article_format.version, 1) + + def test_save_file_method(self): + filename = "0034-7094-rba-69-03-0227.xml" + content = self.test_file + content.seek(0) + content_bytes = content.read() + article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) + article_format.save_file(filename=filename, content=content_bytes) + + with article_format.file.open('rb') as f: + saved_content = f.read() + + self.assertEqual(saved_content, content_bytes) + + def test_update_xml_in_save_file_method(self): + filename = "0034-7094-rba-69-03-0227.xml" + content = self.test_file + content_update = self.test_file2 + content.seek(0) + content_bytes = content.read() + article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) + article_format.save_file(filename=filename, content=content_bytes) + + content_update.seek(0) + content_update_bytes = content_update.read() + article_format.save_file(filename=filename, content=content_update_bytes) + + with article_format.file.open('rb') as f: + saved_content = f.read() + + self.assertEqual(saved_content, content_update_bytes) + + def test_save_format_xml_method(self): + article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) + input_xml = "
Original
" + + filename = article_format.article.sps_pkg_name + ".xml" + article_format.save_format_xml(format_xml=input_xml, filename=filename) + with article_format.file.open('rb') as f: + saved_content = f.read() + self.assertEqual(saved_content, input_xml.encode('utf-8')) + + self.assertEqual(article_format.status, "S") + self.assertEqual(article_format.report, None) + self.assertEqual(article_format.version, 1) + From d32241af32aadc834560c0dde46239222eb873ec Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 23 Aug 2024 10:16:51 -0300 Subject: [PATCH 02/15] Cria TasksConvertXmlFormatsTest --- article/tests.py | 85 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 84 insertions(+), 1 deletion(-) diff --git a/article/tests.py b/article/tests.py index d5e9803f..243326ed 100755 --- a/article/tests.py +++ b/article/tests.py @@ -4,10 +4,13 @@ from django_test_migrations.migrator import Migrator from datetime import datetime from django.utils.timezone import make_aware +from unittest.mock import patch, PropertyMock from article.models import Article, ArticleFormat -from article.tasks import remove_duplicate_articles +from article.tasks import remove_duplicate_articles, convert_xml_to_other_formats from core.users.models import User +from doi.models import DOI +from doi_manager.models import CrossRefConfiguration class TestArticleMigration(TestCase): @@ -162,3 +165,83 @@ def test_save_format_xml_method(self): self.assertEqual(article_format.report, None) self.assertEqual(article_format.version, 1) + +class TasksConvertXmlFormatsTest(TestCase): + def setUp(self): + self.doi = DOI.objects.create( + value="10.1000.10/123456" + ) + self.article = Article.objects.create( + pid_v3="P3swRmPHQfy37r9xRbLCw8G", + sps_pkg_name="0001-3714-rm-30-04-299", + ) + self.user = User.objects.create( + username="admin", + ) + + self.input_xml = "
Original PMC
" + self.modified_xml = "
Modified PMC
" + + def verify_article_format(self, status, version, pid_v3=None, report=None, file_exists=True): + self.assertEqual(ArticleFormat.objects.count(), 1) + article_format = ArticleFormat.objects.first() + self.assertEqual(article_format.article.pid_v3, pid_v3 or self.article.pid_v3) + self.assertEqual(article_format.status, status) + self.assertEqual(article_format.version, version) + if report: + self.assertEqual(article_format.report, report) + if file_exists: + with article_format.file.open('rb') as f: + content = f.read() + self.assertEqual(content, self.modified_xml.encode('utf-8')) + else: + self.assertFalse(article_format.file) + + @patch('article.models.Article.xmltree', new_callable=PropertyMock) + @patch('article.tasks.pmc.pipeline_pmc') + def test_convert_xml_to_pmc_formats(self, mock_pipeline_pmc, mock_property_xmltree): + mock_property_xmltree.return_value = self.input_xml + mock_pipeline_pmc.return_value = self.modified_xml + + convert_xml_to_other_formats(pid_v3=self.article.pid_v3, format_name="pmc", username="admin") + mock_pipeline_pmc.assert_called_once_with(mock_property_xmltree.return_value) + self.verify_article_format(status="S", version=1) + + + @patch('article.models.Article.xmltree', new_callable=PropertyMock) + @patch('article.tasks.pubmed.pipeline_pubmed') + def test_convert_xml_to_pubmed_formats(self, mock_pipeline_pubmed, mock_property_xmltree): + mock_property_xmltree.return_value = self.input_xml + mock_pipeline_pubmed.return_value = self.modified_xml + + convert_xml_to_other_formats(pid_v3=self.article.pid_v3, format_name="pubmed", username="admin") + mock_pipeline_pubmed.assert_called_once_with(mock_property_xmltree.return_value) + self.verify_article_format(status="S", version=1) + + + @patch('doi_manager.models.CrossRefConfiguration.get_data', return_value=dict()) + @patch('article.models.Article.xmltree', new_callable=PropertyMock) + @patch('article.tasks.crossref.pipeline_crossref') + def test_convert_xml_to_crossref_formats(self, mock_pipeline_crossref, mock_property_xmltree, mock_get_data): + self.article.doi.add(self.doi) + mock_property_xmltree.return_value = self.input_xml + mock_pipeline_crossref.return_value = self.modified_xml + + convert_xml_to_other_formats(pid_v3=self.article.pid_v3, format_name="crossref", username="admin") + self.verify_article_format(status="S", version=1) + + + def test_convert_xml_to_crossref_formats_without_doi(self): + convert_xml_to_other_formats(pid_v3=self.article.pid_v3, format_name="crossref", username="admin") + expected_msg = {"exception_msg": f"Unable to format because the article {self.article.pid_v3} has no DOI associated with it"} + self.verify_article_format(status="E", version=1, file_exists=False, report=expected_msg) + + + @patch('doi_manager.models.CrossRefConfiguration.get_data') + def test_convert_xml_to_crossref_formats_missing_crossref_configuration(self, mock_get_data): + self.article.doi.add(self.doi) + mock_get_data.side_effect = CrossRefConfiguration.DoesNotExist + convert_xml_to_other_formats(pid_v3=self.article.pid_v3, format_name="crossref", username="admin") + expected_prefix = '10.1000.10' + mock_get_data.assert_called_once_with(expected_prefix) + From 68679d421bd602e70d02a27c70ea950f86997a6a Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 23 Aug 2024 15:03:27 -0300 Subject: [PATCH 03/15] Realiza melhorias em ArticleFormatModelTest --- article/tests.py | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/article/tests.py b/article/tests.py index 243326ed..9e81f223 100755 --- a/article/tests.py +++ b/article/tests.py @@ -86,11 +86,19 @@ def setUp(self): status="S", ) + def verify_fields_model_article_format(self, article_format, version, format_name=None, status=None, file=None): + self.assertEqual(article_format.article, self.article) + if format_name: + self.assertEqual(article_format.format_name, format_name) + self.assertEqual(article_format.version, version) + self.assertEqual(article_format.report, None) + if status: + self.assertEqual(article_format.status, status) + + def test_get_method(self): article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) - self.assertEqual(article_format.article, self.article) - self.assertEqual(article_format.format_name, 'pmc') - self.assertEqual(article_format.version, 1) + self.verify_fields_model_article_format(article_format=article_format, format_name='pmc', version=1) def test_create_classmethod(self): article_format = ArticleFormat.create( @@ -99,9 +107,7 @@ def test_create_classmethod(self): format_name='pubmed', version=1 ) - self.assertEqual(article_format.article, self.article) - self.assertEqual(article_format.format_name, 'pubmed') - self.assertEqual(article_format.version, 1) + self.verify_fields_model_article_format(article_format=article_format, format_name='pubmed', version=1) def test_get_method_raises_value_error(self): with self.assertRaises(ValueError) as context: @@ -116,9 +122,7 @@ def test_create_or_update_classmethod(self): format_name="pmc", version=1, ) - self.assertEqual(article_format.article, self.article) - self.assertEqual(article_format.format_name, 'pmc') - self.assertEqual(article_format.version, 1) + self.verify_fields_model_article_format(article_format=article_format, format_name='pmc', version=1) def test_save_file_method(self): filename = "0034-7094-rba-69-03-0227.xml" @@ -156,15 +160,12 @@ def test_save_format_xml_method(self): input_xml = "
Original
" filename = article_format.article.sps_pkg_name + ".xml" - article_format.save_format_xml(format_xml=input_xml, filename=filename) + article_format.save_format_xml(format_xml=input_xml, filename=filename, status="S") with article_format.file.open('rb') as f: saved_content = f.read() self.assertEqual(saved_content, input_xml.encode('utf-8')) - self.assertEqual(article_format.status, "S") - self.assertEqual(article_format.report, None) - self.assertEqual(article_format.version, 1) - + self.verify_fields_model_article_format(article_format=article_format, status="S", version=1) class TasksConvertXmlFormatsTest(TestCase): def setUp(self): From 1925e54c4f9fedcd7a8233d95a05ecf61cdffd20 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 23 Aug 2024 15:06:36 -0300 Subject: [PATCH 04/15] Cria tasks para converter os artigos nos formatos especificados. --- article/tasks.py | 134 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 133 insertions(+), 1 deletion(-) diff --git a/article/tasks.py b/article/tasks.py index 1fc6eaa0..bfcf326e 100644 --- a/article/tasks.py +++ b/article/tasks.py @@ -5,11 +5,14 @@ from django.db.models import Q, Count from django.contrib.auth import get_user_model from django.utils.translation import gettext as _ +from packtools.sps.formats import pubmed, pmc, crossref from article.models import Article, ArticleFormat from article.sources import xmlsps from article.sources.preprint import harvest_preprints from config import celery_app +from doi_manager.models import CrossRefConfiguration +from journal.models import Journal from pid_provider.models import PidProviderXML from pid_provider.provider import PidProvider from tracker.models import UnexpectedEvent @@ -141,7 +144,7 @@ def task_convert_xml_to_other_formats_for_articles( kwargs={ "user_id": user.id, "username": user.username, - "item_id": item.id, + "pid_v3": item.pid_v3, "force_update": force_update, } ) @@ -192,6 +195,135 @@ def convert_xml_to_other_formats( ArticleFormat.generate_formats(user, article=article) +@celery_app.task(bind=True) +def task_convert_xml_to_other_formats_for_articles(self, format_name, user_id=None, username=None, force_update=False): + journals = Journal.objects.filter(indexed_at__acronym=format_name) + articles = Article.objects.filter(journal__in=journals) + + if not force_update: + articles = articles.filter(article_format__isnull=True) + + try: + task_function_dict = { + "pubmed": convert_xml_to_pubmed_or_pmc_formats, + "pmc": convert_xml_to_pubmed_or_pmc_formats, + "crossref": convert_xml_to_crossref_format, + } + task_function = task_function_dict[format_name] + except Exception as exception: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exception, + exc_traceback=exc_traceback, + detail={ + "task": "article.tasks.task_convert_xml_to_other_formats_for_articles", + "item": str(article), + }, + ) + return + + for article in articles: + try: + task_function.apply_async( + user_id=user_id, + username=username, + format_name=format_name, + ) + except Exception as exception: + exc_type, exc_value, exc_traceback = sys.exc_info() + UnexpectedEvent.create( + exception=exception, + exc_traceback=exc_traceback, + detail={ + "task": "article.tasks.task_convert_xml_to_other_formats_for_articles", + "item": str(article), + }, + ) + + +def get_function_format_xml(format_name): + dict_functions_formats = { + "pmc": pmc.pipeline_pmc, + "pubmed": pubmed.pipeline_pubmed, + "crossref": crossref.pipeline_crossref, + } + return dict_functions_formats.get(format_name) + + +def handler_formatting_error(article_format, message): + article_format.save_format_xml( + filename=None, + format_xml=None, + status="E", + report={"exception_msg": message} + ) + +def get_article_format(user, pid_v3, format_name): + try: + article = Article.objects.get(pid_v3=pid_v3) + except Article.DoesNotExist: + logging.info(f"Unable to convert article {pid_v3} to the specified format") + return + + try: + article_format = ArticleFormat.objects.get(article=article, format_name="pmc") + except ArticleFormat.DoesNotExist: + article_format = ArticleFormat.create_or_update( + user=user, + article=article, + format_name=format_name, + version=1 + ) + return article_format + +@celery_app.task(bind=True) +def convert_xml_to_pubmed_or_pmc_formats(self, pid_v3, format_name, user_id=None, username=None): + user = _get_user(request=self.request, username=username, user_id=user_id) + + article_format = get_article_format(pid_v3=pid_v3, format_name=format_name, user=user) + + function_format = get_function_format_xml(format_name=format_name) + + content = function_format(article_format.article.xmltree) + article_format.save_format_xml( + format_xml=content, + filename=article_format.article.sps_pkg_name + ".xml", + status="S" + ) + + +@celery_app.task(bind=True) +def convert_xml_to_crossref_format(self, pid_v3, format_name, user_id=None, username=None): + user = _get_user(request=self.request, username=username, user_id=user_id) + + article_format = get_article_format(pid_v3=pid_v3, format_name=format_name, user=user) + + doi = article_format.article.doi.first() + if not doi: + handler_formatting_error( + article_format=article_format, + message=f"Unable to format because the article {pid_v3} has no DOI associated with it" + ) + return + + prefix = doi.value.split("/")[0] + try: + data = CrossRefConfiguration.get_data(prefix) + except CrossRefConfiguration.DoesNotExist: + handler_formatting_error( + article_format=article_format, + message=f"Unable to convert article {pid_v3} to crossref format. CrossrefConfiguration missing" + ) + return + + function_format = get_function_format_xml(format_name=format_name) + content = function_format(article_format.article.xmltree, data) + article_format.save_format_xml( + format_xml=content, + filename=article_format.article.sps_pkg_name + ".xml", + status="S" + ) + @celery_app.task(bind=True) def task_articles_complete_data( self, user_id=None, username=None, from_date=None, force_update=False From a8c853658c4d6a5d1146138bde9853f870c50578 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 23 Aug 2024 15:06:52 -0300 Subject: [PATCH 05/15] Altera get_data para provocar error --- doi_manager/models.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/doi_manager/models.py b/doi_manager/models.py index dd5c88a4..1f1d982e 100644 --- a/doi_manager/models.py +++ b/doi_manager/models.py @@ -30,7 +30,5 @@ def data(self): @classmethod def get_data(cls, prefix): - try: - return cls.objects.get(prefix=prefix).data - except cls.DoesNotExist: - return cls().data + return cls.objects.get(prefix=prefix).data + From 21d21e57ed1d4f52ac0fc9a34877b0c4624e3710 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 23 Aug 2024 15:07:13 -0300 Subject: [PATCH 06/15] Altera testes para entrar em conformidade --- article/tests.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/article/tests.py b/article/tests.py index 9e81f223..090a4275 100755 --- a/article/tests.py +++ b/article/tests.py @@ -7,7 +7,7 @@ from unittest.mock import patch, PropertyMock from article.models import Article, ArticleFormat -from article.tasks import remove_duplicate_articles, convert_xml_to_other_formats +from article.tasks import remove_duplicate_articles, convert_xml_to_pubmed_or_pmc_formats, convert_xml_to_crossref_format from core.users.models import User from doi.models import DOI from doi_manager.models import CrossRefConfiguration @@ -204,7 +204,7 @@ def test_convert_xml_to_pmc_formats(self, mock_pipeline_pmc, mock_property_xmltr mock_property_xmltree.return_value = self.input_xml mock_pipeline_pmc.return_value = self.modified_xml - convert_xml_to_other_formats(pid_v3=self.article.pid_v3, format_name="pmc", username="admin") + convert_xml_to_pubmed_or_pmc_formats(pid_v3=self.article.pid_v3, format_name="pmc", username="admin") mock_pipeline_pmc.assert_called_once_with(mock_property_xmltree.return_value) self.verify_article_format(status="S", version=1) @@ -215,7 +215,7 @@ def test_convert_xml_to_pubmed_formats(self, mock_pipeline_pubmed, mock_property mock_property_xmltree.return_value = self.input_xml mock_pipeline_pubmed.return_value = self.modified_xml - convert_xml_to_other_formats(pid_v3=self.article.pid_v3, format_name="pubmed", username="admin") + convert_xml_to_pubmed_or_pmc_formats(pid_v3=self.article.pid_v3, format_name="pubmed", username="admin") mock_pipeline_pubmed.assert_called_once_with(mock_property_xmltree.return_value) self.verify_article_format(status="S", version=1) @@ -228,12 +228,12 @@ def test_convert_xml_to_crossref_formats(self, mock_pipeline_crossref, mock_prop mock_property_xmltree.return_value = self.input_xml mock_pipeline_crossref.return_value = self.modified_xml - convert_xml_to_other_formats(pid_v3=self.article.pid_v3, format_name="crossref", username="admin") + convert_xml_to_crossref_format(pid_v3=self.article.pid_v3, format_name="crossref", username="admin") self.verify_article_format(status="S", version=1) def test_convert_xml_to_crossref_formats_without_doi(self): - convert_xml_to_other_formats(pid_v3=self.article.pid_v3, format_name="crossref", username="admin") + convert_xml_to_crossref_format(pid_v3=self.article.pid_v3, format_name="crossref", username="admin") expected_msg = {"exception_msg": f"Unable to format because the article {self.article.pid_v3} has no DOI associated with it"} self.verify_article_format(status="E", version=1, file_exists=False, report=expected_msg) @@ -242,7 +242,7 @@ def test_convert_xml_to_crossref_formats_without_doi(self): def test_convert_xml_to_crossref_formats_missing_crossref_configuration(self, mock_get_data): self.article.doi.add(self.doi) mock_get_data.side_effect = CrossRefConfiguration.DoesNotExist - convert_xml_to_other_formats(pid_v3=self.article.pid_v3, format_name="crossref", username="admin") + convert_xml_to_crossref_format(pid_v3=self.article.pid_v3, format_name="crossref", username="admin") expected_prefix = '10.1000.10' mock_get_data.assert_called_once_with(expected_prefix) From 052051ee05cff216e9d924867795f0dbbf6473ae Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 23 Aug 2024 15:17:23 -0300 Subject: [PATCH 07/15] - Altera campo format_name e adiciona status - adiciona metodo save_format_xml --- article/models.py | 62 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/article/models.py b/article/models.py index 0798d5f5..8e55253c 100755 --- a/article/models.py +++ b/article/models.py @@ -635,18 +635,29 @@ def article_directory_path(instance, filename): except AttributeError: return os.path.join(instance.article.pid_v3, instance.format_name, filename) +STATUS_EXPORT_FILE = [ + ("E", "Error occurred during export format creation"), + ("S", "Export format created successfully"), + ("A", "Export format available on external site"), +] + +TYPE_OF_FORMAT = [ + ("crossref", "Crossref"), + ("pubmed", "PubMed"), + ("pmc", "PubMed PMC"), + ("doaj", "DOAJ"), +] class ArticleFormat(CommonControlField): - article = ParentalKey( Article, null=True, blank=True, on_delete=models.SET_NULL, - related_name="format", + related_name="article_format", ) format_name = models.CharField( - _("Article Format"), max_length=20, null=True, blank=True + _("Article Format"), max_length=20, null=True, blank=True, choices=TYPE_OF_FORMAT ) version = models.PositiveIntegerField(null=True, blank=True) file = models.FileField( @@ -657,6 +668,12 @@ class ArticleFormat(CommonControlField): ) report = models.JSONField(null=True, blank=True) valid = models.BooleanField(default=None, null=True, blank=True) + status = models.CharField( + blank=True, + null=True, + max_length=1, + choices=STATUS_EXPORT_FILE + ) finger_print = models.CharField(max_length=64, null=True, blank=True) base_form_class = CoreAdminModelForm @@ -664,6 +681,7 @@ class ArticleFormat(CommonControlField): FieldPanel("file"), FieldPanel("format_name"), FieldPanel("version"), + FieldPanel("status"), FieldPanel("report"), ] @@ -712,7 +730,7 @@ def get(cls, article, format_name=None, version=None): @classmethod def create(cls, user, article, format_name=None, version=None): - if article or format_name or version: + if article and format_name or version: try: obj = cls() obj.article = article @@ -731,10 +749,6 @@ def create(cls, user, article, format_name=None, version=None): def create_or_update(cls, user, article, format_name=None, version=None): try: obj = cls.get(article, format_name=format_name, version=version) - obj.updated_by = user - obj.format_name = format_name or obj.format_name - obj.version = version or obj.version - obj.save() except cls.DoesNotExist: obj = cls.create(user, article, format_name, version) return obj @@ -750,6 +764,37 @@ def save_file(self, filename, content): self.finger_print = finger_print self.save() + def save_format_xml( + self, + format_xml, + filename, + status, + report=None, + indexed_check=False, + version=None, + ): + if indexed_check and not self.article.is_indexed_at(self.format_name): + return + try: + if filename and format_xml: + self.save_file(filename=filename, content=format_xml) + self.version = version or 1 + self.report = report + self.status = status + self.save() + except Exception as e: + exc_type, exc_value, exc_traceback = sys.exc_info() + unexpected_event = UnexpectedEvent.create( + exception=e, + exc_traceback=exc_traceback, + detail=dict( + function="article.models.ArticleFormat.generate", + format_name=self.format_name, + article_pid_v3=self.article.pid_v3, + sps_pkg_name=self.article.sps_pkg_name, + ), + ) + @classmethod def generate( cls, @@ -792,6 +837,7 @@ def generate( if obj: obj.report = unexpected_event.data obj.valid = False + obj.status = "E" obj.save() @classmethod From 20c662d5cde9a1a73fdfa5bbd7b89664c6712985 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Fri, 23 Aug 2024 15:17:58 -0300 Subject: [PATCH 08/15] remove metodos generate e generate formats --- article/models.py | 79 ----------------------------------------------- 1 file changed, 79 deletions(-) diff --git a/article/models.py b/article/models.py index 8e55253c..84ddb9a2 100755 --- a/article/models.py +++ b/article/models.py @@ -795,82 +795,3 @@ def save_format_xml( ), ) - @classmethod - def generate( - cls, - user, - article, - format_name, - filename, - function_generate_format, - indexed_check=False, - data=None, - version=None, - ): - if indexed_check and not article.is_indexed_at(format_name): - return - try: - version = version or 1 - obj = None - obj = cls.create_or_update(user, article, format_name, version) - xmltree = article.xmltree - if data is not None: - content = function_generate_format(xmltree, data=data) - else: - content = function_generate_format(xmltree) - obj.save_file(filename, content) - obj.report = None - obj.save() - return obj - except Exception as e: - exc_type, exc_value, exc_traceback = sys.exc_info() - unexpected_event = UnexpectedEvent.create( - exception=e, - exc_traceback=exc_traceback, - detail=dict( - function="article.models.ArticleFormat.generate", - format_name=format_name, - article_pid_v3=article.pid_v3, - sps_pkg_name=article.sps_pkg_name, - ), - ) - if obj: - obj.report = unexpected_event.data - obj.valid = False - obj.status = "E" - obj.save() - - @classmethod - def generate_formats(cls, user, article): - for doi in article.doi.all(): - if not doi.value: - break - try: - prefix = doi.value.split("/")[0] - crossref_data = CrossRefConfiguration.get_data(prefix) - cls.generate( - user, - article, - "crossref", - article.sps_pkg_name + ".xml", - crossref.pipeline_crossref, - data=crossref_data, - ) - except CrossRefConfiguration.DoesNotExist: - break - cls.generate( - user, - article, - "pubmed", - article.sps_pkg_name + ".xml", - pubmed.pipeline_pubmed, - indexed_check=False, - ) - cls.generate( - user, - article, - "pmc", - article.sps_pkg_name + ".xml", - pmc.pipeline_pmc, - indexed_check=False, - ) From d82347e7317012be03ca4cf7203ec988d8a1b2e6 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Sat, 24 Aug 2024 14:21:41 -0300 Subject: [PATCH 09/15] Altera os testes para refletir as mudancas em save_file --- article/models.py | 5 +++-- article/tests.py | 51 ++++++++++++++++++++--------------------------- 2 files changed, 25 insertions(+), 31 deletions(-) diff --git a/article/models.py b/article/models.py index 84ddb9a2..4ad38138 100755 --- a/article/models.py +++ b/article/models.py @@ -1,7 +1,7 @@ import os import sys from datetime import datetime - +from lxml import etree from django.core.files.base import ContentFile from django.db import IntegrityError, models from django.db.utils import DataError @@ -754,6 +754,7 @@ def create_or_update(cls, user, article, format_name=None, version=None): return obj def save_file(self, filename, content): + content = etree.tostring(content) finger_print = generate_finger_print(content) if finger_print != self.finger_print: try: @@ -776,7 +777,7 @@ def save_format_xml( if indexed_check and not self.article.is_indexed_at(self.format_name): return try: - if filename and format_xml: + if filename and len(format_xml): self.save_file(filename=filename, content=format_xml) self.version = version or 1 self.report = report diff --git a/article/tests.py b/article/tests.py index 090a4275..7d776700 100755 --- a/article/tests.py +++ b/article/tests.py @@ -1,3 +1,4 @@ +from lxml import etree from freezegun import freeze_time from django.test import TestCase from django.core.files.uploadedfile import SimpleUploadedFile @@ -75,8 +76,8 @@ def setUp(self): sps_pkg_name="0001-3714-rm-30-04-299", ) - self.test_file = SimpleUploadedFile("test_file.xml", b"Test", content_type="application/xml") - self.test_file2 = SimpleUploadedFile("test_file.xml", b"Test2", content_type="application/xml") + self.test_file = SimpleUploadedFile("test_file.xml", b"
Test
", content_type="application/xml") + self.test_file2 = SimpleUploadedFile("test_file2.xml", b"
Test2
", content_type="application/xml") self.article_format = ArticleFormat.objects.create( article=self.article, format_name='pmc', @@ -95,7 +96,6 @@ def verify_fields_model_article_format(self, article_format, version, format_nam if status: self.assertEqual(article_format.status, status) - def test_get_method(self): article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) self.verify_fields_model_article_format(article_format=article_format, format_name='pmc', version=1) @@ -123,48 +123,41 @@ def test_create_or_update_classmethod(self): version=1, ) self.verify_fields_model_article_format(article_format=article_format, format_name='pmc', version=1) + + def convert_and_compare_xml(self, article_format, filename, xml_content): + article_format.save_file(filename=filename, content=xml_content) + with article_format.file.open("rb") as f: + saved_content = f.read() + self.assertEqual(etree.tostring(etree.fromstring(saved_content), encoding="utf-8"), + etree.tostring(xml_content, encoding="utf-8")) def test_save_file_method(self): filename = "0034-7094-rba-69-03-0227.xml" - content = self.test_file - content.seek(0) - content_bytes = content.read() article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) - article_format.save_file(filename=filename, content=content_bytes) - - with article_format.file.open('rb') as f: - saved_content = f.read() - - self.assertEqual(saved_content, content_bytes) + self.test_file.seek(0) + input_xml = etree.fromstring(self.test_file.read()) + self.convert_and_compare_xml(article_format=article_format, filename=filename, xml_content=input_xml) def test_update_xml_in_save_file_method(self): filename = "0034-7094-rba-69-03-0227.xml" - content = self.test_file - content_update = self.test_file2 - content.seek(0) - content_bytes = content.read() - article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) - article_format.save_file(filename=filename, content=content_bytes) + self.test_file.seek(0) + input_xml = etree.fromstring(self.test_file.read()) - content_update.seek(0) - content_update_bytes = content_update.read() - article_format.save_file(filename=filename, content=content_update_bytes) - - with article_format.file.open('rb') as f: - saved_content = f.read() + article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) + self.convert_and_compare_xml(article_format=article_format, filename=filename, xml_content=input_xml) - self.assertEqual(saved_content, content_update_bytes) + self.test_file2.seek(0) + update_xml = etree.fromstring(self.test_file2.read()) + self.convert_and_compare_xml(article_format=article_format, filename=filename, xml_content=update_xml) def test_save_format_xml_method(self): article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) - input_xml = "
Original
" - + input_xml = etree.fromstring("
Test
") filename = article_format.article.sps_pkg_name + ".xml" article_format.save_format_xml(format_xml=input_xml, filename=filename, status="S") with article_format.file.open('rb') as f: saved_content = f.read() - self.assertEqual(saved_content, input_xml.encode('utf-8')) - + self.assertEqual(saved_content, etree.tostring(input_xml, encoding="utf-8")) self.verify_fields_model_article_format(article_format=article_format, status="S", version=1) class TasksConvertXmlFormatsTest(TestCase): From 03aaa430beae45e623bba84b530d7a8299195da5 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Sat, 24 Aug 2024 14:34:51 -0300 Subject: [PATCH 10/15] Altera input_xml e modified_xml para tipo etree --- article/tests.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/article/tests.py b/article/tests.py index 7d776700..19330356 100755 --- a/article/tests.py +++ b/article/tests.py @@ -173,8 +173,8 @@ def setUp(self): username="admin", ) - self.input_xml = "
Original PMC
" - self.modified_xml = "
Modified PMC
" + self.input_xml = etree.fromstring("
Original PMC
") + self.modified_xml = etree.fromstring("
Modified PMC
") def verify_article_format(self, status, version, pid_v3=None, report=None, file_exists=True): self.assertEqual(ArticleFormat.objects.count(), 1) @@ -187,7 +187,7 @@ def verify_article_format(self, status, version, pid_v3=None, report=None, file_ if file_exists: with article_format.file.open('rb') as f: content = f.read() - self.assertEqual(content, self.modified_xml.encode('utf-8')) + self.assertEqual(content, etree.tostring(self.modified_xml, encoding="utf-8")) else: self.assertFalse(article_format.file) From c7a26ee751c62ae7a68e241e820e0a3c7c2fe21f Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Sat, 24 Aug 2024 14:35:01 -0300 Subject: [PATCH 11/15] Migration --- ...us_alter_articleformat_article_and_more.py | 56 +++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 article/migrations/0014_articleformat_status_alter_articleformat_article_and_more.py diff --git a/article/migrations/0014_articleformat_status_alter_articleformat_article_and_more.py b/article/migrations/0014_articleformat_status_alter_articleformat_article_and_more.py new file mode 100644 index 00000000..ad3705fd --- /dev/null +++ b/article/migrations/0014_articleformat_status_alter_articleformat_article_and_more.py @@ -0,0 +1,56 @@ +# Generated by Django 5.0.3 on 2024-08-23 18:12 + +import django.db.models.deletion +import modelcluster.fields +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("article", "0013_article_article_license"), + ] + + operations = [ + migrations.AddField( + model_name="articleformat", + name="status", + field=models.CharField( + blank=True, + choices=[ + ("E", "Error occurred during export format creation"), + ("S", "Export format created successfully"), + ("A", "Export format available on external site"), + ], + max_length=1, + null=True, + ), + ), + migrations.AlterField( + model_name="articleformat", + name="article", + field=modelcluster.fields.ParentalKey( + blank=True, + null=True, + on_delete=django.db.models.deletion.SET_NULL, + related_name="article_format", + to="article.article", + ), + ), + migrations.AlterField( + model_name="articleformat", + name="format_name", + field=models.CharField( + blank=True, + choices=[ + ("crossref", "Crossref"), + ("pubmed", "PubMed"), + ("pmc", "PubMed PMC"), + ("doaj", "DOAJ"), + ], + max_length=20, + null=True, + verbose_name="Article Format", + ), + ), + ] From 8b8ee1ae33eb45b0119871d9b270a53ce99d0fe3 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Sat, 24 Aug 2024 15:36:37 -0300 Subject: [PATCH 12/15] Remove teste obsoleto --- article/tests.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/article/tests.py b/article/tests.py index 19330356..a00f51b6 100755 --- a/article/tests.py +++ b/article/tests.py @@ -2,7 +2,6 @@ from freezegun import freeze_time from django.test import TestCase from django.core.files.uploadedfile import SimpleUploadedFile -from django_test_migrations.migrator import Migrator from datetime import datetime from django.utils.timezone import make_aware from unittest.mock import patch, PropertyMock @@ -14,24 +13,6 @@ from doi_manager.models import CrossRefConfiguration -class TestArticleMigration(TestCase): - def test_migration_0013_article_article_license(self): - migrator = Migrator(database='default') - old_state = migrator.apply_initial_migration(('article', '0012_alter_article_publisher')) - Article = old_state.apps.get_model('article', 'Article') - LicenseStatement = old_state.apps.get_model('core', 'LicenseStatement') - article = Article.objects.create() - license_statement = LicenseStatement.objects.create(url="https://www.teste.com.br") - article.license_statements.add(license_statement) - - new_state = migrator.apply_tested_migration(('article', '0013_article_article_license')) - - Article = new_state.apps.get_model('article', 'Article') - - article = Article.objects.first() - self.assertEqual(article.article_license, 'https://www.teste.com.br') - migrator.reset() - class RemoveDuplicateArticlesTest(TestCase): def create_article_at_time(self, dt, v3): From 8f7a00bc9d4d2ae08fa7d124d134768f76edaaae Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Sat, 24 Aug 2024 15:37:44 -0300 Subject: [PATCH 13/15] remove tasks obsoletas --- article/tasks.py | 65 ------------------------------------------------ 1 file changed, 65 deletions(-) diff --git a/article/tasks.py b/article/tasks.py index bfcf326e..561e0f40 100644 --- a/article/tasks.py +++ b/article/tasks.py @@ -130,71 +130,6 @@ def load_preprint(self, user_id, oai_pmh_preprint_uri): harvest_preprints(oai_pmh_preprint_uri, user) -@celery_app.task(bind=True) -def task_convert_xml_to_other_formats_for_articles( - self, user_id=None, username=None, from_date=None, force_update=False -): - try: - user = _get_user(self.request, username, user_id) - - for item in Article.objects.filter(sps_pkg_name__isnull=False).iterator(): - logging.info(item.pid_v3) - try: - convert_xml_to_other_formats.apply_async( - kwargs={ - "user_id": user.id, - "username": user.username, - "pid_v3": item.pid_v3, - "force_update": force_update, - } - ) - except Exception as exception: - exc_type, exc_value, exc_traceback = sys.exc_info() - UnexpectedEvent.create( - exception=exception, - exc_traceback=exc_traceback, - detail={ - "task": "article.tasks.task_convert_xml_to_other_formats_for_articles", - "item": str(item), - }, - ) - except Exception as exception: - exc_type, exc_value, exc_traceback = sys.exc_info() - UnexpectedEvent.create( - exception=exception, - exc_traceback=exc_traceback, - detail={ - "task": "article.tasks.task_convert_xml_to_other_formats_for_articles", - }, - ) - - -@celery_app.task(bind=True) -def convert_xml_to_other_formats( - self, user_id=None, username=None, item_id=None, force_update=None -): - user = _get_user(self.request, username, user_id) - - try: - article = Article.objects.get(pk=item_id) - except Article.DoesNotExist: - logging.info(f"Not found {item_id}") - return - - done = False - try: - article_format = ArticleFormat.objects.get(article=article) - done = True - except ArticleFormat.MultipleObjectsReturned: - done = True - except ArticleFormat.DoesNotExist: - done = False - logging.info(f"Done {done}") - - if not done or force_update: - ArticleFormat.generate_formats(user, article=article) - - @celery_app.task(bind=True) def task_convert_xml_to_other_formats_for_articles(self, format_name, user_id=None, username=None, force_update=False): journals = Journal.objects.filter(indexed_at__acronym=format_name) From ff7c98aea41e4f9da0833ca776a8c5c6b30e7d23 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Sat, 24 Aug 2024 15:39:19 -0300 Subject: [PATCH 14/15] Altera funcoes get_function_format_xml, handler_formatting_error e get_article_format de lugar --- article/tasks.py | 71 ++++++++++++++++++++++++------------------------ 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/article/tasks.py b/article/tasks.py index 561e0f40..477cd26f 100644 --- a/article/tasks.py +++ b/article/tasks.py @@ -130,6 +130,42 @@ def load_preprint(self, user_id, oai_pmh_preprint_uri): harvest_preprints(oai_pmh_preprint_uri, user) +def get_function_format_xml(format_name): + dict_functions_formats = { + "pmc": pmc.pipeline_pmc, + "pubmed": pubmed.pipeline_pubmed, + "crossref": crossref.pipeline_crossref, + } + return dict_functions_formats.get(format_name) + + +def handler_formatting_error(article_format, message): + article_format.save_format_xml( + filename=None, + format_xml=None, + status="E", + report={"exception_msg": message} + ) + +def get_article_format(user, pid_v3, format_name): + try: + article = Article.objects.get(pid_v3=pid_v3) + except Article.DoesNotExist: + logging.info(f"Unable to convert article {pid_v3} to the specified format") + return + + try: + article_format = ArticleFormat.objects.get(article=article, format_name="pmc") + except ArticleFormat.DoesNotExist: + article_format = ArticleFormat.create_or_update( + user=user, + article=article, + format_name=format_name, + version=1 + ) + return article_format + + @celery_app.task(bind=True) def task_convert_xml_to_other_formats_for_articles(self, format_name, user_id=None, username=None, force_update=False): journals = Journal.objects.filter(indexed_at__acronym=format_name) @@ -176,41 +212,6 @@ def task_convert_xml_to_other_formats_for_articles(self, format_name, user_id=No ) -def get_function_format_xml(format_name): - dict_functions_formats = { - "pmc": pmc.pipeline_pmc, - "pubmed": pubmed.pipeline_pubmed, - "crossref": crossref.pipeline_crossref, - } - return dict_functions_formats.get(format_name) - - -def handler_formatting_error(article_format, message): - article_format.save_format_xml( - filename=None, - format_xml=None, - status="E", - report={"exception_msg": message} - ) - -def get_article_format(user, pid_v3, format_name): - try: - article = Article.objects.get(pid_v3=pid_v3) - except Article.DoesNotExist: - logging.info(f"Unable to convert article {pid_v3} to the specified format") - return - - try: - article_format = ArticleFormat.objects.get(article=article, format_name="pmc") - except ArticleFormat.DoesNotExist: - article_format = ArticleFormat.create_or_update( - user=user, - article=article, - format_name=format_name, - version=1 - ) - return article_format - @celery_app.task(bind=True) def convert_xml_to_pubmed_or_pmc_formats(self, pid_v3, format_name, user_id=None, username=None): user = _get_user(request=self.request, username=username, user_id=user_id) From e90381be1e0eedeac45be0752c1420e4cbcd0938 Mon Sep 17 00:00:00 2001 From: Samuel Veiga Rangel Date: Sat, 24 Aug 2024 15:40:38 -0300 Subject: [PATCH 15/15] black --- article/tasks.py | 103 +++++++++++++++--------- article/tests.py | 203 +++++++++++++++++++++++++++++++---------------- 2 files changed, 198 insertions(+), 108 deletions(-) diff --git a/article/tasks.py b/article/tasks.py index 477cd26f..cce70837 100644 --- a/article/tasks.py +++ b/article/tasks.py @@ -54,11 +54,13 @@ def _items_to_load_article(from_date, force_update): if not from_date: # obtém a última atualização de Article try: - article = Article.objects.filter( - ~Q(valid=True) - ).order_by("-updated").first() + article = ( + Article.objects.filter(~Q(valid=True)).order_by("-updated").first() + ) if not article: - article = Article.objects.filter(valid=True).order_by("-updated").first() + article = ( + Article.objects.filter(valid=True).order_by("-updated").first() + ) if article: from_date = article.updated except Article.DoesNotExist: @@ -141,12 +143,10 @@ def get_function_format_xml(format_name): def handler_formatting_error(article_format, message): article_format.save_format_xml( - filename=None, - format_xml=None, - status="E", - report={"exception_msg": message} + filename=None, format_xml=None, status="E", report={"exception_msg": message} ) + def get_article_format(user, pid_v3, format_name): try: article = Article.objects.get(pid_v3=pid_v3) @@ -158,16 +158,15 @@ def get_article_format(user, pid_v3, format_name): article_format = ArticleFormat.objects.get(article=article, format_name="pmc") except ArticleFormat.DoesNotExist: article_format = ArticleFormat.create_or_update( - user=user, - article=article, - format_name=format_name, - version=1 + user=user, article=article, format_name=format_name, version=1 ) return article_format @celery_app.task(bind=True) -def task_convert_xml_to_other_formats_for_articles(self, format_name, user_id=None, username=None, force_update=False): +def task_convert_xml_to_other_formats_for_articles( + self, format_name, user_id=None, username=None, force_update=False +): journals = Journal.objects.filter(indexed_at__acronym=format_name) articles = Article.objects.filter(journal__in=journals) @@ -213,10 +212,14 @@ def task_convert_xml_to_other_formats_for_articles(self, format_name, user_id=No @celery_app.task(bind=True) -def convert_xml_to_pubmed_or_pmc_formats(self, pid_v3, format_name, user_id=None, username=None): - user = _get_user(request=self.request, username=username, user_id=user_id) +def convert_xml_to_pubmed_or_pmc_formats( + self, pid_v3, format_name, user_id=None, username=None +): + user = _get_user(request=self.request, username=username, user_id=user_id) - article_format = get_article_format(pid_v3=pid_v3, format_name=format_name, user=user) + article_format = get_article_format( + pid_v3=pid_v3, format_name=format_name, user=user + ) function_format = get_function_format_xml(format_name=format_name) @@ -224,31 +227,35 @@ def convert_xml_to_pubmed_or_pmc_formats(self, pid_v3, format_name, user_id=None article_format.save_format_xml( format_xml=content, filename=article_format.article.sps_pkg_name + ".xml", - status="S" + status="S", ) @celery_app.task(bind=True) -def convert_xml_to_crossref_format(self, pid_v3, format_name, user_id=None, username=None): - user = _get_user(request=self.request, username=username, user_id=user_id) +def convert_xml_to_crossref_format( + self, pid_v3, format_name, user_id=None, username=None +): + user = _get_user(request=self.request, username=username, user_id=user_id) + + article_format = get_article_format( + pid_v3=pid_v3, format_name=format_name, user=user + ) - article_format = get_article_format(pid_v3=pid_v3, format_name=format_name, user=user) - doi = article_format.article.doi.first() if not doi: handler_formatting_error( - article_format=article_format, - message=f"Unable to format because the article {pid_v3} has no DOI associated with it" + article_format=article_format, + message=f"Unable to format because the article {pid_v3} has no DOI associated with it", ) return - + prefix = doi.value.split("/")[0] try: data = CrossRefConfiguration.get_data(prefix) except CrossRefConfiguration.DoesNotExist: handler_formatting_error( - article_format=article_format, - message=f"Unable to convert article {pid_v3} to crossref format. CrossrefConfiguration missing" + article_format=article_format, + message=f"Unable to convert article {pid_v3} to crossref format. CrossrefConfiguration missing", ) return @@ -257,9 +264,10 @@ def convert_xml_to_crossref_format(self, pid_v3, format_name, user_id=None, user article_format.save_format_xml( format_xml=content, filename=article_format.article.sps_pkg_name + ".xml", - status="S" + status="S", ) + @celery_app.task(bind=True) def task_articles_complete_data( self, user_id=None, username=None, from_date=None, force_update=False @@ -311,26 +319,32 @@ def article_complete_data( pass - @celery_app.task(bind=True) -def transfer_license_statements_fk_to_article_license(self, user_id=None, username=None): +def transfer_license_statements_fk_to_article_license( + self, user_id=None, username=None +): user = _get_user(self.request, username, user_id) articles_to_update = [] for instance in Article.objects.filter(article_license__isnull=True): new_license = None - if instance.license_statements.exists() and instance.license_statements.first().url: + if ( + instance.license_statements.exists() + and instance.license_statements.first().url + ): new_license = instance.license_statements.first().url elif instance.license and instance.license.license_type: new_license = instance.license.license_type - + if new_license: instance.article_license = new_license instance.updated_by = user articles_to_update.append(instance) if articles_to_update: - Article.objects.bulk_update(articles_to_update, ['article_license', 'updated_by']) + Article.objects.bulk_update( + articles_to_update, ["article_license", "updated_by"] + ) logging.info("The article_license of model Articles have been updated") @@ -338,15 +352,26 @@ def remove_duplicate_articles(pid_v3=None): ids_to_exclude = [] try: if pid_v3: - duplicates = Article.objects.filter(pid_v3=pid_v3).values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1) + duplicates = ( + Article.objects.filter(pid_v3=pid_v3) + .values("pid_v3") + .annotate(pid_v3_count=Count("pid_v3")) + .filter(pid_v3_count__gt=1) + ) else: - duplicates = Article.objects.values("pid_v3").annotate(pid_v3_count=Count("pid_v3")).filter(pid_v3_count__gt=1) + duplicates = ( + Article.objects.values("pid_v3") + .annotate(pid_v3_count=Count("pid_v3")) + .filter(pid_v3_count__gt=1) + ) for duplicate in duplicates: - article_ids = Article.objects.filter( - pid_v3=duplicate["pid_v3"] - ).order_by("created")[1:].values_list("id", flat=True) + article_ids = ( + Article.objects.filter(pid_v3=duplicate["pid_v3"]) + .order_by("created")[1:] + .values_list("id", flat=True) + ) ids_to_exclude.extend(article_ids) - + if ids_to_exclude: Article.objects.filter(id__in=ids_to_exclude).delete() except Exception as exception: @@ -359,7 +384,7 @@ def remove_duplicate_articles(pid_v3=None): }, ) + @celery_app.task(bind=True) def remove_duplicate_articles_task(self, user_id=None, username=None, pid_v3=None): remove_duplicate_articles(pid_v3) - diff --git a/article/tests.py b/article/tests.py index a00f51b6..a18a83eb 100755 --- a/article/tests.py +++ b/article/tests.py @@ -7,18 +7,24 @@ from unittest.mock import patch, PropertyMock from article.models import Article, ArticleFormat -from article.tasks import remove_duplicate_articles, convert_xml_to_pubmed_or_pmc_formats, convert_xml_to_crossref_format +from article.tasks import ( + remove_duplicate_articles, + convert_xml_to_pubmed_or_pmc_formats, + convert_xml_to_crossref_format, +) from core.users.models import User from doi.models import DOI from doi_manager.models import CrossRefConfiguration - class RemoveDuplicateArticlesTest(TestCase): def create_article_at_time(self, dt, v3): @freeze_time(dt) def create_article(): - Article.objects.create(pid_v3=v3, created=make_aware(datetime.strptime(dt, "%Y-%m-%d"))) + Article.objects.create( + pid_v3=v3, created=make_aware(datetime.strptime(dt, "%Y-%m-%d")) + ) + create_article() def test_remove_duplicates_keeps_earliest_article(self): @@ -27,13 +33,17 @@ def test_remove_duplicates_keeps_earliest_article(self): self.create_article_at_time("2023-01-03", "pid1") remove_duplicate_articles() self.assertEqual(Article.objects.all().count(), 1) - self.assertEqual(Article.objects.all()[0].created, make_aware(datetime(2023, 1, 1))) + self.assertEqual( + Article.objects.all()[0].created, make_aware(datetime(2023, 1, 1)) + ) def test_no_removal_if_only_one_article(self): self.create_article_at_time("2023-01-01", "pid1") remove_duplicate_articles() self.assertEqual(Article.objects.all().count(), 1) - self.assertEqual(Article.objects.all()[0].created, make_aware(datetime(2023, 1, 1))) + self.assertEqual( + Article.objects.all()[0].created, make_aware(datetime(2023, 1, 1)) + ) def test_remove_duplicates_for_multiple_pids(self): self.create_article_at_time("2022-06-03", "pid2") @@ -43,8 +53,13 @@ def test_remove_duplicates_for_multiple_pids(self): remove_duplicate_articles() self.assertEqual(Article.objects.filter(pid_v3="pid2").count(), 1) self.assertEqual(Article.objects.filter(pid_v3="pid3").count(), 1) - self.assertEqual(Article.objects.get(pid_v3="pid2").created, make_aware(datetime(2022, 6, 3))) - self.assertEqual(Article.objects.get(pid_v3="pid3").created, make_aware(datetime(2022, 6, 14))) + self.assertEqual( + Article.objects.get(pid_v3="pid2").created, make_aware(datetime(2022, 6, 3)) + ) + self.assertEqual( + Article.objects.get(pid_v3="pid3").created, + make_aware(datetime(2022, 6, 14)), + ) class ArticleFormatModelTest(TestCase): @@ -57,44 +72,58 @@ def setUp(self): sps_pkg_name="0001-3714-rm-30-04-299", ) - self.test_file = SimpleUploadedFile("test_file.xml", b"
Test
", content_type="application/xml") - self.test_file2 = SimpleUploadedFile("test_file2.xml", b"
Test2
", content_type="application/xml") + self.test_file = SimpleUploadedFile( + "test_file.xml", + b"
Test
", + content_type="application/xml", + ) + self.test_file2 = SimpleUploadedFile( + "test_file2.xml", + b"
Test2
", + content_type="application/xml", + ) self.article_format = ArticleFormat.objects.create( article=self.article, - format_name='pmc', + format_name="pmc", version=1, file=self.test_file, valid=True, status="S", ) - def verify_fields_model_article_format(self, article_format, version, format_name=None, status=None, file=None): - self.assertEqual(article_format.article, self.article) + def verify_fields_model_article_format( + self, article_format, version, format_name=None, status=None, file=None + ): + self.assertEqual(article_format.article, self.article) if format_name: self.assertEqual(article_format.format_name, format_name) self.assertEqual(article_format.version, version) self.assertEqual(article_format.report, None) if status: self.assertEqual(article_format.status, status) - + def test_get_method(self): - article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) - self.verify_fields_model_article_format(article_format=article_format, format_name='pmc', version=1) + article_format = ArticleFormat.get(self.article, format_name="pmc", version=1) + self.verify_fields_model_article_format( + article_format=article_format, format_name="pmc", version=1 + ) def test_create_classmethod(self): article_format = ArticleFormat.create( - user=self.user, - article=self.article, - format_name='pubmed', - version=1 + user=self.user, article=self.article, format_name="pubmed", version=1 + ) + self.verify_fields_model_article_format( + article_format=article_format, format_name="pubmed", version=1 ) - self.verify_fields_model_article_format(article_format=article_format, format_name='pubmed', version=1) def test_get_method_raises_value_error(self): with self.assertRaises(ValueError) as context: - ArticleFormat.get(self.article, format_name='pubmed') + ArticleFormat.get(self.article, format_name="pubmed") - self.assertEqual(str(context.exception), "ArticleFormat.get requires article and format_name and version") + self.assertEqual( + str(context.exception), + "ArticleFormat.get requires article and format_name and version", + ) def test_create_or_update_classmethod(self): article_format = ArticleFormat.create_or_update( @@ -103,49 +132,62 @@ def test_create_or_update_classmethod(self): format_name="pmc", version=1, ) - self.verify_fields_model_article_format(article_format=article_format, format_name='pmc', version=1) - + self.verify_fields_model_article_format( + article_format=article_format, format_name="pmc", version=1 + ) + def convert_and_compare_xml(self, article_format, filename, xml_content): article_format.save_file(filename=filename, content=xml_content) with article_format.file.open("rb") as f: saved_content = f.read() - self.assertEqual(etree.tostring(etree.fromstring(saved_content), encoding="utf-8"), - etree.tostring(xml_content, encoding="utf-8")) + self.assertEqual( + etree.tostring(etree.fromstring(saved_content), encoding="utf-8"), + etree.tostring(xml_content, encoding="utf-8"), + ) def test_save_file_method(self): filename = "0034-7094-rba-69-03-0227.xml" - article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) + article_format = ArticleFormat.get(self.article, format_name="pmc", version=1) self.test_file.seek(0) input_xml = etree.fromstring(self.test_file.read()) - self.convert_and_compare_xml(article_format=article_format, filename=filename, xml_content=input_xml) + self.convert_and_compare_xml( + article_format=article_format, filename=filename, xml_content=input_xml + ) def test_update_xml_in_save_file_method(self): filename = "0034-7094-rba-69-03-0227.xml" self.test_file.seek(0) input_xml = etree.fromstring(self.test_file.read()) - article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) - self.convert_and_compare_xml(article_format=article_format, filename=filename, xml_content=input_xml) + article_format = ArticleFormat.get(self.article, format_name="pmc", version=1) + self.convert_and_compare_xml( + article_format=article_format, filename=filename, xml_content=input_xml + ) self.test_file2.seek(0) update_xml = etree.fromstring(self.test_file2.read()) - self.convert_and_compare_xml(article_format=article_format, filename=filename, xml_content=update_xml) + self.convert_and_compare_xml( + article_format=article_format, filename=filename, xml_content=update_xml + ) def test_save_format_xml_method(self): - article_format = ArticleFormat.get(self.article, format_name='pmc', version=1) + article_format = ArticleFormat.get(self.article, format_name="pmc", version=1) input_xml = etree.fromstring("
Test
") filename = article_format.article.sps_pkg_name + ".xml" - article_format.save_format_xml(format_xml=input_xml, filename=filename, status="S") - with article_format.file.open('rb') as f: + article_format.save_format_xml( + format_xml=input_xml, filename=filename, status="S" + ) + with article_format.file.open("rb") as f: saved_content = f.read() self.assertEqual(saved_content, etree.tostring(input_xml, encoding="utf-8")) - self.verify_fields_model_article_format(article_format=article_format, status="S", version=1) + self.verify_fields_model_article_format( + article_format=article_format, status="S", version=1 + ) + class TasksConvertXmlFormatsTest(TestCase): def setUp(self): - self.doi = DOI.objects.create( - value="10.1000.10/123456" - ) + self.doi = DOI.objects.create(value="10.1000.10/123456") self.article = Article.objects.create( pid_v3="P3swRmPHQfy37r9xRbLCw8G", sps_pkg_name="0001-3714-rm-30-04-299", @@ -154,10 +196,16 @@ def setUp(self): username="admin", ) - self.input_xml = etree.fromstring("
Original PMC
") - self.modified_xml = etree.fromstring("
Modified PMC
") - - def verify_article_format(self, status, version, pid_v3=None, report=None, file_exists=True): + self.input_xml = etree.fromstring( + "
Original PMC
" + ) + self.modified_xml = etree.fromstring( + "
Modified PMC
" + ) + + def verify_article_format( + self, status, version, pid_v3=None, report=None, file_exists=True + ): self.assertEqual(ArticleFormat.objects.count(), 1) article_format = ArticleFormat.objects.first() self.assertEqual(article_format.article.pid_v3, pid_v3 or self.article.pid_v3) @@ -166,57 +214,74 @@ def verify_article_format(self, status, version, pid_v3=None, report=None, file_ if report: self.assertEqual(article_format.report, report) if file_exists: - with article_format.file.open('rb') as f: + with article_format.file.open("rb") as f: content = f.read() - self.assertEqual(content, etree.tostring(self.modified_xml, encoding="utf-8")) + self.assertEqual( + content, etree.tostring(self.modified_xml, encoding="utf-8") + ) else: self.assertFalse(article_format.file) - - @patch('article.models.Article.xmltree', new_callable=PropertyMock) - @patch('article.tasks.pmc.pipeline_pmc') + + @patch("article.models.Article.xmltree", new_callable=PropertyMock) + @patch("article.tasks.pmc.pipeline_pmc") def test_convert_xml_to_pmc_formats(self, mock_pipeline_pmc, mock_property_xmltree): mock_property_xmltree.return_value = self.input_xml mock_pipeline_pmc.return_value = self.modified_xml - convert_xml_to_pubmed_or_pmc_formats(pid_v3=self.article.pid_v3, format_name="pmc", username="admin") + convert_xml_to_pubmed_or_pmc_formats( + pid_v3=self.article.pid_v3, format_name="pmc", username="admin" + ) mock_pipeline_pmc.assert_called_once_with(mock_property_xmltree.return_value) self.verify_article_format(status="S", version=1) - - @patch('article.models.Article.xmltree', new_callable=PropertyMock) - @patch('article.tasks.pubmed.pipeline_pubmed') - def test_convert_xml_to_pubmed_formats(self, mock_pipeline_pubmed, mock_property_xmltree): + @patch("article.models.Article.xmltree", new_callable=PropertyMock) + @patch("article.tasks.pubmed.pipeline_pubmed") + def test_convert_xml_to_pubmed_formats( + self, mock_pipeline_pubmed, mock_property_xmltree + ): mock_property_xmltree.return_value = self.input_xml mock_pipeline_pubmed.return_value = self.modified_xml - convert_xml_to_pubmed_or_pmc_formats(pid_v3=self.article.pid_v3, format_name="pubmed", username="admin") + convert_xml_to_pubmed_or_pmc_formats( + pid_v3=self.article.pid_v3, format_name="pubmed", username="admin" + ) mock_pipeline_pubmed.assert_called_once_with(mock_property_xmltree.return_value) self.verify_article_format(status="S", version=1) - - @patch('doi_manager.models.CrossRefConfiguration.get_data', return_value=dict()) - @patch('article.models.Article.xmltree', new_callable=PropertyMock) - @patch('article.tasks.crossref.pipeline_crossref') - def test_convert_xml_to_crossref_formats(self, mock_pipeline_crossref, mock_property_xmltree, mock_get_data): + @patch("doi_manager.models.CrossRefConfiguration.get_data", return_value=dict()) + @patch("article.models.Article.xmltree", new_callable=PropertyMock) + @patch("article.tasks.crossref.pipeline_crossref") + def test_convert_xml_to_crossref_formats( + self, mock_pipeline_crossref, mock_property_xmltree, mock_get_data + ): self.article.doi.add(self.doi) mock_property_xmltree.return_value = self.input_xml mock_pipeline_crossref.return_value = self.modified_xml - convert_xml_to_crossref_format(pid_v3=self.article.pid_v3, format_name="crossref", username="admin") + convert_xml_to_crossref_format( + pid_v3=self.article.pid_v3, format_name="crossref", username="admin" + ) self.verify_article_format(status="S", version=1) - def test_convert_xml_to_crossref_formats_without_doi(self): - convert_xml_to_crossref_format(pid_v3=self.article.pid_v3, format_name="crossref", username="admin") - expected_msg = {"exception_msg": f"Unable to format because the article {self.article.pid_v3} has no DOI associated with it"} - self.verify_article_format(status="E", version=1, file_exists=False, report=expected_msg) - + convert_xml_to_crossref_format( + pid_v3=self.article.pid_v3, format_name="crossref", username="admin" + ) + expected_msg = { + "exception_msg": f"Unable to format because the article {self.article.pid_v3} has no DOI associated with it" + } + self.verify_article_format( + status="E", version=1, file_exists=False, report=expected_msg + ) - @patch('doi_manager.models.CrossRefConfiguration.get_data') - def test_convert_xml_to_crossref_formats_missing_crossref_configuration(self, mock_get_data): + @patch("doi_manager.models.CrossRefConfiguration.get_data") + def test_convert_xml_to_crossref_formats_missing_crossref_configuration( + self, mock_get_data + ): self.article.doi.add(self.doi) mock_get_data.side_effect = CrossRefConfiguration.DoesNotExist - convert_xml_to_crossref_format(pid_v3=self.article.pid_v3, format_name="crossref", username="admin") - expected_prefix = '10.1000.10' + convert_xml_to_crossref_format( + pid_v3=self.article.pid_v3, format_name="crossref", username="admin" + ) + expected_prefix = "10.1000.10" mock_get_data.assert_called_once_with(expected_prefix) -