Skip to content

Commit

Permalink
Merge pull request #1221 from scieloorg/beta
Browse files Browse the repository at this point in the history
Incorporação de códigos estáveis
  • Loading branch information
gustavofonseca committed Nov 11, 2015
2 parents f792b29 + bd84aad commit f8e7443
Show file tree
Hide file tree
Showing 10 changed files with 3,313 additions and 21 deletions.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

47 changes: 47 additions & 0 deletions scielomanager/journalmanager/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Values of the XML `article-type` attribute for which `Article.save` flags
# the article as having its related-articles linkage pending.
LINKABLE_ARTICLE_TYPES = ['correction', ]
# (value, label) pairs, one per event type, where value and label are the
# same string. Presumably consumed as model field choices -- TODO confirm.
EVENT_TYPES = [(ev_type, ev_type) for ev_type in ['added', 'deleted', 'updated']]
# Translatable help text about the default use license. Presumably attached
# to an Issue license field -- TODO confirm against the field that uses it.
# NOTE: the trailing backslash continues the string literal onto the next line.
ISSUE_DEFAULT_LICENSE_HELP_TEXT = _(u"If not defined, will be applied the related journal's use license. \
The SciELO default use license is BY-NC. Please visit: http://ref.scielo.org/jf5ndd (5.2.11. Política de direitos autorais) for more details.")
Expand Down Expand Up @@ -1197,6 +1198,27 @@ class AheadPressRelease(PressRelease):
journal = models.ForeignKey(Journal, related_name='press_releases')


class ArticlesLinkage(models.Model):
    """Directed relationship between two `Article` entities.

    Represents the relation that the XML expresses through the
    `related-article` element.

    - `referrer` is the `Article` instance that points to the other one.
    - `link_to` is the `Article` instance being referred to.
    - `link_type` is the kind of relation, which may be: corrected-article
      or commentary-article.
    """
    referrer = models.ForeignKey('Article', related_name='links_to')
    link_to = models.ForeignKey('Article', related_name='referrers')
    link_type = models.CharField(max_length=32)

    def __repr__(self):
        """Return a debug string showing both ends and the type of the link."""
        template = u'<%s referrer="%s" link_to="%s" link_type="%s">'
        values = (
            self.__class__.__name__,
            repr(self.referrer),
            repr(self.link_to),
            self.link_type,
        )
        return template % values


class Article(models.Model):
"""
Artigo associado ou não a um periódico ou fascículo.
Expand All @@ -1215,17 +1237,22 @@ class Article(models.Model):
updated_at = models.DateTimeField(auto_now=True, default=datetime.datetime.now)
es_updated_at = models.DateTimeField(null=True, blank=True) # elasticsearch
es_is_dirty = models.BooleanField(default=True)
articles_linkage_is_pending = models.BooleanField(default=False)

aid = models.CharField(max_length=32, unique=True, editable=False)
doi = models.CharField(max_length=2048, default=u'', db_index=True)
domain_key = models.SlugField(max_length=2048, unique=True, db_index=False, editable=False)
is_visible = models.BooleanField(default=True)
is_aop = models.BooleanField(default=False)
xml = XMLSPSField()
xml_version = models.CharField(max_length=9)
article_type = models.CharField(max_length=32, db_index=True)

# artigo pode estar temporariamente desassociado de seu periódico e fascículo
journal = models.ForeignKey(Journal, related_name='articles', blank=True, null=True)
issue = models.ForeignKey(Issue, related_name='articles', blank=True, null=True)
related_articles = models.ManyToManyField('self', through='ArticlesLinkage',
symmetrical=False, blank=True, null=True)
journal_title = models.CharField(_('Journal title'), max_length=512, db_index=True)
issn_ppub = models.CharField(max_length=9, db_index=True)
issn_epub = models.CharField(max_length=9, db_index=True)
Expand Down Expand Up @@ -1257,6 +1284,8 @@ class XPaths:
PID = '/article/front/article-meta/article-id[@pub-id-type="publisher-id"]'
ARTICLE_TYPE = '/article/@article-type'
AOP_ID = '/article/front/article-meta/article-id[@pub-id-type="other"]'
RELATED_CORRECTED_ARTICLES = '/article/front/article-meta/related-article[@related-article-type="corrected-article"]'
RELATED_COMMENTARY_ARTICLES = '/article/response/front-stub/related-article[@related-article-type="commentary-article"]'

def save(self, *args, **kwargs):
"""
Expand All @@ -1278,13 +1307,21 @@ def save(self, *args, **kwargs):
self.issn_epub = self.get_value(self.XPaths.ISSN_EPUB) or ''
self.xml_version = self.get_value(self.XPaths.SPS_VERSION) or 'pre-sps'
self.aid = str(uuid4().hex)
self.article_type = self.get_value(self.XPaths.ARTICLE_TYPE)
self.doi = self.get_value(self.XPaths.DOI) or ''

if not any([self.issn_ppub, self.issn_epub]):
raise ValueError('Either issn_ppub or issn_epub must be set')

if not self.journal_title:
raise ValueError('Could not get journal-title from %s' % self)

if not self.article_type:
raise ValueError('Could not get article-type from %s' % self)

if self.article_type in LINKABLE_ARTICLE_TYPES:
self.articles_linkage_is_pending = True

super(Article, self).save(*args, **kwargs)

def save_dirty(self, *args, **kwargs):
Expand Down Expand Up @@ -1375,6 +1412,16 @@ def _get_domain_key(self):
joined_values = '_'.join(text_values)
return slugify(joined_values)

def __repr__(self):
    """Return a debug string with the article's `aid` and domain key.

    The domain key is empty when no XML is attached. Otherwise it is the
    stored `domain_key` or, when that is falsy, the value computed by
    `_get_domain_key()`.
    """
    # Default covers instances without XML (e.g. not yet saved).
    key = u''
    if self.xml is not None:
        key = self.domain_key or self._get_domain_key()

    template = u'<%s aid="%s" domain_key="%s">'
    return template % (self.__class__.__name__, self.aid, key)


# --------------------
# Callbacks de signals
Expand Down
65 changes: 64 additions & 1 deletion scielomanager/journalmanager/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

from lxml import isoschematron, etree
from django.db.models import Q
from django.db import IntegrityError
from django.db import IntegrityError, transaction
from celery.utils.log import get_task_logger

from scielomanager.celery import app
Expand Down Expand Up @@ -292,3 +292,66 @@ def rebuild_articles_domain_key():

for article in articles:
rebuild_article_domain_key.delay(article.pk)


@app.task(ignore_result=True)
def link_article_with_their_related(article_pk):
    """Try to link an article to the articles it refers to.

    Looks up the article identified by `article_pk` (only if its linkage is
    still flagged as pending), reads the `related-article` elements of type
    `corrected-article` from its XML and makes sure an `ArticlesLinkage`
    exists for each referenced DOI.

    The task can be run repeatedly until every reference is established:
    links are created with `get_or_create`, so existing ones are not
    duplicated. Every DOI is attempted on every run, even when an earlier one
    cannot be resolved, so partial progress is kept. The pending flag is only
    cleared once all DOIs were resolved.

    All writes happen inside `transaction.commit_on_success()`, so if an
    exception is raised there, no change is persisted.

    :param article_pk: primary key of the referring `Article`.
    :returns: always `None`.
    """
    try:
        referrer = models.Article.objects.get(
            pk=article_pk, articles_linkage_is_pending=True)
    except models.Article.DoesNotExist:
        logger.info('Cannot find Article with pk: %s. Skipping the task.',
                    article_pk)
        return None

    related_article_elements = referrer.xml.xpath(
        models.Article.XPaths.RELATED_CORRECTED_ARTICLES)

    doi_type_pairs = [
        (elem.attrib['{http://www.w3.org/1999/xlink}href'],
         elem.attrib['related-article-type'])
        for elem in related_article_elements
    ]

    def _ensure_articles_are_linked(doi, rel_type):
        # Returns True when the link exists after the call, False when the
        # target article cannot be unambiguously resolved by its DOI.
        try:
            target = models.Article.objects.only('pk').get(doi=doi)
        except (models.Article.DoesNotExist,
                models.Article.MultipleObjectsReturned) as exc:
            # `exc.message` is deprecated; let the logger format the exception.
            logger.error('Cannot get article with DOI "%s". The error message is: "%s"',
                         doi, exc)
            return False

        _, created = models.ArticlesLinkage.objects.get_or_create(
            referrer=referrer, link_to=target, link_type=rel_type)

        logger.info('Article with pk %s is %s linked to %s',
                    referrer.pk, 'now' if created else 'already', target.pk)

        return True

    with transaction.commit_on_success():
        # Build the list eagerly: `all()` over a lazy generator would stop at
        # the first failure and leave the remaining DOIs unattempted.
        link_statuses = [_ensure_articles_are_linked(doi, rel_type)
                         for doi, rel_type in doi_type_pairs]

        if all(link_statuses):
            referrer.articles_linkage_is_pending = False
            with avoid_circular_signals(ARTICLE_SAVE_MUTEX):
                referrer.save()


@app.task(ignore_result=True)
def process_related_articles():
    """Queue a linking task for every article whose linkage is pending.

    Only the primary key of each article is loaded; the actual work is
    delegated to `link_article_with_their_related`, one task per article.
    """
    pending_articles = models.Article.objects.filter(
        articles_linkage_is_pending=True).only('pk')

    for pending in pending_articles:
        link_article_with_their_related.delay(pending.pk)

4 changes: 4 additions & 0 deletions scielomanager/journalmanager/tests/modelfactories.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@
SAMPLE_XML = xml_file.read()


# Raw content of the XML sample kept under `xml_samples/` whose file name
# ends in `_related`, read once at import time.
with open(os.path.join(_HERE,
                       'xml_samples',
                       '0034-8910-rsp-48-2-0216_related.xml')) as xml_file:
    SAMPLE_XML_RELATED = xml_file.read()


class UserFactory(factory.Factory):
FACTORY_FOR = models.User

Expand Down
113 changes: 106 additions & 7 deletions scielomanager/journalmanager/tests/tests_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -749,15 +749,17 @@ def test_edit_license_and_change_default(self):


class ArticleTests(TestCase):
sample = u"""<article specific-use="sps-1.2">
sample = u"""<article article-type="research-article" specific-use="sps-1.2">
<front>
<journal-meta>
<journal-title-group>
<journal-title>Revista de Saúde Pública</journal-title>
</journal-title-group>
<issn pub-type="ppub">1032-289X</issn>
<issn pub-type="epub">1032-2898</issn>
</journal-meta>
<article-meta>
<article-id pub-id-type="doi">10.1590/abcd</article-id>
<volume>1</volume>
<issue>10</issue>
<pub-date>
Expand All @@ -770,22 +772,30 @@ class ArticleTests(TestCase):
</article>"""

def test_fields_are_created_on_save(self):
auto_fields = ['journal_title', 'domain_key', 'aid', 'issn_ppub',
'issn_epub', 'xml_version', 'article_type', 'doi']

article = models.Article(xml=self.sample)

self.assertEqual(article.journal_title, '')
self.assertEqual(article.domain_key, '')
self.assertEqual(article.aid, '')
for field in auto_fields:
self.assertEqual(getattr(article, field), '')

article.save()

self.assertTrue(article.journal_title)
self.assertTrue(article.domain_key)
self.assertTrue(article.aid)
self.assertEquals(article.journal_title, u'Revista de Saúde Pública')
self.assertEquals(article.issn_ppub, u'1032-289X')
self.assertEquals(article.issn_epub, u'1032-2898')
self.assertEquals(article.xml_version, u'sps-1.2')
self.assertEquals(article.doi, u'10.1590/abcd')

def test_is_visible_defaults_to_true(self):
    """A freshly instantiated Article is visible by default."""
    unsaved = models.Article()
    self.assertTrue(unsaved.is_visible)

def test_articles_linkage_is_pending_defaults_to_false(self):
    """A freshly instantiated Article has no pending linkage."""
    unsaved = models.Article()
    self.assertFalse(unsaved.articles_linkage_is_pending)

def test_articles_are_unique(self):
from django.db import IntegrityError
article = models.Article(xml=self.sample)
Expand Down Expand Up @@ -993,6 +1003,37 @@ def test_aop_detection_when_is_not_aop_and_issue_data_is_missing(self):
article = models.Article(xml=sample)
self.assertFalse(article._get_is_aop())

def test_related_articles_detection(self):
    """Saving an article of type `correction` flags its linkage as pending."""
    correction_xml = u"""<article specific-use="sps-1.2"
article-type="correction"
xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<journal-meta>
<journal-title-group>
<journal-title>Revista de Saúde Pública</journal-title>
</journal-title-group>
<issn pub-type="ppub">1032-289X</issn>
</journal-meta>
<article-meta>
<article-id pub-id-type="other">4809</article-id>
<volume>00</volume>
<issue>00</issue>
<pub-date>
<year>2014</year>
</pub-date>
<fpage>00</fpage>
<lpage>00</lpage>
<related-article related-article-type="corrected-article"
id="ra1"
xlink:href="10.1590/abd1806-4841.20142998"
ext-link-type="doi"/>
</article-meta>
</front>
</article>"""
    correction = models.Article(xml=correction_xml)
    correction.save()

    self.assertTrue(correction.articles_linkage_is_pending)


class ArticleXpathsTests(TestCase):

Expand Down Expand Up @@ -1109,6 +1150,64 @@ def test_aop_id(self):
article.xml.xpath(models.Article.XPaths.AOP_ID)[0].text,
u'xpto')

def test_related_articles(self):
    """The corrected-articles XPath finds every matching element, in order."""
    sample = u"""<article xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<article-meta>
<related-article related-article-type="corrected-article"
id="ra1"
xlink:href="10.1590/abd1806-4841.20142998"
ext-link-type="doi"/>
<related-article related-article-type="corrected-article"
id="ra1"
xlink:href="10.1590/abd1806-4841.20142999"
ext-link-type="doi"/>
</article-meta>
</front>
</article>"""
    href_attr = '{http://www.w3.org/1999/xlink}href'
    expected_dois = [
        '10.1590/abd1806-4841.20142998',
        '10.1590/abd1806-4841.20142999',
    ]

    article = models.Article(xml=sample)
    found = article.xml.xpath(models.Article.XPaths.RELATED_CORRECTED_ARTICLES)

    self.assertEqual(len(found), 2)
    # Document order must be preserved, so compare position by position.
    for element, doi in zip(found, expected_dois):
        self.assertEqual(element.attrib[href_attr], doi)

def test_related_articles_at_response(self):
    """The commentary-articles XPath finds elements under response/front-stub."""
    sample = u"""<article xmlns:xlink="http://www.w3.org/1999/xlink">
<front>
<article-meta>
</article-meta>
</front>
<response>
<front-stub>
<related-article related-article-type="commentary-article"
id="ra1"
xlink:href="10.1590/abd1806-4841.20142998"
ext-link-type="doi"/>
<related-article related-article-type="commentary-article"
id="ra1"
xlink:href="10.1590/abd1806-4841.20142999"
ext-link-type="doi"/>
</front-stub>
</response>
</article>"""
    href_attr = '{http://www.w3.org/1999/xlink}href'
    expected_dois = [
        '10.1590/abd1806-4841.20142998',
        '10.1590/abd1806-4841.20142999',
    ]

    article = models.Article(xml=sample)
    found = article.xml.xpath(models.Article.XPaths.RELATED_COMMENTARY_ARTICLES)

    self.assertEqual(len(found), 2)
    # Document order must be preserved, so compare position by position.
    for element, doi in zip(found, expected_dois):
        self.assertEqual(element.attrib[href_attr], doi)


class ArticleDomainKeyTests(TestCase):
""" Domain key (chave de domínio) é uma chave candidata formada pelo uso
Expand Down
Loading

0 comments on commit f8e7443

Please sign in to comment.