diff --git a/jarbas/api/tests/__init__.py b/jarbas/api/tests/__init__.py
index ec30b6988..d0e61a152 100644
--- a/jarbas/api/tests/__init__.py
+++ b/jarbas/api/tests/__init__.py
@@ -36,5 +36,6 @@ def get_sample_reimbursement_api_response(obj):
         receipt_text=obj.receipt_text,
         last_update=obj.last_update.strftime('%Y-%m-%dT%H:%M:%SZ'),
         available_in_latest_dataset=obj.available_in_latest_dataset,
-        receipt=dict(fetched=obj.receipt_fetched, url=obj.receipt_url)
+        receipt=dict(fetched=obj.receipt_fetched, url=obj.receipt_url),
+        search_vector=None
     )
diff --git a/jarbas/api/tests/test_reimbursement_view.py b/jarbas/api/tests/test_reimbursement_view.py
index 09670b4b3..eefc04251 100644
--- a/jarbas/api/tests/test_reimbursement_view.py
+++ b/jarbas/api/tests/test_reimbursement_view.py
@@ -18,8 +18,8 @@ def get_reimbursement(**kwargs):
     kwargs['reimbursement_values'] = '200.00,500.00'
     kwargs['reimbursement_numbers'] = '2,3'
     if quantity == 1:
-        return mixer.blend(Reimbursement, **kwargs)
-    return mixer.cycle(quantity).blend(Reimbursement, **kwargs)
+        return mixer.blend(Reimbursement, search_vector=None, **kwargs)
+    return mixer.cycle(quantity).blend(Reimbursement, search_vector=None, **kwargs)
 
 
 class TestListApi(TestCase):
diff --git a/jarbas/core/management/commands/searchvector.py b/jarbas/core/management/commands/searchvector.py
new file mode 100644
index 000000000..514f34a00
--- /dev/null
+++ b/jarbas/core/management/commands/searchvector.py
@@ -0,0 +1,30 @@
+from django.core.management.base import BaseCommand
+from django.contrib.postgres.search import SearchVector
+
+from jarbas.core.models import Reimbursement
+
+
+class Command(BaseCommand):
+    """Fill in the `search_vector` column of every reimbursement."""
+
+    def handle(self, *args, **options):
+        """Build the weighted vector and store it with a single update."""
+        total = Reimbursement.objects.count()
+        print('Creating search vector for {} reimbursements…'.format(total))
+        print('This takes several minutes/hours.')
+
+        # Every field is parsed with the Portuguese configuration; the
+        # weight letter ranks matches from 'A' (most relevant) to 'D'.
+        search_vector = \
+            SearchVector('congressperson_name', config='portuguese', weight='A') + \
+            SearchVector('supplier', config='portuguese', weight='A') + \
+            SearchVector('cnpj_cpf', config='portuguese', weight='A') + \
+            SearchVector('party', config='portuguese', weight='A') + \
+            SearchVector('state', config='portuguese', weight='B') + \
+            SearchVector('receipt_text', config='portuguese', weight='B') + \
+            SearchVector('passenger', config='portuguese', weight='C') + \
+            SearchVector('leg_of_the_trip', config='portuguese', weight='C') + \
+            SearchVector('subquota_description', config='portuguese', weight='D') + \
+            SearchVector('subquota_group_description', config='portuguese', weight='D')
+
+        Reimbursement.objects.update(search_vector=search_vector)
diff --git a/jarbas/core/migrations/0039_add_search_vector_to_reimbursement.py b/jarbas/core/migrations/0039_add_search_vector_to_reimbursement.py
new file mode 100644
index 000000000..8eb09a555
--- /dev/null
+++ b/jarbas/core/migrations/0039_add_search_vector_to_reimbursement.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.4 on 2017-09-27 20:19
+from __future__ import unicode_literals
+
+import django.contrib.postgres.search
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0038_auto_20170728_1748'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='historicalreimbursement',
+            name='search_vector',
+            field=django.contrib.postgres.search.SearchVectorField(null=True),
+        ),
+        migrations.AddField(
+            model_name='reimbursement',
+            name='search_vector',
+            field=django.contrib.postgres.search.SearchVectorField(null=True),
+        ),
+    ]
diff --git a/jarbas/core/migrations/0040_create_gin_index_with_search_vector.py b/jarbas/core/migrations/0040_create_gin_index_with_search_vector.py
new file mode 100644
index 000000000..d16e42a33
--- /dev/null
+++ b/jarbas/core/migrations/0040_create_gin_index_with_search_vector.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.4 on 2017-09-28 02:37
+from __future__ import unicode_literals
+
+import django.contrib.postgres.indexes
+from django.db import migrations
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('core', '0039_add_search_vector_to_reimbursement'),
+    ]
+
+    operations = [
+        migrations.AddIndex(
+            model_name='reimbursement',
+            index=django.contrib.postgres.indexes.GinIndex(fields=['search_vector'], name='core_reimbu_search__ba9b2f_gin'),
+        ),
+    ]
diff --git a/jarbas/core/models.py b/jarbas/core/models.py
index 12716fe15..aa97a3eac 100644
--- a/jarbas/core/models.py
+++ b/jarbas/core/models.py
@@ -1,4 +1,6 @@
 from django.contrib.postgres.fields import JSONField
+from django.contrib.postgres.indexes import GinIndex
+from django.contrib.postgres.search import SearchVectorField
 from django.db import models
 from requests import head
 from simple_history.models import HistoricalRecords
@@ -79,6 +81,8 @@ class Reimbursement(models.Model):
     receipt_url = models.CharField('URL do Documento Fiscal', max_length=140, blank=True, null=True)
     receipt_text = models.TextField('Texto do Recibo', blank=True, null=True)
 
+    search_vector = SearchVectorField(null=True)
+
     history = HistoricalRecords()
 
     objects = models.Manager.from_queryset(ReimbursementQuerySet)()
@@ -88,6 +92,7 @@ class Meta:
         verbose_name = 'reembolso'
         verbose_name_plural = 'reembolsos'
         index_together = [['year', 'issue_date', 'id']]
+        indexes = [GinIndex(fields=['search_vector'])]
 
     def get_receipt_url(self, force=False, bulk=False):
         if self.receipt_url:
diff --git a/jarbas/core/tests/test_searchvector_command.py b/jarbas/core/tests/test_searchvector_command.py
new file mode 100644
index 000000000..2c7719d7d
--- /dev/null
+++ b/jarbas/core/tests/test_searchvector_command.py
@@ -0,0 +1,17 @@
+from unittest.mock import patch
+
+from django.test import TestCase
+
+from jarbas.core.management.commands.searchvector import Command
+from jarbas.core.models import Reimbursement
+
+
+class TestCommandHandler(TestCase):
+
+    @patch.object(Reimbursement.objects, 'update')
+    @patch('jarbas.core.management.commands.searchvector.print')
+    def test_handler(self, print_, update):
+        command = Command()
+        command.handle()
+        self.assertEqual(2, print_.call_count)
+        self.assertEqual(1, update.call_count)
diff --git a/jarbas/core/tests/test_tweet_model.py b/jarbas/core/tests/test_tweet_model.py
index 3cf1bd8d2..fb45dabc2 100644
--- a/jarbas/core/tests/test_tweet_model.py
+++ b/jarbas/core/tests/test_tweet_model.py
@@ -7,10 +7,10 @@
 class TestTweet(TestCase):
 
     def setUp(self):
-        self.tweet = mixer.blend(Tweet, status=42)
+        self.tweet = mixer.blend(Tweet, reimbursement__search_vector=None, status=42)
 
     def test_ordering(self):
-        mixer.blend(Tweet, status=1)
+        mixer.blend(Tweet, reimbursement__search_vector=None, status=1)
         self.assertEqual(42, Tweet.objects.first().status)
 
     def test_get_url(self):
diff --git a/jarbas/core/tests/test_tweets_command.py b/jarbas/core/tests/test_tweets_command.py
index 6c2130a83..4d5467b24 100644
--- a/jarbas/core/tests/test_tweets_command.py
+++ b/jarbas/core/tests/test_tweets_command.py
@@ -71,7 +71,7 @@ def test_non_existent_reimbursement(self, save_tweet, document_ids):
     @patch.object(Command, 'document_ids', new_callable=PropertyMock)
     @patch.object(Command, 'save_tweet')
     def test_existing_tweet(self, save_tweet, document_ids):
-        reimbursement = mixer.blend(Reimbursement, document_id=123456)
+        reimbursement = mixer.blend(Reimbursement, search_vector=None, document_id=123456)
         mixer.blend(Tweet, status=42, reimbursement=reimbursement)
         document_ids.return_value = ((42, 123456),)
 
@@ -82,7 +82,7 @@ def test_existing_tweet(self, save_tweet, document_ids):
     @patch.object(Command, 'document_ids', new_callable=PropertyMock)
     @patch.object(Command, 'save_tweet')
     def test_new_tweet(self, save_tweet, document_ids):
-        obj = mixer.blend(Reimbursement, document_id=123456)
+        obj = mixer.blend(Reimbursement, search_vector=None, document_id=123456)
         document_ids.return_value = ((42, 123456),)
         with self.settings(**self.credentials):
             Command().handle()
@@ -109,7 +109,7 @@ def test_get_document_id(self):
 
     def test_save_tweet(self):
         status = 9999999999999999999999999
-        reimbursement = mixer.blend(Reimbursement)
+        reimbursement = mixer.blend(Reimbursement, search_vector=None)
         command = Command()
         command.log = MagicMock()
         command.save_tweet(reimbursement, status)
@@ -119,7 +119,7 @@ def test_save_tweet(self):
 
     def test_save_duplicated_tweet(self):
         status = 9999999999999999999999999
-        reimbursement = mixer.blend(Reimbursement)
+        reimbursement = mixer.blend(Reimbursement, search_vector=None)
         tweet = mixer.blend(Tweet, status=status, reimbursement=reimbursement)
         command = Command()
         command.log = MagicMock()
@@ -150,7 +150,11 @@ def test_tweets_with_clean_database(self, api):
 
     @patch('jarbas.core.management.commands.tweets.twitter.Api')
     def test_tweets_with_database(self, api):
-        tweet = mixer.blend(Tweet, status=random_tweet_status())
+        tweet = mixer.blend(
+            Tweet,
+            reimbursement__search_vector=None,
+            status=random_tweet_status()
+        )
         api.return_value.GetUserTimeline.return_value = range(3)
         with self.settings(**self.credentials):
             command = Command()
diff --git a/jarbas/dashboard/admin.py b/jarbas/dashboard/admin.py
index 63a21e2eb..cba1f5de0 100644
--- a/jarbas/dashboard/admin.py
+++ b/jarbas/dashboard/admin.py
@@ -4,6 +4,8 @@
 from brazilnum.cnpj import format_cnpj
 from brazilnum.cpf import format_cpf
 from django.contrib.admin import SimpleListFilter
+from django.contrib.postgres.search import SearchQuery, SearchRank
+from django.db.models import F
 from django.forms.widgets import Widget
 from simple_history.admin import SimpleHistoryAdmin
 
@@ -247,16 +249,7 @@ class ReimbursementModelAdmin(SimpleHistoryAdmin):
         # 'still_available',
     )
 
-    search_fields = (
-        'applicant_id',
-        'cnpj_cpf',
-        'congressperson_name',
-        'document_id',
-        'party',
-        'state',
-        'supplier',
-        'subquota_description',
-    )
+    search_fields = ('search_vector',)
 
     list_filter = (
         SuspiciousListFilter,
@@ -376,5 +369,27 @@ def formfield_for_dbfield(self, db_field, **kwargs):
             kwargs['widget'] = widgets.get(db_field.name)
         return super().formfield_for_dbfield(db_field, **kwargs)
 
+    def get_search_results(self, request, queryset, search_term):
+        """Rank results with PostgreSQL full-text search.
+
+        Matches `search_term` against the `search_vector` column using
+        the Portuguese configuration and orders hits by relevance. The
+        search is chained onto the incoming `queryset`, so restrictions
+        the caller already applied to it are kept.
+
+        Returns the `(queryset, use_distinct)` pair; the filter is on the
+        model's own column, so `use_distinct` is always False.
+        """
+        if not search_term:
+            return super().get_search_results(request, queryset, search_term)
+
+        query = SearchQuery(search_term, config='portuguese')
+        rank = SearchRank(F('search_vector'), query)
+        results = queryset.annotate(rank=rank) \
+            .filter(search_vector=query) \
+            .order_by('-rank')
+
+        return results, False
+
 
 dashboard.register(Reimbursement, ReimbursementModelAdmin)
diff --git a/jarbas/dashboard/tests/test_dashboard_view.py b/jarbas/dashboard/tests/test_dashboard_view.py
index 3632a1824..5928d0ee8 100644
--- a/jarbas/dashboard/tests/test_dashboard_view.py
+++ b/jarbas/dashboard/tests/test_dashboard_view.py
@@ -10,7 +10,7 @@
 class TestDashboard(TestCase):
 
     def setUp(self):
-        obj = mixer.blend(Reimbursement)
+        obj = mixer.blend(Reimbursement, search_vector=None)
         self.urls = (
             resolve_url('dashboard:index'),
             resolve_url('dashboard:core_reimbursement_changelist'),