From 64f676e5532464a8da954b2ce81b355141c38706 Mon Sep 17 00:00:00 2001 From: XiongNeng Date: Mon, 25 Apr 2016 14:25:17 +0800 Subject: [PATCH] =?UTF-8?q?=E5=85=A8=E6=96=87=E6=90=9C=E7=B4=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- blog/search_indexes.py | 3 +- blog/templates/blog/post_search_list.html | 3 - blog/urls.py | 2 +- blog/views.py | 16 ++-- blog/whoosh_cn_backend.py | 101 ++++++++++++---------- mysite/settings.py | 16 ++-- mysite/templates/mysite/base.html | 2 +- mysite/urls.py | 2 +- 8 files changed, 77 insertions(+), 68 deletions(-) diff --git a/blog/search_indexes.py b/blog/search_indexes.py index ba966b4..dd50e25 100644 --- a/blog/search_indexes.py +++ b/blog/search_indexes.py @@ -9,8 +9,9 @@ class PostIndex(indexes.SearchIndex, indexes.Indexable): + # 文章内容 text = indexes.CharField(document=True, use_template=True) - # 对title字段进行索引 + # 对标题字段进行索引 title = indexes.CharField(model_attr='title') def get_model(self): diff --git a/blog/templates/blog/post_search_list.html b/blog/templates/blog/post_search_list.html index 30292ee..9668d52 100644 --- a/blog/templates/blog/post_search_list.html +++ b/blog/templates/blog/post_search_list.html @@ -1,15 +1,12 @@ {% extends 'mysite/base.html' %} {% load blog_tags %} -{% load pagination_tags %} {% block content %} {% if list_header %}
{{ list_header }}
{% endif %} - {% autopaginate posts 10 %} {% for post in posts %}

{{ post.title }}

{% endfor %} - {% paginate %} {% endblock %} diff --git a/blog/urls.py b/blog/urls.py index 587a56c..237465c 100644 --- a/blog/urls.py +++ b/blog/urls.py @@ -4,7 +4,7 @@ urlpatterns = [ url(r'^$', post_list, name='home'), - # url(r'^posts/search/$', full_search, name='full_search'), + url(r'^posts/search/$', full_search, name='full_search'), url(r'^posts/tag/(?P\w+)$', post_list_by_tag, name='list_by_tag'), url(r'^posts/category/(?P\w+)$', post_list_by_category, name='list_by_cg'), url(r'^posts/archive/(?P[0-9]{4})/(?P[0-9]{1,2})$', post_list_by_ym, name='list_by_ym'), diff --git a/blog/views.py b/blog/views.py index 927bd08..f98f320 100644 --- a/blog/views.py +++ b/blog/views.py @@ -14,17 +14,17 @@ import json -# from haystack.forms import SearchForm +from haystack.forms import SearchForm # import qiniu -# def full_search(request): -# """全局搜索""" -# keywords = request.GET['q'] -# sform = SearchForm(request.GET) -# posts = sform.search() -# return render(request, 'blog/post_search_list.html', -# {'posts': posts, 'list_header': '关键字 \'{}\' 搜索结果'.format(keywords)}) +def full_search(request): + """全局搜索""" + keywords = request.GET['q'] + sform = SearchForm(request.GET) + posts = sform.search() + return render(request, 'blog/post_search_list.html', + {'posts': posts, 'list_header': '关键字 \'{}\' 搜索结果'.format(keywords)}) def get_client_ip(request): diff --git a/blog/whoosh_cn_backend.py b/blog/whoosh_cn_backend.py index 5e27b39..e1ac005 100644 --- a/blog/whoosh_cn_backend.py +++ b/blog/whoosh_cn_backend.py @@ -1,24 +1,27 @@ -from __future__ import unicode_literals +# encoding: utf-8 + +from __future__ import absolute_import, division, print_function, unicode_literals + import os import re import shutil import threading import warnings -from jieba.analyse import ChineseAnalyzer from django.conf import settings from django.core.exceptions import ImproperlyConfigured -from django.db.models.loading import get_model from django.utils import six from django.utils.datetime_safe import datetime -from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query, EmptyResults -from haystack.constants import ID, DJANGO_CT, DJANGO_ID -from haystack.exceptions import MissingDependency, SearchBackendError -from haystack.inputs import PythonData, Clean, Exact, Raw + +from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query +from haystack.constants import DJANGO_CT, DJANGO_ID, ID +from haystack.exceptions import MissingDependency, SearchBackendError, SkipDocument +from haystack.inputs import Clean, Exact, PythonData, Raw from haystack.models import SearchResult -from haystack.utils import get_identifier from haystack.utils import log as logging -from haystack.utils import get_model_ct +from haystack.utils import get_identifier, get_model_ct +from haystack.utils.app_loading import haystack_get_model +from jieba.analyse import ChineseAnalyzer try: import json @@ -38,20 +41,21 @@ except ImportError: raise MissingDependency("The 'whoosh' backend requires the installation of 'Whoosh'. Please refer to the documentation.") +# Handle minimum requirement. +if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0): + raise MissingDependency("The 'whoosh' backend requires version 2.5.0 or greater.") + # Bubble up the correct error. from whoosh import index from whoosh.analysis import StemmingAnalyzer from whoosh.fields import ID as WHOOSH_ID -from whoosh.fields import Schema, IDLIST, TEXT, KEYWORD, NUMERIC, BOOLEAN, DATETIME, NGRAM, NGRAMWORDS +from whoosh.fields import BOOLEAN, DATETIME, IDLIST, KEYWORD, NGRAM, NGRAMWORDS, NUMERIC, Schema, TEXT from whoosh.filedb.filestore import FileStorage, RamStorage +from whoosh.highlight import highlight as whoosh_highlight +from whoosh.highlight import ContextFragmenter, HtmlFormatter from whoosh.qparser import QueryParser from whoosh.searching import ResultsPage from whoosh.writing import AsyncWriter -from whoosh.highlight import HtmlFormatter, highlight as whoosh_highlight, ContextFragmenter - -# Handle minimum requirement. -if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0): - raise MissingDependency("The 'whoosh' backend requires version 2.5.0 or greater.") DATETIME_REGEX = re.compile('^(?P\d{4})-(?P\d{2})-(?P\d{2})T(?P\d{2}):(?P\d{2}):(?P\d{2})(\.\d{3,6}Z?)?$') @@ -189,32 +193,32 @@ def update(self, index, iterable, commit=True): writer = AsyncWriter(self.index) for obj in iterable: - doc = index.full_prepare(obj) - - # Really make sure it's unicode, because Whoosh won't have it any - # other way. - for key in doc: - doc[key] = self._from_python(doc[key]) - - # Document boosts aren't supported in Whoosh 2.5.0+. - if 'boost' in doc: - del doc['boost'] - try: - writer.update_document(**doc) - except Exception as e: - if not self.silently_fail: - raise - - # We'll log the object identifier but won't include the actual object - # to avoid the possibility of that generating encoding errors while - # processing the log message: - self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, extra={ - "data": { - "index": index, - "object": get_identifier(obj) - } - }) + doc = index.full_prepare(obj) + except SkipDocument: + self.log.debug(u"Indexing for object `%s` skipped", obj) + else: + # Really make sure it's unicode, because Whoosh won't have it any + # other way. + for key in doc: + doc[key] = self._from_python(doc[key]) + + # Document boosts aren't supported in Whoosh 2.5.0+. + if 'boost' in doc: + del doc['boost'] + + try: + writer.update_document(**doc) + except Exception as e: + if not self.silently_fail: + raise + + # We'll log the object identifier but won't include the actual object + # to avoid the possibility of that generating encoding errors while + # processing the log message: + self.log.error(u"%s while preparing object for update" % e.__class__.__name__, + exc_info=True, extra={"data": {"index": index, + "object": get_identifier(obj)}}) if len(iterable) > 0: # For now, commit no matter what, as we run into locking issues otherwise. @@ -233,16 +237,19 @@ def remove(self, obj_or_string, commit=True): if not self.silently_fail: raise - self.log.error("Failed to remove document '%s' from Whoosh: %s", whoosh_id, e) + self.log.error("Failed to remove document '%s' from Whoosh: %s", whoosh_id, e, exc_info=True) - def clear(self, models=[], commit=True): + def clear(self, models=None, commit=True): if not self.setup_complete: self.setup() self.index = self.index.refresh() + if models is not None: + assert isinstance(models, (list, tuple)) + try: - if not models: + if models is None: self.delete_index() else: models_to_delete = [] @@ -255,7 +262,11 @@ def clear(self, models=[], commit=True): if not self.silently_fail: raise - self.log.error("Failed to clear documents from Whoosh: %s", e) + if models is not None: + self.log.error("Failed to clear Whoosh index of models '%s': %s", ','.join(models_to_delete), + e, exc_info=True) + else: + self.log.error("Failed to clear Whoosh index: %s", e, exc_info=True) def delete_index(self): # Per the Whoosh mailing list, if wiping out everything from the index, @@ -603,7 +614,7 @@ def _process_results(self, raw_page, highlight=False, query_string='', spelling_ score = raw_page.score(doc_offset) or 0 app_label, model_name = raw_result[DJANGO_CT].split('.') additional_fields = {} - model = get_model(app_label, model_name) + model = haystack_get_model(app_label, model_name) if model and model in indexed_models: for key, value in raw_result.items(): diff --git a/mysite/settings.py b/mysite/settings.py index f0f7276..59a47cb 100644 --- a/mysite/settings.py +++ b/mysite/settings.py @@ -40,7 +40,7 @@ 'django.contrib.staticfiles', 'django.contrib.admin', # 'xadmin', - # 'haystack', + 'haystack', 'crispy_forms', 'reversion', 'blog', @@ -148,13 +148,13 @@ # SESSION_CACHE_ALIAS = 'default' # full text search -# HAYSTACK_CONNECTIONS = { -# 'default': { -# 'ENGINE': 'blog.whoosh_cn_backend.WhooshEngine', -# 'PATH': os.path.join(BASE_DIR, 'whoosh_index'), -# }, -# } -# HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor' +HAYSTACK_CONNECTIONS = { + 'default': { + 'ENGINE': 'blog.whoosh_cn_backend.WhooshEngine', + 'PATH': os.path.join(BASE_DIR, 'whoosh_index'), + }, +} +HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor' # A sample logging configuration. The only tangible logging # performed by this configuration is to send an email to diff --git a/mysite/templates/mysite/base.html b/mysite/templates/mysite/base.html index 37b9a43..62db57f 100644 --- a/mysite/templates/mysite/base.html +++ b/mysite/templates/mysite/base.html @@ -54,7 +54,7 @@

{% trans 'Simple Blog'%}