Skip to content

Commit

Permalink
全文搜索
Browse files Browse the repository at this point in the history
  • Loading branch information
yidao620c committed Apr 25, 2016
1 parent 29d563f commit 64f676e
Show file tree
Hide file tree
Showing 8 changed files with 77 additions and 68 deletions.
3 changes: 2 additions & 1 deletion blog/search_indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@


class PostIndex(indexes.SearchIndex, indexes.Indexable):
# 文章内容
text = indexes.CharField(document=True, use_template=True)
# 对title字段进行索引
# 对标题字段进行索引
title = indexes.CharField(model_attr='title')

def get_model(self):
Expand Down
3 changes: 0 additions & 3 deletions blog/templates/blog/post_search_list.html
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
{% extends 'mysite/base.html' %}
{% load blog_tags %}
{% load pagination_tags %}
{% block content %}
{% if list_header %}
<div class="box">{{ list_header }}</div>
{% endif %}
{% autopaginate posts 10 %}
{% for post in posts %}
<div class="post">
<h2><a href="{% url 'blog.views.post_detail' pk=post.pk %}">{{ post.title }}</a></h2>
</div>
{% endfor %}
{% paginate %}
{% endblock %}
2 changes: 1 addition & 1 deletion blog/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

urlpatterns = [
url(r'^$', post_list, name='home'),
# url(r'^posts/search/$', full_search, name='full_search'),
url(r'^posts/search/$', full_search, name='full_search'),
url(r'^posts/tag/(?P<tag>\w+)$', post_list_by_tag, name='list_by_tag'),
url(r'^posts/category/(?P<cg>\w+)$', post_list_by_category, name='list_by_cg'),
url(r'^posts/archive/(?P<y>[0-9]{4})/(?P<m>[0-9]{1,2})$', post_list_by_ym, name='list_by_ym'),
Expand Down
16 changes: 8 additions & 8 deletions blog/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@
import json


# from haystack.forms import SearchForm
from haystack.forms import SearchForm
# import qiniu


# def full_search(request):
# """全局搜索"""
# keywords = request.GET['q']
# sform = SearchForm(request.GET)
# posts = sform.search()
# return render(request, 'blog/post_search_list.html',
# {'posts': posts, 'list_header': '关键字 \'{}\' 搜索结果'.format(keywords)})
def full_search(request):
    """Run a site-wide full-text search and render the matching posts.

    The search keywords are read from the ``q`` query-string parameter. A
    request that carries no ``q`` parameter is treated as an empty query
    rather than raising a ``KeyError`` from the ``request.GET`` lookup.

    Args:
        request: The incoming HTTP request; its ``GET`` data is bound to
            Haystack's ``SearchForm``.

    Returns:
        The rendered ``blog/post_search_list.html`` response, with the
        search results under ``posts`` and a heading under ``list_header``.
    """
    # Fall back to an empty string so a bare search URL does not crash.
    keywords = request.GET.get('q', '')
    sform = SearchForm(request.GET)
    posts = sform.search()
    return render(request, 'blog/post_search_list.html',
                  {'posts': posts, 'list_header': '关键字 \'{}\' 搜索结果'.format(keywords)})


def get_client_ip(request):
Expand Down
101 changes: 56 additions & 45 deletions blog/whoosh_cn_backend.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,27 @@
from __future__ import unicode_literals
# encoding: utf-8

from __future__ import absolute_import, division, print_function, unicode_literals

import os
import re
import shutil
import threading
import warnings
from jieba.analyse import ChineseAnalyzer

from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
from django.db.models.loading import get_model
from django.utils import six
from django.utils.datetime_safe import datetime
from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, log_query, EmptyResults
from haystack.constants import ID, DJANGO_CT, DJANGO_ID
from haystack.exceptions import MissingDependency, SearchBackendError
from haystack.inputs import PythonData, Clean, Exact, Raw

from haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, EmptyResults, log_query
from haystack.constants import DJANGO_CT, DJANGO_ID, ID
from haystack.exceptions import MissingDependency, SearchBackendError, SkipDocument
from haystack.inputs import Clean, Exact, PythonData, Raw
from haystack.models import SearchResult
from haystack.utils import get_identifier
from haystack.utils import log as logging
from haystack.utils import get_model_ct
from haystack.utils import get_identifier, get_model_ct
from haystack.utils.app_loading import haystack_get_model
from jieba.analyse import ChineseAnalyzer

try:
import json
Expand All @@ -38,20 +41,21 @@
except ImportError:
raise MissingDependency("The 'whoosh' backend requires the installation of 'Whoosh'. Please refer to the documentation.")

# Handle minimum requirement.
if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0):
raise MissingDependency("The 'whoosh' backend requires version 2.5.0 or greater.")

# Bubble up the correct error.
from whoosh import index
from whoosh.analysis import StemmingAnalyzer
from whoosh.fields import ID as WHOOSH_ID
from whoosh.fields import Schema, IDLIST, TEXT, KEYWORD, NUMERIC, BOOLEAN, DATETIME, NGRAM, NGRAMWORDS
from whoosh.fields import BOOLEAN, DATETIME, IDLIST, KEYWORD, NGRAM, NGRAMWORDS, NUMERIC, Schema, TEXT
from whoosh.filedb.filestore import FileStorage, RamStorage
from whoosh.highlight import highlight as whoosh_highlight
from whoosh.highlight import ContextFragmenter, HtmlFormatter
from whoosh.qparser import QueryParser
from whoosh.searching import ResultsPage
from whoosh.writing import AsyncWriter
from whoosh.highlight import HtmlFormatter, highlight as whoosh_highlight, ContextFragmenter

# Handle minimum requirement.
if not hasattr(whoosh, '__version__') or whoosh.__version__ < (2, 5, 0):
raise MissingDependency("The 'whoosh' backend requires version 2.5.0 or greater.")


DATETIME_REGEX = re.compile('^(?P<year>\d{4})-(?P<month>\d{2})-(?P<day>\d{2})T(?P<hour>\d{2}):(?P<minute>\d{2}):(?P<second>\d{2})(\.\d{3,6}Z?)?$')
Expand Down Expand Up @@ -189,32 +193,32 @@ def update(self, index, iterable, commit=True):
writer = AsyncWriter(self.index)

for obj in iterable:
doc = index.full_prepare(obj)

# Really make sure it's unicode, because Whoosh won't have it any
# other way.
for key in doc:
doc[key] = self._from_python(doc[key])

# Document boosts aren't supported in Whoosh 2.5.0+.
if 'boost' in doc:
del doc['boost']

try:
writer.update_document(**doc)
except Exception as e:
if not self.silently_fail:
raise

# We'll log the object identifier but won't include the actual object
# to avoid the possibility of that generating encoding errors while
# processing the log message:
self.log.error(u"%s while preparing object for update" % e.__class__.__name__, exc_info=True, extra={
"data": {
"index": index,
"object": get_identifier(obj)
}
})
doc = index.full_prepare(obj)
except SkipDocument:
self.log.debug(u"Indexing for object `%s` skipped", obj)
else:
# Really make sure it's unicode, because Whoosh won't have it any
# other way.
for key in doc:
doc[key] = self._from_python(doc[key])

# Document boosts aren't supported in Whoosh 2.5.0+.
if 'boost' in doc:
del doc['boost']

try:
writer.update_document(**doc)
except Exception as e:
if not self.silently_fail:
raise

# We'll log the object identifier but won't include the actual object
# to avoid the possibility of that generating encoding errors while
# processing the log message:
self.log.error(u"%s while preparing object for update" % e.__class__.__name__,
exc_info=True, extra={"data": {"index": index,
"object": get_identifier(obj)}})

if len(iterable) > 0:
# For now, commit no matter what, as we run into locking issues otherwise.
Expand All @@ -233,16 +237,19 @@ def remove(self, obj_or_string, commit=True):
if not self.silently_fail:
raise

self.log.error("Failed to remove document '%s' from Whoosh: %s", whoosh_id, e)
self.log.error("Failed to remove document '%s' from Whoosh: %s", whoosh_id, e, exc_info=True)

def clear(self, models=[], commit=True):
def clear(self, models=None, commit=True):
if not self.setup_complete:
self.setup()

self.index = self.index.refresh()

if models is not None:
assert isinstance(models, (list, tuple))

try:
if not models:
if models is None:
self.delete_index()
else:
models_to_delete = []
Expand All @@ -255,7 +262,11 @@ def clear(self, models=[], commit=True):
if not self.silently_fail:
raise

self.log.error("Failed to clear documents from Whoosh: %s", e)
if models is not None:
self.log.error("Failed to clear Whoosh index of models '%s': %s", ','.join(models_to_delete),
e, exc_info=True)
else:
self.log.error("Failed to clear Whoosh index: %s", e, exc_info=True)

def delete_index(self):
# Per the Whoosh mailing list, if wiping out everything from the index,
Expand Down Expand Up @@ -603,7 +614,7 @@ def _process_results(self, raw_page, highlight=False, query_string='', spelling_
score = raw_page.score(doc_offset) or 0
app_label, model_name = raw_result[DJANGO_CT].split('.')
additional_fields = {}
model = get_model(app_label, model_name)
model = haystack_get_model(app_label, model_name)

if model and model in indexed_models:
for key, value in raw_result.items():
Expand Down
16 changes: 8 additions & 8 deletions mysite/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
'django.contrib.staticfiles',
'django.contrib.admin',
# 'xadmin',
# 'haystack',
'haystack',
'crispy_forms',
'reversion',
'blog',
Expand Down Expand Up @@ -148,13 +148,13 @@
# SESSION_CACHE_ALIAS = 'default'

# full text search
# HAYSTACK_CONNECTIONS = {
# 'default': {
# 'ENGINE': 'blog.whoosh_cn_backend.WhooshEngine',
# 'PATH': os.path.join(BASE_DIR, 'whoosh_index'),
# },
# }
# HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'
# Haystack connection settings: the default connection uses the project's
# own Whoosh engine (blog/whoosh_cn_backend.py) and keeps its index files
# in the "whoosh_index" directory under BASE_DIR.
HAYSTACK_CONNECTIONS = {
'default': {
'ENGINE': 'blog.whoosh_cn_backend.WhooshEngine',
'PATH': os.path.join(BASE_DIR, 'whoosh_index'),
},
}
# Select Haystack's RealtimeSignalProcessor.
# NOTE(review): per the Haystack docs this updates the index on every model
# save/delete -- confirm against the installed Haystack version.
HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.RealtimeSignalProcessor'

# A sample logging configuration. The only tangible logging
# performed by this configuration is to send an email to
Expand Down
2 changes: 1 addition & 1 deletion mysite/templates/mysite/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ <h1><a href="{% url 'blog.views.post_list' %}">{% trans 'Simple Blog'%}</a></h1>
<!-- menus END -->
<!-- searchbox START -->
<div id="searchbox">
<form action="" method="get">
<form action="{% url 'blog.views.full_search' %}" method="get">
<div class="content">
<label>
<input type="text" class="textfield searchtip" name="q" size="24" value="">
Expand Down
2 changes: 1 addition & 1 deletion mysite/urls.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,6 @@
# Project-level URL configuration.
urlpatterns = [
# Django's built-in auth URLs, included at the site root.
url('^', include('django.contrib.auth.urls')),
# Django admin site.
url(r'^admin/', include(admin.site.urls)),
# url(r'^search/', include('haystack.urls')),
# Haystack's bundled URLs, included under /search/.
url(r'^search/', include('haystack.urls')),
# All remaining paths are delegated to the blog app's URLconf.
url(r'', include('blog.urls')),
]

0 comments on commit 64f676e

Please sign in to comment.