Skip to content

Commit 1f5de3e

Browse files
authored
Merge pull request #4781 from cyrusimap/xapian_rename_word_break_flag
xapian_wrap.cc: adapt word break flags to Xapian API change
2 parents 65cef45 + 272d10e commit 1f5de3e

File tree

2 files changed

+36
-9
lines changed

2 files changed

+36
-9
lines changed

Diff for: configure.ac

+19-4
Original file line number | Diff line number | Diff line change
@@ -668,16 +668,31 @@ if test "x$enable_xapian" != xno ; then
668668
AC_LINK_IFELSE(
669669
[AC_LANG_PROGRAM(
670670
[[#include <xapian.h>]],
671-
[[unsigned cjk_flags = Xapian::TermGenerator::FLAG_CJK_WORDS | Xapian::QueryParser::FLAG_CJK_WORDS | Xapian::MSet::SNIPPET_CJK_WORDS; (void) cjk_flags; ]])],
671+
[[unsigned cjk_flags = Xapian::TermGenerator::FLAG_WORD_BREAKS | Xapian::QueryParser::FLAG_WORD_BREAKS | Xapian::MSet::SNIPPET_WORD_BREAKS; (void) cjk_flags; ]])],
672672
[xapian_cjkwords="yes"],
673673
[xapian_cjkwords="no"])
674674
AC_MSG_RESULT($xapian_cjkwords)
675675
if test $xapian_cjkwords = yes; then
676-
AC_DEFINE([USE_XAPIAN_CJK_WORDS], [], [Use Xapian CJK word tokenizer, rather than n-grams?])
676+
AC_DEFINE([USE_XAPIAN_WORD_BREAKS], [], [Use Xapian CJK word tokenizer, rather than n-grams?])
677677
xapian_cjk_tokens=words
678678
else
679-
AC_MSG_NOTICE([Your Xapian does not support CJK word tokenization. CJK ngram tokenization will be used instead.])
680-
xapian_cjk_tokens=ngrams
679+
dnl Xapian upstream version 1.5 used different flag names to enable
680+
dnl word break tokenization until March 2023.
681+
dnl See https://github.com/xapian/xapian/commit/13295e9142f56911d4876fb92271df348759c34b
682+
AC_LINK_IFELSE(
683+
[AC_LANG_PROGRAM(
684+
[[#include <xapian.h>]],
685+
[[unsigned cjk_flags = Xapian::TermGenerator::FLAG_CJK_WORDS | Xapian::QueryParser::FLAG_CJK_WORDS | Xapian::MSet::SNIPPET_CJK_WORDS; (void) cjk_flags; ]])],
686+
[xapian_cjkwords="yes"],
687+
[xapian_cjkwords="no"])
688+
AC_MSG_RESULT($xapian_cjkwords)
689+
if test $xapian_cjkwords = yes; then
690+
AC_DEFINE([USE_XAPIAN_CJK_WORDS], [], [Use Xapian CJK word tokenizer, rather than n-grams?])
691+
xapian_cjk_tokens=words
692+
else
693+
AC_MSG_NOTICE([Your Xapian does not support CJK word tokenization. CJK ngram tokenization will be used instead.])
694+
xapian_cjk_tokens=ngrams
695+
fi
681696
fi
682697
LDFLAGS=$ORIG_LDFLAGS
683698
CXXFLAGS=$ORIG_CXXFLAGS

Diff for: imap/xapian_wrap.cpp

+17-5
Original file line number | Diff line number | Diff line change
@@ -728,7 +728,10 @@ static int xapian_dbw_init(xapian_dbw_t *dbw)
728728
dbw->term_generator = new Xapian::TermGenerator;
729729
dbw->term_generator->set_max_word_length(XAPIAN_MAX_TERM_LENGTH);
730730
/* Always enable CJK word tokenization */
731-
#ifdef USE_XAPIAN_CJK_WORDS
731+
#if defined(USE_XAPIAN_WORD_BREAKS)
732+
dbw->term_generator->set_flags(Xapian::TermGenerator::FLAG_WORD_BREAKS,
733+
~Xapian::TermGenerator::FLAG_WORD_BREAKS);
734+
#elif defined(USE_XAPIAN_CJK_WORDS)
732735
dbw->term_generator->set_flags(Xapian::TermGenerator::FLAG_CJK_WORDS,
733736
~Xapian::TermGenerator::FLAG_CJK_WORDS);
734737
#else
@@ -1897,7 +1900,9 @@ xapian_query_new_match_internal(const xapian_db_t *db, int partnum, const char *
18971900
if (*p > 221) //has highbit
18981901
return new Xapian::Query {db->parser->parse_query(
18991902
str,
1900-
#ifdef USE_XAPIAN_CJK_WORDS
1903+
#if defined(USE_XAPIAN_WORD_BREAKS)
1904+
Xapian::QueryParser::FLAG_WORD_BREAKS,
1905+
#elif defined(USE_XAPIAN_CJK_WORDS)
19011906
Xapian::QueryParser::FLAG_CJK_WORDS,
19021907
#else
19031908
Xapian::QueryParser::FLAG_CJK_NGRAM,
@@ -2182,7 +2187,10 @@ static Xapian::Query xapian_snipgen_build_query(xapian_snipgen_t *snipgen, Xapia
21822187
if (snipgen->loose_terms) {
21832188
/* Add loose query terms */
21842189
term_generator.set_stemmer(stemmer);
2185-
#ifdef USE_XAPIAN_CJK_WORDS
2190+
#if defined(USE_XAPIAN_WORD_BREAKS)
2191+
term_generator.set_flags(Xapian::TermGenerator::FLAG_WORD_BREAKS,
2192+
~Xapian::TermGenerator::FLAG_WORD_BREAKS);
2193+
#elif defined(USE_XAPIAN_CJK_WORDS)
21862194
term_generator.set_flags(Xapian::TermGenerator::FLAG_CJK_WORDS,
21872195
~Xapian::TermGenerator::FLAG_CJK_WORDS);
21882196
#else
@@ -2203,7 +2211,9 @@ static Xapian::Query xapian_snipgen_build_query(xapian_snipgen_t *snipgen, Xapia
22032211
/* Add phrase queries */
22042212
unsigned flags = Xapian::QueryParser::FLAG_PHRASE|
22052213
Xapian::QueryParser::FLAG_WILDCARD|
2206-
#ifdef USE_XAPIAN_CJK_WORDS
2214+
#if defined(USE_XAPIAN_WORD_BREAKS)
2215+
Xapian::QueryParser::FLAG_WORD_BREAKS;
2216+
#elif defined(USE_XAPIAN_CJK_WORDS)
22072217
Xapian::QueryParser::FLAG_CJK_WORDS;
22082218
#else
22092219
Xapian::QueryParser::FLAG_CJK_NGRAM;
@@ -2267,7 +2277,9 @@ EXPORTED int xapian_snipgen_make_snippet(xapian_snipgen_t *snipgen,
22672277

22682278
unsigned flags = Xapian::MSet::SNIPPET_EXHAUSTIVE |
22692279
Xapian::MSet::SNIPPET_EMPTY_WITHOUT_MATCH;
2270-
#ifdef USE_XAPIAN_CJK_WORDS
2280+
#if defined(USE_XAPIAN_WORD_BREAKS)
2281+
flags |= Xapian::MSet::SNIPPET_WORD_BREAKS;
2282+
#elif defined(USE_XAPIAN_CJK_WORDS)
22712283
flags |= Xapian::MSet::SNIPPET_CJK_WORDS;
22722284
#endif
22732285

0 commit comments

Comments
 (0)