Skip to content

Commit a5257b5

Browse files
authored
GO-3997 Improve context menu search quality (#1940)
1 parent bf7a60b commit a5257b5

File tree

6 files changed

+160
-70
lines changed

6 files changed

+160
-70
lines changed

core/indexer/fulltext.go

+8-5
Original file line numberDiff line numberDiff line change
@@ -178,13 +178,16 @@ func (i *indexer) prepareSearchDocument(ctx context.Context, id string) (docs []
178178
Text: val,
179179
}
180180

181-
layout, layoutValid := sb.Layout()
182-
if layoutValid {
183-
if _, contains := filesLayouts[layout]; !contains {
184-
doc.Title = val
185-
doc.Text = ""
181+
if rel.Key == bundle.RelationKeyName.String() {
182+
layout, layoutValid := sb.Layout()
183+
if layoutValid {
184+
if _, contains := filesLayouts[layout]; !contains {
185+
doc.Title = val
186+
doc.Text = ""
187+
}
186188
}
187189
}
190+
188191
docs = append(docs, doc)
189192
}
190193

core/object.go

+7-6
Original file line numberDiff line numberDiff line change
@@ -89,12 +89,13 @@ func (mw *Middleware) ObjectSearch(cctx context.Context, req *pb.RpcObjectSearch
8989

9090
ds := mw.applicationService.GetApp().MustComponent(objectstore.CName).(objectstore.ObjectStore)
9191
records, err := ds.SpaceIndex(req.SpaceId).Query(database.Query{
92-
Filters: req.Filters,
93-
SpaceId: req.SpaceId,
94-
Sorts: req.Sorts,
95-
Offset: int(req.Offset),
96-
Limit: int(req.Limit),
97-
TextQuery: req.FullText,
92+
Filters: req.Filters,
93+
SpaceId: req.SpaceId,
94+
Sorts: req.Sorts,
95+
Offset: int(req.Offset),
96+
Limit: int(req.Limit),
97+
TextQuery: req.FullText,
98+
PrefixNameQuery: true,
9899
})
99100
if err != nil {
100101
return response(pb.RpcObjectSearchResponseError_UNKNOWN_ERROR, nil, err)

pkg/lib/database/database.go

+7-6
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,13 @@ type Record struct {
2525
}
2626

2727
// Query bundles the parameters of an object query: an optional full-text
// query, the space to search, filters, sorts and paging.
type Query struct {
28-
TextQuery string
29-
SpaceId string
30-
Filters []*model.BlockContentDataviewFilter // filters results. apply sequentially
31-
Sorts []*model.BlockContentDataviewSort // order results. apply hierarchically
32-
Limit int // maximum number of results
33-
Offset int // skip given number of results
28+
TextQuery string // full-text query; a full-text search is performed only when it is non-empty
29+
SpaceId string // space passed to the full-text search
30+
Filters []*model.BlockContentDataviewFilter // filters results. apply sequentially
31+
Sorts []*model.BlockContentDataviewSort // order results. apply hierarchically
32+
Limit int // maximum number of results
33+
Offset int // skip given number of results
34+
PrefixNameQuery bool // when true (and TextQuery is set), NamePrefixSearch is used instead of Search
3435
}
3536

3637
func injectDefaultFilters(filters []*model.BlockContentDataviewFilter) []*model.BlockContentDataviewFilter {

pkg/lib/localstore/ftsearch/ftsearchtantivy.go

+89-50
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030

3131
"github.com/anyproto/anytype-heart/core/wallet"
3232
"github.com/anyproto/anytype-heart/metrics"
33+
"github.com/anyproto/anytype-heart/pkg/lib/bundle"
3334
"github.com/anyproto/anytype-heart/pkg/lib/logging"
3435
"github.com/anyproto/anytype-heart/util/text"
3536
)
@@ -38,7 +39,7 @@ const (
3839
CName = "fts"
3940
ftsDir = "fts"
4041
ftsDir2 = "fts_tantivy"
41-
ftsVer = "10"
42+
ftsVer = "11"
4243
docLimit = 10000
4344

4445
fieldTitle = "Title"
@@ -63,7 +64,9 @@ type FTSearch interface {
6364
NewAutoBatcher() AutoBatcher
6465
BatchIndex(ctx context.Context, docs []SearchDoc, deletedDocs []string) (err error)
6566
BatchDeleteObjects(ids []string) (err error)
66-
Search(spaceIds string, query string) (results []*DocumentMatch, err error)
67+
Search(spaceId string, query string) (results []*DocumentMatch, err error)
68+
// NamePrefixSearch is a special-case search: it matches the query as a phrase prefix and only against name-relation documents.
69+
NamePrefixSearch(spaceId string, query string) (results []*DocumentMatch, err error)
6770
Iterate(objectId string, fields []string, shouldContinue func(doc *SearchDoc) bool) (err error)
6871
DeleteObject(id string) error
6972
DocCount() (uint64, error)
@@ -369,7 +372,15 @@ func (f *ftSearchTantivy) BatchIndex(ctx context.Context, docs []SearchDoc, dele
369372
return f.index.AddAndConsumeDocuments(tantivyDocs...)
370373
}
371374

372-
func (f *ftSearchTantivy) Search(spaceId string, query string) (results []*DocumentMatch, err error) {
375+
// NamePrefixSearch runs the shared performSearch pipeline with the
// buildObjectQuery builder, i.e. a phrase-prefix match restricted to
// documents of the name relation.
func (f *ftSearchTantivy) NamePrefixSearch(spaceId, query string) ([]*DocumentMatch, error) {
376+
return f.performSearch(spaceId, query, f.buildObjectQuery)
377+
}
378+
379+
// Search runs the shared performSearch pipeline with the buildDetailedQuery
// builder (the general full-text query over title and text fields).
func (f *ftSearchTantivy) Search(spaceId, query string) ([]*DocumentMatch, error) {
380+
return f.performSearch(spaceId, query, f.buildDetailedQuery)
381+
}
382+
383+
func (f *ftSearchTantivy) performSearch(spaceId, query string, buildQueryFunc func(*tantivy.QueryBuilder, string)) ([]*DocumentMatch, error) {
373384
query = prepareQuery(query)
374385
if query == "" {
375386
return nil, nil
@@ -380,10 +391,55 @@ func (f *ftSearchTantivy) Search(spaceId string, query string) (results []*Docum
380391
qb.Query(tantivy.Must, fieldSpace, spaceId, tantivy.TermQuery, 1.0)
381392
}
382393

394+
buildQueryFunc(qb, query)
395+
396+
finalQuery := qb.Build()
397+
sCtx := tantivy.NewSearchContextBuilder().
398+
SetQueryFromJson(&finalQuery).
399+
SetDocsLimit(100).
400+
SetWithHighlights(true).
401+
Build()
402+
403+
result, err := f.index.SearchJson(sCtx)
404+
if err != nil {
405+
return nil, wrapError(err)
406+
}
407+
408+
p := f.parserPool.Get()
409+
defer f.parserPool.Put(p)
410+
411+
return tantivy.GetSearchResults(
412+
result,
413+
f.schema,
414+
func(json string) (*DocumentMatch, error) {
415+
return parseSearchResult(json, p)
416+
},
417+
fieldId,
418+
)
419+
}
420+
421+
// buildObjectQuery adds the clauses used by NamePrefixSearch to qb:
//   - a required term match of the name relation key on the id field;
//   - a required boolean clause in which the query must match as a phrase
//     prefix in either the title or the text field (the Zh field variants are
//     used when the query contains Chinese characters).
//
// All clauses use a boost of 1.0.
func (f *ftSearchTantivy) buildObjectQuery(qb *tantivy.QueryBuilder, query string) {
422+
// Restrict hits to name-relation documents.
// NOTE(review): presumably document ids embed the relation key (the test uses
// ids such as "id1/r/name") and the id field is tokenized accordingly — confirm.
qb.Query(tantivy.Must, fieldId, bundle.RelationKeyName.String(), tantivy.TermQuery, 1.0)
383423
if containsChineseCharacters(query) {
384424
qb.BooleanQuery(tantivy.Must, qb.NestedBuilder().
385-
Query(tantivy.Should, fieldTitleZh, query, tantivy.PhrasePrefixQuery, 5.0).
386-
Query(tantivy.Should, fieldTitleZh, query, tantivy.PhraseQuery, 5.0).
425+
Query(tantivy.Should, fieldTitleZh, query, tantivy.PhrasePrefixQuery, 1.0).
426+
Query(tantivy.Should, fieldTextZh, query, tantivy.PhrasePrefixQuery, 1.0),
427+
1.0,
428+
)
429+
} else {
430+
qb.BooleanQuery(tantivy.Must, qb.NestedBuilder().
431+
Query(tantivy.Should, fieldTitle, query, tantivy.PhrasePrefixQuery, 1.0).
432+
Query(tantivy.Should, fieldText, query, tantivy.PhrasePrefixQuery, 1.0),
433+
1.0,
434+
)
435+
}
436+
}
437+
438+
func (f *ftSearchTantivy) buildDetailedQuery(qb *tantivy.QueryBuilder, query string) {
439+
if containsChineseCharacters(query) {
440+
qb.BooleanQuery(tantivy.Must, qb.NestedBuilder().
441+
Query(tantivy.Should, fieldTitleZh, query, tantivy.PhrasePrefixQuery, 10.0).
442+
Query(tantivy.Should, fieldTitleZh, query, tantivy.PhraseQuery, 10.0).
387443
Query(tantivy.Should, fieldTitleZh, query, tantivy.EveryTermQuery, 0.75).
388444
Query(tantivy.Should, fieldTitleZh, query, tantivy.OneOfTermQuery, 0.5).
389445
Query(tantivy.Should, fieldTextZh, query, tantivy.PhrasePrefixQuery, 1.0).
@@ -405,58 +461,41 @@ func (f *ftSearchTantivy) Search(spaceId string, query string) (results []*Docum
405461
1.0,
406462
)
407463
}
464+
}
408465

409-
finalQuery := qb.Build()
410-
sCtx := tantivy.NewSearchContextBuilder().
411-
SetQueryFromJson(&finalQuery).
412-
SetDocsLimit(100).
413-
SetWithHighlights(true).
414-
Build()
415-
416-
result, err := f.index.SearchJson(sCtx)
417-
466+
// parseSearchResult parses one JSON-encoded search hit with the supplied
// parser and converts it into a DocumentMatch carrying the hit's score, id and
// text highlight fragments. A parse failure is returned through wrapError.
//
// Highlights are collected only for the text fields; as soon as a highlight
// for a title field is seen, all collected fragments are dropped and the scan
// stops, so such a hit is returned with an empty fragment map.
func parseSearchResult(json string, parser *fastjson.Parser) (*DocumentMatch, error) {
467+
value, err := parser.Parse(json)
418468
if err != nil {
419469
return nil, wrapError(err)
420470
}
421-
p := f.parserPool.Get()
422-
defer f.parserPool.Put(p)
423471

424-
return tantivy.GetSearchResults(
425-
result,
426-
f.schema,
427-
func(json string) (*DocumentMatch, error) {
428-
value, err := p.Parse(json)
429-
if err != nil {
430-
return nil, err
431-
}
432-
highlights := value.GetArray(highlights)
433-
434-
fragments := map[string]*Highlight{}
435-
for _, val := range highlights {
436-
object := val.GetObject()
437-
fieldName := string(object.Get(fieldNameTxt).GetStringBytes())
438-
if fieldName == fieldTitle || fieldName == fieldTitleZh {
439-
fragments = map[string]*Highlight{}
440-
break
441-
}
442-
if fieldName == fieldText || fieldName == fieldTextZh {
443-
extractHighlight(object, fragments, fieldName)
444-
}
445-
}
472+
highlights := value.GetArray(highlights)
473+
fragments := map[string]*Highlight{}
446474

447-
if len(fragments) == 2 {
448-
delete(fragments, fieldTextZh)
449-
// remove chinese if something non chinese presents
450-
}
475+
for _, val := range highlights {
476+
object := val.GetObject()
477+
fieldName := string(object.Get(fieldNameTxt).GetStringBytes())
451478

452-
return &DocumentMatch{
453-
Score: value.GetFloat64(score),
454-
ID: string(value.GetStringBytes(fieldId)),
455-
Fragments: fragments,
456-
}, nil
457-
},
458-
fieldId,
459-
)
479+
// A title highlight discards any text fragments gathered so far and ends the scan.
if fieldName == fieldTitle || fieldName == fieldTitleZh {
480+
fragments = map[string]*Highlight{}
481+
break
482+
}
483+
484+
if fieldName == fieldText || fieldName == fieldTextZh {
485+
extractHighlight(object, fragments, fieldName)
486+
}
487+
}
488+
489+
if len(fragments) == 2 {
490+
// Remove Chinese highlights if non-Chinese highlights are present
491+
delete(fragments, fieldTextZh)
492+
}
493+
494+
return &DocumentMatch{
495+
Score: value.GetFloat64(score),
496+
ID: string(value.GetStringBytes(fieldId)),
497+
Fragments: fragments,
498+
}, nil
460499
}
461500

462501
func containsChineseCharacters(s string) bool {

pkg/lib/localstore/ftsearch/ftsearchtantivy_test.go

+36
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,42 @@ func TestDifferentSpaces(t *testing.T) {
9090
_ = ft.Close(nil)
9191
}
9292

93+
// TestNamePrefixSearch indexes five documents — two with ids ending in
// "/r/name" and three ending in "/r/desc" — and checks that NamePrefixSearch
// returns two hits for the prefix "o" and no hits for the prefix "n".
func TestNamePrefixSearch(t *testing.T) {
94+
// NOTE(review): the MkdirTemp error is discarded and the directory is not
// removed in this function; t.TempDir() may be a better fit — confirm
// whether newFixture handles cleanup.
tmpDir, _ := os.MkdirTemp("", "")
95+
fixture := newFixture(tmpDir, t)
96+
ft := fixture.ft
97+
require.NoError(t, ft.Index(SearchDoc{
98+
Id: "id1/r/name",
99+
Title: "opa",
100+
}))
101+
require.NoError(t, ft.Index(SearchDoc{
102+
Id: "id2/r/name",
103+
Text: "opa",
104+
}))
105+
require.NoError(t, ft.Index(SearchDoc{
106+
Id: "id3/r/desc",
107+
Title: "one",
108+
}))
109+
require.NoError(t, ft.Index(SearchDoc{
110+
Id: "id4/r/desc",
111+
Text: "opa",
112+
}))
113+
require.NoError(t, ft.Index(SearchDoc{
114+
Id: "id5/r/desc",
115+
Text: "noone",
116+
}))
117+
118+
// "o" prefixes "opa" in id1 (title) and id2 (text); the "/r/desc" documents must not match.
search, err := ft.NamePrefixSearch("", "o")
119+
require.NoError(t, err)
120+
require.Len(t, search, 2)
121+
122+
// "n" only prefixes "noone", which belongs to a "/r/desc" document, so no hits are expected.
search, err = ft.NamePrefixSearch("", "n")
123+
require.NoError(t, err)
124+
require.Len(t, search, 0)
125+
126+
_ = ft.Close(nil)
127+
}
128+
93129
func TestNewFTSearch(t *testing.T) {
94130
testCases := []struct {
95131
name string

pkg/lib/localstore/objectstore/spaceindex/queries.go

+13-3
Original file line numberDiff line numberDiff line change
@@ -270,7 +270,17 @@ func (s *dsObjectStore) performQuery(q database.Query) (records []database.Recor
270270
return nil, fmt.Errorf("new filters: %w", err)
271271
}
272272
if q.TextQuery != "" {
273-
fulltextResults, err := s.performFulltextSearch(q.TextQuery, q.SpaceId)
273+
var fulltextResults []database.FulltextResult
274+
if q.PrefixNameQuery {
275+
fulltextResults, err = s.performFulltextSearch(func() (results []*ftsearch.DocumentMatch, err error) {
276+
return s.fts.NamePrefixSearch(q.SpaceId, q.TextQuery)
277+
})
278+
} else {
279+
fulltextResults, err = s.performFulltextSearch(func() (results []*ftsearch.DocumentMatch, err error) {
280+
return s.fts.Search(q.SpaceId, q.TextQuery)
281+
})
282+
}
283+
274284
if err != nil {
275285
return nil, fmt.Errorf("perform fulltext search: %w", err)
276286
}
@@ -280,8 +290,8 @@ func (s *dsObjectStore) performQuery(q database.Query) (records []database.Recor
280290
return s.QueryRaw(filters, q.Limit, q.Offset)
281291
}
282292

283-
func (s *dsObjectStore) performFulltextSearch(text string, spaceId string) ([]database.FulltextResult, error) {
284-
ftsResults, err := s.fts.Search(spaceId, text)
293+
func (s *dsObjectStore) performFulltextSearch(search func() (results []*ftsearch.DocumentMatch, err error)) ([]database.FulltextResult, error) {
294+
ftsResults, err := search()
285295
if err != nil {
286296
return nil, fmt.Errorf("fullText search: %w", err)
287297
}

0 commit comments

Comments
 (0)