@@ -30,6 +30,7 @@ import (
30
30
31
31
"github.com/anyproto/anytype-heart/core/wallet"
32
32
"github.com/anyproto/anytype-heart/metrics"
33
+ "github.com/anyproto/anytype-heart/pkg/lib/bundle"
33
34
"github.com/anyproto/anytype-heart/pkg/lib/logging"
34
35
"github.com/anyproto/anytype-heart/util/text"
35
36
)
@@ -38,7 +39,7 @@ const (
38
39
CName = "fts"
39
40
ftsDir = "fts"
40
41
ftsDir2 = "fts_tantivy"
41
- ftsVer = "10 "
42
+ ftsVer = "11 "
42
43
docLimit = 10000
43
44
44
45
fieldTitle = "Title"
@@ -63,7 +64,9 @@ type FTSearch interface {
63
64
NewAutoBatcher () AutoBatcher
64
65
BatchIndex (ctx context.Context , docs []SearchDoc , deletedDocs []string ) (err error )
65
66
BatchDeleteObjects (ids []string ) (err error )
66
- Search (spaceIds string , query string ) (results []* DocumentMatch , err error )
67
+ Search (spaceId string , query string ) (results []* DocumentMatch , err error )
68
+ // NamePrefixSearch is a special-case search that matches the query as a phrase prefix.
69
+ NamePrefixSearch (spaceId string , query string ) (results []* DocumentMatch , err error )
67
70
Iterate (objectId string , fields []string , shouldContinue func (doc * SearchDoc ) bool ) (err error )
68
71
DeleteObject (id string ) error
69
72
DocCount () (uint64 , error )
@@ -369,7 +372,15 @@ func (f *ftSearchTantivy) BatchIndex(ctx context.Context, docs []SearchDoc, dele
369
372
return f .index .AddAndConsumeDocuments (tantivyDocs ... )
370
373
}
371
374
372
- func (f * ftSearchTantivy ) Search (spaceId string , query string ) (results []* DocumentMatch , err error ) {
375
+ func (f * ftSearchTantivy ) NamePrefixSearch (spaceId , query string ) ([]* DocumentMatch , error ) {
376
+ return f .performSearch (spaceId , query , f .buildObjectQuery )
377
+ }
378
+
379
+ func (f * ftSearchTantivy ) Search (spaceId , query string ) ([]* DocumentMatch , error ) {
380
+ return f .performSearch (spaceId , query , f .buildDetailedQuery )
381
+ }
382
+
383
+ func (f * ftSearchTantivy ) performSearch (spaceId , query string , buildQueryFunc func (* tantivy.QueryBuilder , string )) ([]* DocumentMatch , error ) {
373
384
query = prepareQuery (query )
374
385
if query == "" {
375
386
return nil , nil
@@ -380,10 +391,55 @@ func (f *ftSearchTantivy) Search(spaceId string, query string) (results []*Docum
380
391
qb .Query (tantivy .Must , fieldSpace , spaceId , tantivy .TermQuery , 1.0 )
381
392
}
382
393
394
+ buildQueryFunc (qb , query )
395
+
396
+ finalQuery := qb .Build ()
397
+ sCtx := tantivy .NewSearchContextBuilder ().
398
+ SetQueryFromJson (& finalQuery ).
399
+ SetDocsLimit (100 ).
400
+ SetWithHighlights (true ).
401
+ Build ()
402
+
403
+ result , err := f .index .SearchJson (sCtx )
404
+ if err != nil {
405
+ return nil , wrapError (err )
406
+ }
407
+
408
+ p := f .parserPool .Get ()
409
+ defer f .parserPool .Put (p )
410
+
411
+ return tantivy .GetSearchResults (
412
+ result ,
413
+ f .schema ,
414
+ func (json string ) (* DocumentMatch , error ) {
415
+ return parseSearchResult (json , p )
416
+ },
417
+ fieldId ,
418
+ )
419
+ }
420
+
421
+ func (f * ftSearchTantivy ) buildObjectQuery (qb * tantivy.QueryBuilder , query string ) {
422
+ qb .Query (tantivy .Must , fieldId , bundle .RelationKeyName .String (), tantivy .TermQuery , 1.0 )
383
423
if containsChineseCharacters (query ) {
384
424
qb .BooleanQuery (tantivy .Must , qb .NestedBuilder ().
385
- Query (tantivy .Should , fieldTitleZh , query , tantivy .PhrasePrefixQuery , 5.0 ).
386
- Query (tantivy .Should , fieldTitleZh , query , tantivy .PhraseQuery , 5.0 ).
425
+ Query (tantivy .Should , fieldTitleZh , query , tantivy .PhrasePrefixQuery , 1.0 ).
426
+ Query (tantivy .Should , fieldTextZh , query , tantivy .PhrasePrefixQuery , 1.0 ),
427
+ 1.0 ,
428
+ )
429
+ } else {
430
+ qb .BooleanQuery (tantivy .Must , qb .NestedBuilder ().
431
+ Query (tantivy .Should , fieldTitle , query , tantivy .PhrasePrefixQuery , 1.0 ).
432
+ Query (tantivy .Should , fieldText , query , tantivy .PhrasePrefixQuery , 1.0 ),
433
+ 1.0 ,
434
+ )
435
+ }
436
+ }
437
+
438
+ func (f * ftSearchTantivy ) buildDetailedQuery (qb * tantivy.QueryBuilder , query string ) {
439
+ if containsChineseCharacters (query ) {
440
+ qb .BooleanQuery (tantivy .Must , qb .NestedBuilder ().
441
+ Query (tantivy .Should , fieldTitleZh , query , tantivy .PhrasePrefixQuery , 10.0 ).
442
+ Query (tantivy .Should , fieldTitleZh , query , tantivy .PhraseQuery , 10.0 ).
387
443
Query (tantivy .Should , fieldTitleZh , query , tantivy .EveryTermQuery , 0.75 ).
388
444
Query (tantivy .Should , fieldTitleZh , query , tantivy .OneOfTermQuery , 0.5 ).
389
445
Query (tantivy .Should , fieldTextZh , query , tantivy .PhrasePrefixQuery , 1.0 ).
@@ -405,58 +461,41 @@ func (f *ftSearchTantivy) Search(spaceId string, query string) (results []*Docum
405
461
1.0 ,
406
462
)
407
463
}
464
+ }
408
465
409
- finalQuery := qb .Build ()
410
- sCtx := tantivy .NewSearchContextBuilder ().
411
- SetQueryFromJson (& finalQuery ).
412
- SetDocsLimit (100 ).
413
- SetWithHighlights (true ).
414
- Build ()
415
-
416
- result , err := f .index .SearchJson (sCtx )
417
-
466
+ func parseSearchResult (json string , parser * fastjson.Parser ) (* DocumentMatch , error ) {
467
+ value , err := parser .Parse (json )
418
468
if err != nil {
419
469
return nil , wrapError (err )
420
470
}
421
- p := f .parserPool .Get ()
422
- defer f .parserPool .Put (p )
423
471
424
- return tantivy .GetSearchResults (
425
- result ,
426
- f .schema ,
427
- func (json string ) (* DocumentMatch , error ) {
428
- value , err := p .Parse (json )
429
- if err != nil {
430
- return nil , err
431
- }
432
- highlights := value .GetArray (highlights )
433
-
434
- fragments := map [string ]* Highlight {}
435
- for _ , val := range highlights {
436
- object := val .GetObject ()
437
- fieldName := string (object .Get (fieldNameTxt ).GetStringBytes ())
438
- if fieldName == fieldTitle || fieldName == fieldTitleZh {
439
- fragments = map [string ]* Highlight {}
440
- break
441
- }
442
- if fieldName == fieldText || fieldName == fieldTextZh {
443
- extractHighlight (object , fragments , fieldName )
444
- }
445
- }
472
+ highlights := value .GetArray (highlights )
473
+ fragments := map [string ]* Highlight {}
446
474
447
- if len (fragments ) == 2 {
448
- delete (fragments , fieldTextZh )
449
- // remove chinese if something non chinese presents
450
- }
475
+ for _ , val := range highlights {
476
+ object := val .GetObject ()
477
+ fieldName := string (object .Get (fieldNameTxt ).GetStringBytes ())
451
478
452
- return & DocumentMatch {
453
- Score : value .GetFloat64 (score ),
454
- ID : string (value .GetStringBytes (fieldId )),
455
- Fragments : fragments ,
456
- }, nil
457
- },
458
- fieldId ,
459
- )
479
+ if fieldName == fieldTitle || fieldName == fieldTitleZh {
480
+ fragments = map [string ]* Highlight {}
481
+ break
482
+ }
483
+
484
+ if fieldName == fieldText || fieldName == fieldTextZh {
485
+ extractHighlight (object , fragments , fieldName )
486
+ }
487
+ }
488
+
489
+ if len (fragments ) == 2 {
490
+ // Remove Chinese highlights if non-Chinese highlights are present
491
+ delete (fragments , fieldTextZh )
492
+ }
493
+
494
+ return & DocumentMatch {
495
+ Score : value .GetFloat64 (score ),
496
+ ID : string (value .GetStringBytes (fieldId )),
497
+ Fragments : fragments ,
498
+ }, nil
460
499
}
461
500
462
501
func containsChineseCharacters (s string ) bool {
0 commit comments