@@ -62,7 +62,7 @@ func (*ZapPlugin) newWithChunkMode(results []index.Document,
6262 br .Grow (estimateAvgBytesPerDoc * estimateNumResults )
6363 }
6464
65- s .results = results
65+ s .results , s . edgeList = flattenNestedDocuments ( results )
6666 s .chunkMode = chunkMode
6767 s .w = NewCountHashWriter (& br )
6868
@@ -72,7 +72,7 @@ func (*ZapPlugin) newWithChunkMode(results []index.Document,
7272 }
7373
7474 sb , err := InitSegmentBase (br .Bytes (), s .w .Sum32 (), chunkMode ,
75- uint64 (len (results )), storedIndexOffset , sectionsIndexOffset )
75+ uint64 (len (s . results )), storedIndexOffset , sectionsIndexOffset )
7676
7777 // get the bytes written before the interim's reset() call
7878 // write it to the newly formed segment base.
@@ -94,6 +94,9 @@ var interimPool = sync.Pool{New: func() interface{} { return &interim{} }}
9494type interim struct {
9595 results []index.Document
9696
97+ // edge list for nested documents: child -> parent
98+ edgeList map [int ]uint64
99+
97100 chunkMode uint32
98101
99102 w * CountHashWriter
@@ -122,6 +125,7 @@ type interim struct {
122125
123126func (s * interim ) reset () (err error ) {
124127 s .results = nil
128+ s .edgeList = nil
125129 s .chunkMode = 0
126130 s .w = nil
127131 for k := range s .FieldsMap {
@@ -408,6 +412,27 @@ func (s *interim) writeStoredFields() (
408412 }
409413 }
410414
415+ // write the number of edges in the child -> parent edge list
416+ // this will be zero if there are no nested documents
417+ // and this number also reflects the number of sub-documents
418+ // in the segment
419+ err = binary .Write (s .w , binary .BigEndian , uint64 (len (s .edgeList )))
420+ if err != nil {
421+ return 0 , err
422+ }
423+ // write the child -> parent edge list
424+ // child and parent are both flattened doc ids
425+ for child , parent := range s .edgeList {
426+ err = binary .Write (s .w , binary .BigEndian , uint64 (child ))
427+ if err != nil {
428+ return 0 , err
429+ }
430+ err = binary .Write (s .w , binary .BigEndian , parent )
431+ if err != nil {
432+ return 0 , err
433+ }
434+ }
435+
411436 return storedIndexOffset , nil
412437}
413438
@@ -437,3 +462,48 @@ func numUvarintBytes(x uint64) (n int) {
437462 }
438463 return n + 1
439464}
465+
466+ // flattenNestedDocuments returns a preorder list of the given documents and all their nested documents,
467+ // along with a map mapping each flattened index to its parent index (excluding root docs entirely).
468+ func flattenNestedDocuments (docs []index.Document ) ([]index.Document , map [int ]uint64 ) {
469+ totalCount := 0
470+ for _ , doc := range docs {
471+ totalCount += countNestedDocuments (doc )
472+ }
473+
474+ flattened := make ([]index.Document , 0 , totalCount )
475+ edgeMap := make (map [int ]uint64 , totalCount )
476+
477+ var traverse func (doc index.Document , hasParent bool , parentIdx int )
478+ traverse = func (d index.Document , hasParent bool , parentIdx int ) {
479+ curIdx := len (flattened )
480+ flattened = append (flattened , d )
481+
482+ if hasParent {
483+ edgeMap [curIdx ] = uint64 (parentIdx )
484+ }
485+
486+ if nestedDoc , ok := d .(index.NestedDocument ); ok {
487+ nestedDoc .VisitNestedDocuments (func (child index.Document ) {
488+ traverse (child , true , curIdx )
489+ })
490+ }
491+ }
492+ // Top-level docs have no parent
493+ for _ , doc := range docs {
494+ traverse (doc , false , 0 )
495+ }
496+ return flattened , edgeMap
497+ }
498+
499+ // countNestedDocuments returns the total number of docs in preorder,
500+ // including the parent and all descendants.
501+ func countNestedDocuments (doc index.Document ) int {
502+ count := 1 // include this doc
503+ if nd , ok := doc .(index.NestedDocument ); ok {
504+ nd .VisitNestedDocuments (func (child index.Document ) {
505+ count += countNestedDocuments (child )
506+ })
507+ }
508+ return count
509+ }
0 commit comments