From 84f0b04ac1d53a3d636e7a300278cb423fa5880f Mon Sep 17 00:00:00 2001 From: Nick Tobey Date: Mon, 6 Jan 2025 15:40:34 -0800 Subject: [PATCH] Respond to PR feedback: fix typos and outline methods. --- go/libraries/doltcore/sqle/tables.go | 2 +- .../editor/creation/external_build_index.go | 72 +++++++++++-------- go/serial/schema.fbs | 2 +- go/serial/vectorindexnode.fbs | 10 +-- 4 files changed, 48 insertions(+), 38 deletions(-) diff --git a/go/libraries/doltcore/sqle/tables.go b/go/libraries/doltcore/sqle/tables.go index 3bedf3de0ba..e8fb46ea5d1 100644 --- a/go/libraries/doltcore/sqle/tables.go +++ b/go/libraries/doltcore/sqle/tables.go @@ -2901,7 +2901,7 @@ func (t *WritableDoltTable) UpdateForeignKey(ctx *sql.Context, fkName string, sq // CreateIndexForForeignKey implements sql.ForeignKeyTable func (t *AlterableDoltTable) CreateIndexForForeignKey(ctx *sql.Context, idx sql.IndexDef) error { if idx.Constraint != sql.IndexConstraint_None && idx.Constraint != sql.IndexConstraint_Unique && idx.Constraint != sql.IndexConstraint_Spatial { - return fmt.Errorf("only the following types of index constraints are supported for foriegn keys: none, unique, spatial") + return fmt.Errorf("only the following types of index constraints are supported for foreign keys: none, unique, spatial") } columns := make([]string, len(idx.Columns)) for i, indexCol := range idx.Columns { diff --git a/go/libraries/doltcore/table/editor/creation/external_build_index.go b/go/libraries/doltcore/table/editor/creation/external_build_index.go index 3f64a250a0c..285965ae827 100644 --- a/go/libraries/doltcore/table/editor/creation/external_build_index.go +++ b/go/libraries/doltcore/table/editor/creation/external_build_index.go @@ -59,35 +59,7 @@ func BuildProllyIndexExternal(ctx *sql.Context, vrw types.ValueReadWriter, ns tr } if idx.IsVector() { - // Secondary indexes are always covering and have no non-key columns - valDesc := val.NewTupleDescriptor() - proximityMapBuilder, err := 
prolly.NewProximityMapBuilder(ctx, ns, idx.VectorProperties().DistanceType, keyDesc, valDesc, prolly.DefaultLogChunkSize) - if err != nil { - return nil, err - } - for { - k, v, err := iter.Next(ctx) - if err == io.EOF { - break - } else if err != nil { - return nil, err - } - - idxKey, err := secondaryBld.SecondaryKeyFromRow(ctx, k, v) - if err != nil { - return nil, err - } - - if uniqCb != nil && prefixDesc.HasNulls(idxKey) { - continue - } - - if err := proximityMapBuilder.Insert(ctx, idxKey, val.EmptyTuple); err != nil { - return nil, err - } - } - proximityMap, err := proximityMapBuilder.Flush(ctx) - return durable.IndexFromProximityMap(proximityMap), nil + return BuildProximityIndex(ctx, ns, idx, keyDesc, prefixDesc, iter, secondaryBld, uniqCb) } sorter := sort.NewTupleSorter(batchSize, fileMax, func(t1, t2 val.Tuple) bool { @@ -144,6 +116,48 @@ func BuildProllyIndexExternal(ctx *sql.Context, vrw types.ValueReadWriter, ns tr return durable.IndexFromProllyMap(ret), nil } +// BuildProximityIndex builds the durable.Index for a vector index: it reads rows from iter, derives each secondary key with secondaryBld, skips keys for which prefixDesc reports nulls when uniqCb is non-nil, and inserts the rest into a proximity map with empty values. NOTE(review): the error returned by Flush is not checked before the result is returned. +func BuildProximityIndex( + ctx *sql.Context, + ns tree.NodeStore, + idx schema.Index, + keyDesc val.TupleDesc, + prefixDesc val.TupleDesc, + iter prolly.MapIter, + secondaryBld index.SecondaryKeyBuilder, + uniqCb DupEntryCb, +) (durable.Index, error) { + // Secondary indexes have no non-key columns + valDesc := val.NewTupleDescriptor() + proximityMapBuilder, err := prolly.NewProximityMapBuilder(ctx, ns, idx.VectorProperties().DistanceType, keyDesc, valDesc, prolly.DefaultLogChunkSize) + if err != nil { + return nil, err + } + for { + k, v, err := iter.Next(ctx) + if err == io.EOF { + break + } else if err != nil { + return nil, err + } + + idxKey, err := secondaryBld.SecondaryKeyFromRow(ctx, k, v) + if err != nil { + return nil, err + } + + if uniqCb != nil && 
prefixDesc.HasNulls(idxKey) { + continue + } + + if err := proximityMapBuilder.Insert(ctx, idxKey, val.EmptyTuple); err != nil { + return nil, err + } + } + proximityMap, err := proximityMapBuilder.Flush(ctx) + return durable.IndexFromProximityMap(proximityMap), nil +} + type tupleIterWithCb struct { iter sort.KeyIter err error diff --git a/go/serial/schema.fbs b/go/serial/schema.fbs index ba3133c042c..449290c698c 100644 --- a/go/serial/schema.fbs +++ b/go/serial/schema.fbs @@ -109,7 +109,7 @@ table Index { fulltext_key:bool; fulltext_info:FulltextInfo; - // fulltext information + // vector information // these fields should be set for vector indexes and otherwise omitted, for backwards compatibility vector_key:bool; vector_info:VectorInfo; diff --git a/go/serial/vectorindexnode.fbs b/go/serial/vectorindexnode.fbs index dd09e44c78a..852b9ba2a13 100644 --- a/go/serial/vectorindexnode.fbs +++ b/go/serial/vectorindexnode.fbs @@ -19,25 +19,21 @@ namespace serial; table VectorIndexNode { // sorted array of key items key_items:[ubyte] (required); - // items offets for |key_items| + // item offsets for |key_items| // first offset is 0, last offset is len(key_items) key_offsets:[uint32] (required); - // item type for |key_items| - // key_type:ItemType; // array of values items, ordered by paired key value_items:[ubyte]; // item offsets for |value_items| // first offset is 0, last offset is len(value_items) value_offsets:[uint32]; - // item type for |value_items| - // value_type:ItemType; // array of chunk addresses // - subtree addresses for internal prolly tree nodes // - value addresses for AddressMap leaf nodes - // node that while the keys in this index are addresses to JSON chunks, we don't store those in the address_array - // because we are guarenteed to have other references to those chunks in the primary index. 
+ // note that while the keys in this index are addresses to JSON chunks, we don't store those in the address_array + // because we are guaranteed to have other references to those chunks in the primary index. address_array:[ubyte] (required); // array of varint encoded subtree counts