From 0defcf2ae6dca36539583791326eaaa82dcc1289 Mon Sep 17 00:00:00 2001 From: xuanwei zhang Date: Thu, 15 Jun 2023 16:00:31 +0800 Subject: [PATCH 1/5] Add unimplemented manifest method --- go/internal/manifest/manifest.go | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/go/internal/manifest/manifest.go b/go/internal/manifest/manifest.go index c0843164..b884d914 100644 --- a/go/internal/manifest/manifest.go +++ b/go/internal/manifest/manifest.go @@ -48,3 +48,27 @@ type ManifestV2 struct { func (m *ManifestV2) Schema() *arrow.Schema { return m.schema } + +func (m *ManifestV2) AddScalarDataFiles(files ...*DataFile) { + m.files = append(m.files, files...) + +} + +func (m *ManifestV2) AddVectorDataFiles(files ...*DataFile) { + m.files = append(m.files, files...) +} + +func (m *ManifestV2) ScalarSchema() *arrow.Schema { + panic("implement me") + return nil +} + +func (m *ManifestV2) VectorSchema() *arrow.Schema { + panic("implement me") + return nil +} + +func WriteManifestV2File(fs fs.Fs, manifest *ManifestV2) error { + panic("implement me") + return nil +} From 0ddea768b0f145c8b8c1062aeaf8cf5da30fd6fa Mon Sep 17 00:00:00 2001 From: xuanwei zhang Date: Thu, 15 Jun 2023 16:01:34 +0800 Subject: [PATCH 2/5] Change gitignore and add CI for go --- .github/workflows/ci.yaml | 29 +++++++++++++++++++++++++++++ .gitignore | 3 +++ 2 files changed, 32 insertions(+) create mode 100644 .github/workflows/ci.yaml diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml new file mode 100644 index 00000000..3b7ec886 --- /dev/null +++ b/.github/workflows/ci.yaml @@ -0,0 +1,29 @@ +name: Go CI + +on: + push: + branches: + - main + pull_request: + branches: + - main + +jobs: + test: + name: Test + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Setup Go + uses: actions/setup-go@v2 + with: + go-version: 1.18 + + - name: Install dependencies + run: cd go && go mod download + + - name: Run tests + run: cd go 
&& go test -v ./... diff --git a/.gitignore b/.gitignore index 9e6df199..fd2db394 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,8 @@ cpp/build/* cpp/.vscode/* cpp/.cache/* +cpp/.idea/* +go/.vscode/* +go/.idea/* compile_commands.json CMakeUserPresets.json From f46f966758f13592e4a1479d56a55c5909b9ae4b Mon Sep 17 00:00:00 2001 From: xuanwei zhang Date: Thu, 15 Jun 2023 16:25:29 +0800 Subject: [PATCH 3/5] Change CI including paths --- .github/workflows/ci.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3b7ec886..f2c47ac3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -2,9 +2,15 @@ name: Go CI on: push: + -pachs: + -'go/**' + -'!cpp/**' branches: - main pull_request: + -pachs: + -'go/**' + -'!cpp/**' branches: - main From 019f407e0a2241b90201545af99ac48a7854e6e6 Mon Sep 17 00:00:00 2001 From: Xwg Date: Thu, 22 Jun 2023 13:54:51 +0800 Subject: [PATCH 4/5] feat:(storage): Finish basic storage module. - Remove redundant code. - Adjust directory structure. - Implement writing logic. 
--- go/Makefile | 39 + go/README.md | 18 + go/common/constant.go | 7 + go/common/fs_util.go | 1 + go/common/result/result.go | 37 + go/common/status/status.go | 73 ++ go/common/utils/utils.go | 316 +++++ go/file/fragment/deletefragment.go | 28 + go/file/fragment/fragment.go | 57 + go/go.mod | 2 +- go/internal/fs/factory.go | 21 - go/internal/fs/fs.go | 7 - go/internal/manifest/manifest.go | 74 -- .../format/parquet/file_reader.go | 4 +- .../format/parquet/file_writer.go | 4 +- go/{internal => io}/format/reader.go | 0 go/{internal => io}/format/writer.go | 0 go/io/fs/factory.go | 23 + go/{internal => io}/fs/file/file.go | 0 go/io/fs/file/local_file.go | 31 + go/{internal => io}/fs/file/memory_file.go | 0 go/io/fs/fs.go | 9 + go/io/fs/local_fs.go | 28 + go/{internal => io}/fs/memory_fs.go | 16 +- go/main.go | 1 + go/options/options.go | 23 - go/proto/manifest.proto | 20 + go/proto/manifest_proto/manifest.pb.go | 346 +++++ go/proto/schema.proto | 112 ++ go/proto/schema_proto/schema.pb.go | 1156 +++++++++++++++++ go/storage/default_space.go | 264 ++++ go/storage/manifest/manifest.go | 173 +++ go/storage/options/options.go | 102 ++ go/storage/record_reader.go | 10 +- go/storage/reference_space.go | 98 ++ go/storage/schema/schema.go | 113 ++ go/storage/separate_vector_space.go | 120 -- go/storage/space.go | 95 +- go/storage/space_test.go | 6 +- 39 files changed, 3081 insertions(+), 353 deletions(-) create mode 100644 go/Makefile create mode 100644 go/common/constant.go create mode 100644 go/common/fs_util.go create mode 100644 go/common/result/result.go create mode 100644 go/common/status/status.go create mode 100644 go/common/utils/utils.go create mode 100644 go/file/fragment/deletefragment.go create mode 100644 go/file/fragment/fragment.go delete mode 100644 go/internal/fs/factory.go delete mode 100644 go/internal/fs/fs.go delete mode 100644 go/internal/manifest/manifest.go rename go/{internal => io}/format/parquet/file_reader.go (98%) rename go/{internal => 
io}/format/parquet/file_writer.go (88%) rename go/{internal => io}/format/reader.go (100%) rename go/{internal => io}/format/writer.go (100%) create mode 100644 go/io/fs/factory.go rename go/{internal => io}/fs/file/file.go (100%) create mode 100644 go/io/fs/file/local_file.go rename go/{internal => io}/fs/file/memory_file.go (100%) create mode 100644 go/io/fs/fs.go create mode 100644 go/io/fs/local_fs.go rename go/{internal => io}/fs/memory_fs.go (53%) create mode 100644 go/main.go delete mode 100644 go/options/options.go create mode 100644 go/proto/manifest.proto create mode 100644 go/proto/manifest_proto/manifest.pb.go create mode 100644 go/proto/schema.proto create mode 100644 go/proto/schema_proto/schema.pb.go create mode 100644 go/storage/default_space.go create mode 100644 go/storage/manifest/manifest.go create mode 100644 go/storage/options/options.go create mode 100644 go/storage/reference_space.go create mode 100644 go/storage/schema/schema.go delete mode 100644 go/storage/separate_vector_space.go diff --git a/go/Makefile b/go/Makefile new file mode 100644 index 00000000..2857279a --- /dev/null +++ b/go/Makefile @@ -0,0 +1,39 @@ +.EXPORT_ALL_VARIABLES: + +define HELP_INFO +Usage: + make + +Target: + all build all executables (default) + protos compile server protobuf files + prepare prepare dependencies + clean clean artifacts +endef + + +.PHONY: all +all: prepare\ + protos \ + +.PHONY: prepare +prepare: + go install google.golang.org/protobuf/cmd/protoc-gen-go@latest + go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest + go install github.com/grpc-ecosystem/grpc-gateway/v2/protoc-gen-grpc-gateway@latest + + +.PHONY: protos +protos: + $(MAKE) -C proto/mainfest + $(MAKE) -C proto/scheme + +.PHONY: clean-protos +clean-protos: + $(MAKE) -C proto/mainfest clean + $(MAKE) -C proto/scheme clean + + +.PHONY: clean +clean: clean-protos \ + diff --git a/go/README.md b/go/README.md index 06a0a713..01caba35 100644 --- a/go/README.md +++ b/go/README.md 
@@ -1 +1,19 @@ A lib to store and query scalar and vector data. + + +## install dependencies + +```bash +go install google.golang.org/protobuf/cmd/protoc-gen-go@latest +go install google.golang.org/grpc/cmd/protoc-gen-go-grpc@latest +``` +## generate proto + +```bash +cd proto +mkdir manifest_proto +mkdir schema_proto +protoc --go_out=./manifest_proto --go_opt=paths=source_relative manifest.proto +protoc --go_out=./schema_proto --go_opt=paths=source_relative schema.proto + +``` diff --git a/go/common/constant.go b/go/common/constant.go new file mode 100644 index 00000000..b37a4bc1 --- /dev/null +++ b/go/common/constant.go @@ -0,0 +1,7 @@ +package common + +const KReadBatchSize = 1024 +const KManifestTempFileName = "manifest.tmp" +const KManifestFileName = "manifest" +const KParquetDataFileSuffix = ".parquet" +const KOffsetFieldName = "__offset" diff --git a/go/common/fs_util.go b/go/common/fs_util.go new file mode 100644 index 00000000..805d0c79 --- /dev/null +++ b/go/common/fs_util.go @@ -0,0 +1 @@ +package common diff --git a/go/common/result/result.go b/go/common/result/result.go new file mode 100644 index 00000000..5e285691 --- /dev/null +++ b/go/common/result/result.go @@ -0,0 +1,37 @@ +package result + +import ( + "github.com/milvus-io/milvus-storage-format/common/status" +) + +type Result[T any] struct { + value *T + status *status.Status +} + +func NewResult[T any](value T) *Result[T] { + return &Result[T]{value: &value} +} + +func NewResultFromStatus[T any](status status.Status) *Result[T] { + return &Result[T]{status: &status} +} + +func (r *Result[T]) Ok() bool { + return r.value != nil +} + +func (r *Result[T]) HasValue() bool { + return r.value != nil +} + +func (r *Result[T]) Value() T { + if r.value == nil { + panic("value is nil") + } + return *r.value +} + +func (r *Result[T]) Status() *status.Status { + return r.status +} diff --git a/go/common/status/status.go b/go/common/status/status.go new file mode 100644 index 00000000..eec202fb --- 
/dev/null +++ b/go/common/status/status.go @@ -0,0 +1,73 @@ +package status + +type Code int32 + +const ( + KOk Code = 0 + kArrowError Code = 1 + kInvalidArgument Code = 2 + kInternalStateError Code = 3 +) + +type Status struct { + code Code + msg string +} + +func NewStatus(code Code, msg string) *Status { + return &Status{ + code: code, + msg: msg, + } +} + +func (s *Status) Code() Code { + return s.code +} + +func (s *Status) Msg() string { + return s.msg +} + +func OK() Status { + return Status{ + code: KOk, + } +} + +func ArrowError(msg string) Status { + return Status{ + code: kArrowError, + msg: msg, + } +} + +func InvalidArgument(msg string) Status { + return Status{ + code: kInvalidArgument, + msg: msg, + } +} + +func InternalStateError(msg string) Status { + return Status{ + code: kInternalStateError, + msg: msg, + } +} + +func (s *Status) IsOK() bool { + return s.code == KOk +} + +func (s *Status) IsArrowError() bool { + return s.code == kArrowError +} + +func (s *Status) IsInvalidArgument() bool { + return s.code == kInvalidArgument +} + +func (s *Status) IsInternalStateError() bool { + return s.code == kInternalStateError +} diff --git a/go/common/utils/utils.go b/go/common/utils/utils.go new file mode 100644 index 00000000..2e2bb458 --- /dev/null +++ b/go/common/utils/utils.go @@ -0,0 +1,316 @@ +package utils + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/endian" + "github.com/google/uuid" + "github.com/milvus-io/milvus-storage-format/common" + "github.com/milvus-io/milvus-storage-format/common/result" + "github.com/milvus-io/milvus-storage-format/common/status" + "github.com/milvus-io/milvus-storage-format/proto/schema_proto" + "strconv" +) + +func ToProtobufType(dataType arrow.Type) *result.Result[schema_proto.LogicType] { + typeId := int(dataType) + if typeId < 0 || typeId >= int(schema_proto.LogicType_MAX_ID) { + return result.NewResultFromStatus[schema_proto.LogicType](status.InvalidArgument("Invalid 
type id: " + strconv.Itoa(typeId))) + } + return result.NewResult[schema_proto.LogicType](schema_proto.LogicType(typeId)) +} + +func ToProtobufMetadata(metadata *arrow.Metadata) *result.Result[*schema_proto.KeyValueMetadata] { + keys := metadata.Keys() + values := metadata.Values() + return result.NewResult[*schema_proto.KeyValueMetadata](&schema_proto.KeyValueMetadata{Keys: keys, Values: values}) +} + +func ToProtobufDataType(dataType arrow.DataType) *result.Result[*schema_proto.DataType] { + protoType := &schema_proto.DataType{} + stat := SetTypeValues(protoType, dataType) + if !stat.IsOK() { + return result.NewResultFromStatus[*schema_proto.DataType](stat) + } + logicType := ToProtobufType(dataType.ID()) + if !logicType.Status().IsOK() { + return result.NewResultFromStatus[*schema_proto.DataType](*logicType.Status()) + } + protoType.LogicType = logicType.Value() + + if len(GetFields(dataType)) > 0 { + for _, field := range GetFields(dataType) { + protoField := &schema_proto.Field{} + protoFieldType := ToProtobufField(&field) + if !protoFieldType.Ok() { + return result.NewResultFromStatus[*schema_proto.DataType](*protoFieldType.Status()) + } + protoField = protoFieldType.Value() + protoType.Children = append(protoType.Children, protoField) + } + } + + return result.NewResult[*schema_proto.DataType](protoType) +} + +// GetFields TODO CHECK MORE TYPES +func GetFields(dataType arrow.DataType) []arrow.Field { + switch dataType.ID() { + case arrow.LIST: + listType, _ := dataType.(*arrow.ListType) + return listType.Fields() + case arrow.STRUCT: + structType, _ := dataType.(*arrow.StructType) + return structType.Fields() + case arrow.MAP: + mapType, _ := dataType.(*arrow.MapType) + return mapType.Fields() + default: + return nil + } +} + +func ToProtobufField(field *arrow.Field) *result.Result[*schema_proto.Field] { + protoField := &schema_proto.Field{} + protoField.Name = field.Name + protoField.Nullable = field.Nullable + + fieldMetadata := 
ToProtobufMetadata(&field.Metadata) + if !fieldMetadata.Status().IsOK() { + return result.NewResultFromStatus[*schema_proto.Field](*fieldMetadata.Status()) + } + protoField.Metadata = fieldMetadata.Value() + dataType := ToProtobufDataType(field.Type) + if !dataType.Status().IsOK() { + return result.NewResultFromStatus[*schema_proto.Field](*dataType.Status()) + } + protoField.DataType = dataType.Value() + return result.NewResult[*schema_proto.Field](protoField) +} + +func SetTypeValues(protoType *schema_proto.DataType, dataType arrow.DataType) status.Status { + switch dataType.ID() { + case arrow.FIXED_SIZE_BINARY: + realType, ok := dataType.(*arrow.FixedSizeBinaryType) + if !ok { + return status.InvalidArgument("invalid type") + } + fixedSizeBinaryType := &schema_proto.FixedSizeBinaryType{} + fixedSizeBinaryType.ByteWidth = int32(realType.ByteWidth) + protoType.TypeRelatedValues = &schema_proto.DataType_FixedSizeBinaryType{FixedSizeBinaryType: fixedSizeBinaryType} + break + case arrow.FIXED_SIZE_LIST: + realType, ok := dataType.(*arrow.FixedSizeListType) + if !ok { + return status.InvalidArgument("invalid type") + } + fixedSizeListType := &schema_proto.FixedSizeListType{} + fixedSizeListType.ListSize = int32(realType.Len()) + protoType.TypeRelatedValues = &schema_proto.DataType_FixedSizeListType{FixedSizeListType: fixedSizeListType} + break + case arrow.DICTIONARY: + realType, ok := dataType.(*arrow.DictionaryType) + if !ok { + return status.InvalidArgument("invalid type") + } + dictionaryType := &schema_proto.DictionaryType{} + indexType := ToProtobufDataType(realType.IndexType) + if !indexType.Status().IsOK() { + return *indexType.Status() + } + dictionaryType.IndexType = indexType.Value() + valueType := ToProtobufDataType(realType.ValueType) + if !valueType.Status().IsOK() { + return *valueType.Status() + } + dictionaryType.ValueType = valueType.Value() + dictionaryType.Ordered = realType.Ordered + protoType.TypeRelatedValues = 
&schema_proto.DataType_DictionaryType{DictionaryType: dictionaryType} + break + + case arrow.MAP: + realType, ok := dataType.(*arrow.MapType) + if !ok { + return status.InvalidArgument("invalid type") + } + mapType := &schema_proto.MapType{} + mapType.KeysSorted = realType.KeysSorted + protoType.TypeRelatedValues = &schema_proto.DataType_MapType{MapType: mapType} + break + + default: + return status.InvalidArgument("Invalid type id: " + strconv.Itoa(int(dataType.ID()))) + } + + return status.OK() +} + +func ToProtobufSchema(schema *arrow.Schema) *result.Result[*schema_proto.ArrowSchema] { + protoSchema := &schema_proto.ArrowSchema{} + for _, field := range schema.Fields() { + protoField := ToProtobufField(&field) + if !protoField.Status().IsOK() { + return result.NewResultFromStatus[*schema_proto.ArrowSchema](*protoField.Status()) + } + protoSchema.Fields = append(protoSchema.Fields, protoField.Value()) + } + if schema.Endianness() == endian.LittleEndian { + protoSchema.Endianness = schema_proto.Endianness_Little + } else if schema.Endianness() == endian.BigEndian { + protoSchema.Endianness = schema_proto.Endianness_Big + } + + for _, key := range schema.Metadata().Keys() { + protoKeyValue := protoSchema.Metadata + protoKeyValue.Keys = append(protoKeyValue.Keys, key) + } + for _, value := range schema.Metadata().Values() { + protoKeyValue := protoSchema.Metadata + protoKeyValue.Values = append(protoKeyValue.Values, value) + } + return result.NewResult[*schema_proto.ArrowSchema](protoSchema) +} + +func FromProtobufSchema(schema *schema_proto.ArrowSchema) *result.Result[*arrow.Schema] { + fields := make([]arrow.Field, 0, len(schema.Fields)) + for _, field := range schema.Fields { + tmp := FromProtobufField(field) + if !tmp.Status().IsOK() { + return result.NewResultFromStatus[*arrow.Schema](*tmp.Status()) + } + fields = append(fields, *tmp.Value()) + } + tmp := FromProtobufKeyValueMetadata(schema.Metadata) + if !tmp.Status().IsOK() { + return 
result.NewResultFromStatus[*arrow.Schema](*tmp.Status()) + } + metadata := tmp.Value() + newSchema := arrow.NewSchema(fields, metadata) + return result.NewResult[*arrow.Schema](newSchema) +} + +func FromProtobufField(field *schema_proto.Field) *result.Result[*arrow.Field] { + tmp := FromProtobufDataType(field.DataType) + if !tmp.Status().IsOK() { + return result.NewResultFromStatus[*arrow.Field](*tmp.Status()) + } + dataType := tmp.Value() + tmp1 := FromProtobufKeyValueMetadata(field.GetMetadata()) + if !tmp1.Status().IsOK() { + return result.NewResultFromStatus[*arrow.Field](*tmp1.Status()) + } + metadata := tmp1.Value() + return result.NewResult[*arrow.Field](&arrow.Field{Name: field.Name, Type: dataType, Nullable: field.Nullable, Metadata: *metadata}) +} + +func FromProtobufKeyValueMetadata(metadata *schema_proto.KeyValueMetadata) *result.Result[*arrow.Metadata] { + keys := metadata.Keys + values := metadata.Values + newMetadata := arrow.NewMetadata(keys, values) + return result.NewResult[*arrow.Metadata](&newMetadata) +} +func FromProtobufDataType(dataType *schema_proto.DataType) *result.Result[arrow.DataType] { + switch dataType.LogicType { + case schema_proto.LogicType_NA: + return result.NewResult[arrow.DataType](&arrow.NullType{}) + case schema_proto.LogicType_BOOL: + return result.NewResult[arrow.DataType](&arrow.BooleanType{}) + case schema_proto.LogicType_UINT8: + return result.NewResult[arrow.DataType](&arrow.Uint8Type{}) + case schema_proto.LogicType_INT8: + return result.NewResult[arrow.DataType](&arrow.Int8Type{}) + case schema_proto.LogicType_UINT16: + return result.NewResult[arrow.DataType](&arrow.Uint16Type{}) + case schema_proto.LogicType_INT16: + return result.NewResult[arrow.DataType](&arrow.Int16Type{}) + case schema_proto.LogicType_UINT32: + return result.NewResult[arrow.DataType](&arrow.Uint32Type{}) + case schema_proto.LogicType_INT32: + return result.NewResult[arrow.DataType](&arrow.Int32Type{}) + case schema_proto.LogicType_UINT64: + 
return result.NewResult[arrow.DataType](&arrow.Uint64Type{}) + case schema_proto.LogicType_INT64: + return result.NewResult[arrow.DataType](&arrow.Int64Type{}) + case schema_proto.LogicType_HALF_FLOAT: + return result.NewResult[arrow.DataType](&arrow.Float16Type{}) + case schema_proto.LogicType_FLOAT: + return result.NewResult[arrow.DataType](&arrow.Float32Type{}) + case schema_proto.LogicType_DOUBLE: + return result.NewResult[arrow.DataType](&arrow.Float64Type{}) + case schema_proto.LogicType_STRING: + return result.NewResult[arrow.DataType](&arrow.StringType{}) + case schema_proto.LogicType_BINARY: + return result.NewResult[arrow.DataType](&arrow.BinaryType{}) + + case schema_proto.LogicType_LIST: + fieldType := FromProtobufField(dataType.Children[0]) + if !fieldType.Status().IsOK() { + return result.NewResultFromStatus[arrow.DataType](*fieldType.Status()) + } + listType := arrow.ListOf(fieldType.Value().Type) + return result.NewResult[arrow.DataType](listType) + + case schema_proto.LogicType_STRUCT: + fields := make([]arrow.Field, 0, len(dataType.Children)) + for _, child := range dataType.Children { + field := FromProtobufField(child) + if !field.Status().IsOK() { + return result.NewResultFromStatus[arrow.DataType](*field.Status()) + } + fields = append(fields, *field.Value()) + } + structType := arrow.StructOf(fields...) 
+ return result.NewResult[arrow.DataType](structType) + + case schema_proto.LogicType_DICTIONARY: + keyType := FromProtobufField(dataType.Children[0]) + if !keyType.Status().IsOK() { + return result.NewResultFromStatus[arrow.DataType](*keyType.Status()) + } + valueType := FromProtobufField(dataType.Children[1]) + if !valueType.Status().IsOK() { + return result.NewResultFromStatus[arrow.DataType](*valueType.Status()) + } + dictType := &arrow.DictionaryType{ + IndexType: keyType.Value().Type, + ValueType: valueType.Value().Type, + } + return result.NewResult[arrow.DataType](dictType) + + case schema_proto.LogicType_MAP: + fieldType := FromProtobufField(dataType.Children[0]) + if !fieldType.Status().IsOK() { + return result.NewResultFromStatus[arrow.DataType](*fieldType.Status()) + } + //TODO FIX ME + return result.NewResult[arrow.DataType](arrow.MapOf(fieldType.Value().Type, fieldType.Value().Type)) + + case schema_proto.LogicType_FIXED_SIZE_BINARY: + + sizeBinaryType := arrow.FixedSizeBinaryType{ByteWidth: int(dataType.GetFixedSizeBinaryType().ByteWidth)} + return result.NewResult[arrow.DataType](&sizeBinaryType) + + case schema_proto.LogicType_FIXED_SIZE_LIST: + fieldType := FromProtobufField(dataType.Children[0]) + if !fieldType.Status().IsOK() { + return result.NewResultFromStatus[arrow.DataType](*fieldType.Status()) + } + fixedSizeListType := arrow.FixedSizeListOf(int32(int(dataType.GetFixedSizeListType().ListSize)), fieldType.Value().Type) + return result.NewResult[arrow.DataType](fixedSizeListType) + + default: + return result.NewResultFromStatus[arrow.DataType](status.InvalidArgument("invalid data type")) + } +} + +func GetNewParquetFilePath(path string) string { + scalarFileId := uuid.New() + return path + scalarFileId.String() + common.KParquetDataFileSuffix +} + +func GetManifestFilePath(path string) string { + return path + common.KManifestFileName +} + +func GetManifestTmpFilePath(path string) string { + return path + common.KManifestTempFileName +} diff 
--git a/go/file/fragment/deletefragment.go b/go/file/fragment/deletefragment.go new file mode 100644 index 00000000..3f34ebae --- /dev/null +++ b/go/file/fragment/deletefragment.go @@ -0,0 +1,28 @@ +package fragment + +import ( + "github.com/milvus-io/milvus-storage-format/io/fs" + "github.com/milvus-io/milvus-storage-format/storage/schema" +) + +type pkType any +type DeleteFragmentVector []DeleteFragment +type DeleteFragment struct { + id int64 + schema *schema.Schema + fs fs.Fs + data map[pkType][]int64 +} + +func NewDeleteFragment(id int64, schema *schema.Schema, fs fs.Fs) *DeleteFragment { + return &DeleteFragment{ + id: id, + schema: schema, + fs: fs, + data: make(map[pkType][]int64), + } +} + +func Make(f fs.Fs, s *schema.Schema, frag Fragment) DeleteFragment { + panic("implement me") +} diff --git a/go/file/fragment/fragment.go b/go/file/fragment/fragment.go new file mode 100644 index 00000000..b008e19d --- /dev/null +++ b/go/file/fragment/fragment.go @@ -0,0 +1,57 @@ +package fragment + +import "github.com/milvus-io/milvus-storage-format/proto/manifest_proto" + +type FragmentType int32 + +const ( + kUnknown FragmentType = 0 + kData FragmentType = 1 + kDelete FragmentType = 2 +) + +type Fragment struct { + fragmentId int64 + files []string +} + +type FragmentVector []Fragment + +func NewFragment(fragmentId int64) *Fragment { + return &Fragment{ + fragmentId: fragmentId, + } +} + +func (f *Fragment) AddFile(file string) { + f.files = append(f.files, file) +} + +func (f *Fragment) Files() []string { + return f.files +} + +func (f *Fragment) FragmentId() int64 { + return f.fragmentId +} + +func (f *Fragment) SetFragmentId(fragmentId int64) { + f.fragmentId = fragmentId +} + +func (f *Fragment) ToProtobuf() *manifest_proto.Fragment { + fragment := &manifest_proto.Fragment{} + fragment.Id = f.fragmentId + for _, file := range f.files { + fragment.Files = append(fragment.Files, file) + } + return fragment +} + +func FromProtobuf(fragment *manifest_proto.Fragment) 
*Fragment { + newFragment := NewFragment(fragment.Id) + for _, file := range fragment.Files { + newFragment.files = append(newFragment.files, file) + } + return newFragment +} diff --git a/go/go.mod b/go/go.mod index d9f8a877..69fbfead 100644 --- a/go/go.mod +++ b/go/go.mod @@ -7,6 +7,7 @@ require ( github.com/bits-and-blooms/bitset v1.5.0 github.com/google/uuid v1.3.0 github.com/stretchr/testify v1.8.2 + google.golang.org/protobuf v1.28.1 ) require ( @@ -37,6 +38,5 @@ require ( golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013 // indirect google.golang.org/grpc v1.49.0 // indirect - google.golang.org/protobuf v1.28.1 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go/internal/fs/factory.go b/go/internal/fs/factory.go deleted file mode 100644 index 6fb32556..00000000 --- a/go/internal/fs/factory.go +++ /dev/null @@ -1,21 +0,0 @@ -package fs - -import ( - "github.com/milvus-io/milvus-storage-format/options" -) - -type FsFactory struct { -} - -func (f *FsFactory) Create(fsType options.FsType) Fs { - switch fsType { - case options.InMemory: - return NewMemoryFs() - default: - panic("unknown fs type") - } -} - -func NewFsFactory() *FsFactory { - return &FsFactory{} -} diff --git a/go/internal/fs/fs.go b/go/internal/fs/fs.go deleted file mode 100644 index d1405827..00000000 --- a/go/internal/fs/fs.go +++ /dev/null @@ -1,7 +0,0 @@ -package fs - -import "github.com/milvus-io/milvus-storage-format/internal/fs/file" - -type Fs interface { - OpenFile(path string) (file.File, error) -} diff --git a/go/internal/manifest/manifest.go b/go/internal/manifest/manifest.go deleted file mode 100644 index b884d914..00000000 --- a/go/internal/manifest/manifest.go +++ /dev/null @@ -1,74 +0,0 @@ -package manifest - -import ( - "github.com/apache/arrow/go/v12/arrow" - "github.com/milvus-io/milvus-storage-format/internal/fs" -) - -type DataFile struct { - path string - cols []string -} - -func (d 
*DataFile) Path() string { - return d.path -} - -func NewDataFile(path string) *DataFile { - return &DataFile{path: path} -} - -type ManifestV1 struct { - dataFiles []*DataFile -} - -func (m *ManifestV1) AddDataFiles(files ...*DataFile) { - m.dataFiles = append(m.dataFiles, files...) -} - -func (m *ManifestV1) DataFiles() []*DataFile { - return m.dataFiles -} - -func NewManifest() *ManifestV1 { - return &ManifestV1{} -} - -func WriteManifestFile(fs fs.Fs, manifest *ManifestV1) error { - // TODO - return nil -} - -type ManifestV2 struct { - schema *arrow.Schema - schemas []*arrow.Schema - files []*DataFile -} - -func (m *ManifestV2) Schema() *arrow.Schema { - return m.schema -} - -func (m *ManifestV2) AddScalarDataFiles(files ...*DataFile) { - m.files = append(m.files, files...) - -} - -func (m *ManifestV2) AddVectorDataFiles(files ...*DataFile) { - m.files = append(m.files, files...) -} - -func (m *ManifestV2) ScalarSchema() *arrow.Schema { - panic("implement me") - return nil -} - -func (m *ManifestV2) VectorSchema() *arrow.Schema { - panic("implement me") - return nil -} - -func WriteManifestV2File(fs fs.Fs, manifest *ManifestV2) error { - panic("implement me") - return nil -} diff --git a/go/internal/format/parquet/file_reader.go b/go/io/format/parquet/file_reader.go similarity index 98% rename from go/internal/format/parquet/file_reader.go rename to go/io/format/parquet/file_reader.go index 56324872..72bef6af 100644 --- a/go/internal/format/parquet/file_reader.go +++ b/go/io/format/parquet/file_reader.go @@ -2,6 +2,7 @@ package parquet import ( "context" + "github.com/milvus-io/milvus-storage-format/storage/options" "github.com/apache/arrow/go/v12/arrow" "github.com/apache/arrow/go/v12/arrow/array" @@ -11,8 +12,7 @@ import ( "github.com/apache/arrow/go/v12/parquet/pqarrow" "github.com/bits-and-blooms/bitset" "github.com/milvus-io/milvus-storage-format/filter" - "github.com/milvus-io/milvus-storage-format/internal/fs" - 
"github.com/milvus-io/milvus-storage-format/options" + "github.com/milvus-io/milvus-storage-format/io/fs" ) type FileReader struct { diff --git a/go/internal/format/parquet/file_writer.go b/go/io/format/parquet/file_writer.go similarity index 88% rename from go/internal/format/parquet/file_writer.go rename to go/io/format/parquet/file_writer.go index 226bf12b..793eb5aa 100644 --- a/go/internal/format/parquet/file_writer.go +++ b/go/io/format/parquet/file_writer.go @@ -4,8 +4,8 @@ import ( "github.com/apache/arrow/go/v12/arrow" "github.com/apache/arrow/go/v12/parquet" "github.com/apache/arrow/go/v12/parquet/pqarrow" - "github.com/milvus-io/milvus-storage-format/internal/format" - "github.com/milvus-io/milvus-storage-format/internal/fs" + "github.com/milvus-io/milvus-storage-format/io/format" + "github.com/milvus-io/milvus-storage-format/io/fs" ) var _ format.Writer = (*FileWriter)(nil) diff --git a/go/internal/format/reader.go b/go/io/format/reader.go similarity index 100% rename from go/internal/format/reader.go rename to go/io/format/reader.go diff --git a/go/internal/format/writer.go b/go/io/format/writer.go similarity index 100% rename from go/internal/format/writer.go rename to go/io/format/writer.go diff --git a/go/io/fs/factory.go b/go/io/fs/factory.go new file mode 100644 index 00000000..4bd411fc --- /dev/null +++ b/go/io/fs/factory.go @@ -0,0 +1,23 @@ +package fs + +import ( + "github.com/milvus-io/milvus-storage-format/storage/options" +) + +type Factory struct { +} + +func (f *Factory) Create(fsType options.FsType) Fs { + switch fsType { + case options.InMemory: + return NewMemoryFs() + case options.LocalFS: + return NewLocalFs() + default: + panic("unknown fs type") + } +} + +func NewFsFactory() *Factory { + return &Factory{} +} diff --git a/go/internal/fs/file/file.go b/go/io/fs/file/file.go similarity index 100% rename from go/internal/fs/file/file.go rename to go/io/fs/file/file.go diff --git a/go/io/fs/file/local_file.go b/go/io/fs/file/local_file.go 
new file mode 100644 index 00000000..c5621ef9 --- /dev/null +++ b/go/io/fs/file/local_file.go @@ -0,0 +1,31 @@ +package file + +import ( + "os" +) + +type LocalFile struct { + file os.File +} + +func (l *LocalFile) Write(p []byte) (n int, err error) { + return l.file.Write(p) +} + +func (l *LocalFile) ReadAt(p []byte, off int64) (n int, err error) { + return l.file.ReadAt(p, off) +} + +func (l *LocalFile) Seek(offset int64, whence int) (int64, error) { + return l.file.Seek(offset, whence) +} + +func (l *LocalFile) Close() error { + return l.file.Close() +} + +func NewLocalFile(f *os.File) *LocalFile { + return &LocalFile{ + file: *f, + } +} diff --git a/go/internal/fs/file/memory_file.go b/go/io/fs/file/memory_file.go similarity index 100% rename from go/internal/fs/file/memory_file.go rename to go/io/fs/file/memory_file.go diff --git a/go/io/fs/fs.go b/go/io/fs/fs.go new file mode 100644 index 00000000..49015a8b --- /dev/null +++ b/go/io/fs/fs.go @@ -0,0 +1,9 @@ +package fs + +import "github.com/milvus-io/milvus-storage-format/io/fs/file" + +type Fs interface { + OpenFile(path string) (file.File, error) + Rename(src string, dst string) error + DeleteFile(path string) error +} diff --git a/go/io/fs/local_fs.go b/go/io/fs/local_fs.go new file mode 100644 index 00000000..fbf95cd3 --- /dev/null +++ b/go/io/fs/local_fs.go @@ -0,0 +1,28 @@ +package fs + +import ( + "github.com/milvus-io/milvus-storage-format/io/fs/file" + "os" +) + +type LocalFS struct{} + +func (l *LocalFS) OpenFile(path string) (file.File, error) { + open, err := os.Open(path) + if err != nil { + return nil, err + } + return file.NewLocalFile(open), nil +} + +func (l *LocalFS) Rename(src string, dst string) error { + return os.Rename(src, dst) +} + +func (l *LocalFS) DeleteFile(path string) error { + return os.Remove(path) +} + +func NewLocalFs() *LocalFS { + return &LocalFS{} +} diff --git a/go/internal/fs/memory_fs.go b/go/io/fs/memory_fs.go similarity index 53% rename from 
go/internal/fs/memory_fs.go rename to go/io/fs/memory_fs.go index 71bb5abb..8e6db53d 100644 --- a/go/internal/fs/memory_fs.go +++ b/go/io/fs/memory_fs.go @@ -1,6 +1,6 @@ package fs -import "github.com/milvus-io/milvus-storage-format/internal/fs/file" +import "github.com/milvus-io/milvus-storage-format/io/fs/file" type MemoryFs struct { files map[string]*file.MemoryFile @@ -15,6 +15,20 @@ func (m *MemoryFs) OpenFile(path string) (file.File, error) { return f, nil } +func (m *MemoryFs) Rename(path string, path2 string) error { + if _, ok := m.files[path]; !ok { + return nil + } + m.files[path2] = m.files[path] + delete(m.files, path) + return nil +} + +func (m *MemoryFs) DeleteFile(path string) error { + delete(m.files, path) + return nil +} + func NewMemoryFs() *MemoryFs { return &MemoryFs{ files: make(map[string]*file.MemoryFile), diff --git a/go/main.go b/go/main.go new file mode 100644 index 00000000..06ab7d0f --- /dev/null +++ b/go/main.go @@ -0,0 +1 @@ +package main diff --git a/go/options/options.go b/go/options/options.go deleted file mode 100644 index e5229245..00000000 --- a/go/options/options.go +++ /dev/null @@ -1,23 +0,0 @@ -package options - -import "github.com/milvus-io/milvus-storage-format/filter" - -type FsType int8 - -const ( - InMemory FsType = iota -) - -type SpaceOptions struct { - Fs FsType - VectorColumns []string -} - -type ReadOptions struct { - Filters map[string]filter.Filter - Columns []string -} - -type WriteOptions struct { - MaxRowsPerFile int64 -} diff --git a/go/proto/manifest.proto b/go/proto/manifest.proto new file mode 100644 index 00000000..50cae471 --- /dev/null +++ b/go/proto/manifest.proto @@ -0,0 +1,20 @@ +syntax = "proto3"; +import "schema.proto"; +package manifest_proto; +option go_package = "github.com/milvus-io/milvus-storage-format/proto/manifest_proto;manifest_proto"; + +message Options { string uri = 1; } + +message Manifest { + int64 version = 1; + Options options = 2; + schema_proto.Schema schema = 3; + repeated 
Fragment scalar_fragments = 4; + repeated Fragment vector_fragments = 5; + repeated Fragment delete_fragments = 6; +} + +message Fragment { + int64 id = 1; + repeated string files = 2; +} diff --git a/go/proto/manifest_proto/manifest.pb.go b/go/proto/manifest_proto/manifest.pb.go new file mode 100644 index 00000000..96ad7152 --- /dev/null +++ b/go/proto/manifest_proto/manifest.pb.go @@ -0,0 +1,346 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.30.0 +// protoc v3.19.4 +// source: manifest.proto + +package manifest_proto + +import ( + schema_proto "github.com/milvus-io/milvus-storage-format/proto/schema_proto" + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type Options struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Uri string `protobuf:"bytes,1,opt,name=uri,proto3" json:"uri,omitempty"` +} + +func (x *Options) Reset() { + *x = Options{} + if protoimpl.UnsafeEnabled { + mi := &file_manifest_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Options) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Options) ProtoMessage() {} + +func (x *Options) ProtoReflect() protoreflect.Message { + mi := &file_manifest_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Options.ProtoReflect.Descriptor instead. 
+func (*Options) Descriptor() ([]byte, []int) { + return file_manifest_proto_rawDescGZIP(), []int{0} +} + +func (x *Options) GetUri() string { + if x != nil { + return x.Uri + } + return "" +} + +type Manifest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Version int64 `protobuf:"varint,1,opt,name=version,proto3" json:"version,omitempty"` + Options *Options `protobuf:"bytes,2,opt,name=options,proto3" json:"options,omitempty"` + Schema *schema_proto.Schema `protobuf:"bytes,3,opt,name=schema,proto3" json:"schema,omitempty"` + ScalarFragments []*Fragment `protobuf:"bytes,4,rep,name=scalar_fragments,json=scalarFragments,proto3" json:"scalar_fragments,omitempty"` + VectorFragments []*Fragment `protobuf:"bytes,5,rep,name=vector_fragments,json=vectorFragments,proto3" json:"vector_fragments,omitempty"` + DeleteFragments []*Fragment `protobuf:"bytes,6,rep,name=delete_fragments,json=deleteFragments,proto3" json:"delete_fragments,omitempty"` +} + +func (x *Manifest) Reset() { + *x = Manifest{} + if protoimpl.UnsafeEnabled { + mi := &file_manifest_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Manifest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Manifest) ProtoMessage() {} + +func (x *Manifest) ProtoReflect() protoreflect.Message { + mi := &file_manifest_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Manifest.ProtoReflect.Descriptor instead. 
+func (*Manifest) Descriptor() ([]byte, []int) { + return file_manifest_proto_rawDescGZIP(), []int{1} +} + +func (x *Manifest) GetVersion() int64 { + if x != nil { + return x.Version + } + return 0 +} + +func (x *Manifest) GetOptions() *Options { + if x != nil { + return x.Options + } + return nil +} + +func (x *Manifest) GetSchema() *schema_proto.Schema { + if x != nil { + return x.Schema + } + return nil +} + +func (x *Manifest) GetScalarFragments() []*Fragment { + if x != nil { + return x.ScalarFragments + } + return nil +} + +func (x *Manifest) GetVectorFragments() []*Fragment { + if x != nil { + return x.VectorFragments + } + return nil +} + +func (x *Manifest) GetDeleteFragments() []*Fragment { + if x != nil { + return x.DeleteFragments + } + return nil +} + +type Fragment struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Id int64 `protobuf:"varint,1,opt,name=id,proto3" json:"id,omitempty"` + Files []string `protobuf:"bytes,2,rep,name=files,proto3" json:"files,omitempty"` +} + +func (x *Fragment) Reset() { + *x = Fragment{} + if protoimpl.UnsafeEnabled { + mi := &file_manifest_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Fragment) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Fragment) ProtoMessage() {} + +func (x *Fragment) ProtoReflect() protoreflect.Message { + mi := &file_manifest_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Fragment.ProtoReflect.Descriptor instead. 
+func (*Fragment) Descriptor() ([]byte, []int) { + return file_manifest_proto_rawDescGZIP(), []int{2} +} + +func (x *Fragment) GetId() int64 { + if x != nil { + return x.Id + } + return 0 +} + +func (x *Fragment) GetFiles() []string { + if x != nil { + return x.Files + } + return nil +} + +var File_manifest_proto protoreflect.FileDescriptor + +var file_manifest_proto_rawDesc = []byte{ + 0x0a, 0x0e, 0x6d, 0x61, 0x6e, 0x69, 0x66, 0x65, 0x73, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x12, 0x0e, 0x6d, 0x61, 0x6e, 0x69, 0x66, 0x65, 0x73, 0x74, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x1a, 0x0c, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x1b, + 0x0a, 0x07, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x10, 0x0a, 0x03, 0x75, 0x72, 0x69, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x75, 0x72, 0x69, 0x22, 0xd4, 0x02, 0x0a, 0x08, + 0x4d, 0x61, 0x6e, 0x69, 0x66, 0x65, 0x73, 0x74, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, + 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x12, 0x31, 0x0a, 0x07, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x6d, 0x61, 0x6e, 0x69, 0x66, 0x65, 0x73, 0x74, 0x5f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x07, 0x6f, 0x70, + 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x2c, 0x0a, 0x06, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x14, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x52, 0x06, 0x73, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x12, 0x43, 0x0a, 0x10, 0x73, 0x63, 0x61, 0x6c, 0x61, 0x72, 0x5f, 0x66, 0x72, + 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x04, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, + 0x6d, 0x61, 0x6e, 0x69, 0x66, 0x65, 0x73, 0x74, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x46, + 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 
0x52, 0x0f, 0x73, 0x63, 0x61, 0x6c, 0x61, 0x72, 0x46, + 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x43, 0x0a, 0x10, 0x76, 0x65, 0x63, 0x74, + 0x6f, 0x72, 0x5f, 0x66, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x05, 0x20, 0x03, + 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x61, 0x6e, 0x69, 0x66, 0x65, 0x73, 0x74, 0x5f, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x46, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x52, 0x0f, 0x76, 0x65, + 0x63, 0x74, 0x6f, 0x72, 0x46, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x43, 0x0a, + 0x10, 0x64, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x5f, 0x66, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, + 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x18, 0x2e, 0x6d, 0x61, 0x6e, 0x69, 0x66, 0x65, + 0x73, 0x74, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x46, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, + 0x74, 0x52, 0x0f, 0x64, 0x65, 0x6c, 0x65, 0x74, 0x65, 0x46, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, + 0x74, 0x73, 0x22, 0x30, 0x0a, 0x08, 0x46, 0x72, 0x61, 0x67, 0x6d, 0x65, 0x6e, 0x74, 0x12, 0x0e, + 0x0a, 0x02, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x02, 0x69, 0x64, 0x12, 0x14, + 0x0a, 0x05, 0x66, 0x69, 0x6c, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x05, 0x66, + 0x69, 0x6c, 0x65, 0x73, 0x42, 0x50, 0x5a, 0x4e, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, + 0x6f, 0x6d, 0x2f, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2d, 0x69, 0x6f, 0x2f, 0x6d, 0x69, 0x6c, + 0x76, 0x75, 0x73, 0x2d, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2d, 0x66, 0x6f, 0x72, 0x6d, + 0x61, 0x74, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x6d, 0x61, 0x6e, 0x69, 0x66, 0x65, 0x73, + 0x74, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x3b, 0x6d, 0x61, 0x6e, 0x69, 0x66, 0x65, 0x73, 0x74, + 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_manifest_proto_rawDescOnce sync.Once + file_manifest_proto_rawDescData = file_manifest_proto_rawDesc +) + +func file_manifest_proto_rawDescGZIP() []byte { + 
file_manifest_proto_rawDescOnce.Do(func() { + file_manifest_proto_rawDescData = protoimpl.X.CompressGZIP(file_manifest_proto_rawDescData) + }) + return file_manifest_proto_rawDescData +} + +var file_manifest_proto_msgTypes = make([]protoimpl.MessageInfo, 3) +var file_manifest_proto_goTypes = []interface{}{ + (*Options)(nil), // 0: manifest_proto.Options + (*Manifest)(nil), // 1: manifest_proto.Manifest + (*Fragment)(nil), // 2: manifest_proto.Fragment + (*schema_proto.Schema)(nil), // 3: schema_proto.Schema +} +var file_manifest_proto_depIdxs = []int32{ + 0, // 0: manifest_proto.Manifest.options:type_name -> manifest_proto.Options + 3, // 1: manifest_proto.Manifest.schema:type_name -> schema_proto.Schema + 2, // 2: manifest_proto.Manifest.scalar_fragments:type_name -> manifest_proto.Fragment + 2, // 3: manifest_proto.Manifest.vector_fragments:type_name -> manifest_proto.Fragment + 2, // 4: manifest_proto.Manifest.delete_fragments:type_name -> manifest_proto.Fragment + 5, // [5:5] is the sub-list for method output_type + 5, // [5:5] is the sub-list for method input_type + 5, // [5:5] is the sub-list for extension type_name + 5, // [5:5] is the sub-list for extension extendee + 0, // [0:5] is the sub-list for field type_name +} + +func init() { file_manifest_proto_init() } +func file_manifest_proto_init() { + if File_manifest_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_manifest_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Options); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_manifest_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Manifest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_manifest_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := 
v.(*Fragment); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_manifest_proto_rawDesc, + NumEnums: 0, + NumMessages: 3, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_manifest_proto_goTypes, + DependencyIndexes: file_manifest_proto_depIdxs, + MessageInfos: file_manifest_proto_msgTypes, + }.Build() + File_manifest_proto = out.File + file_manifest_proto_rawDesc = nil + file_manifest_proto_goTypes = nil + file_manifest_proto_depIdxs = nil +} diff --git a/go/proto/schema.proto b/go/proto/schema.proto new file mode 100644 index 00000000..9172a0f0 --- /dev/null +++ b/go/proto/schema.proto @@ -0,0 +1,112 @@ +syntax = "proto3"; + +package schema_proto; +option go_package = "github.com/milvus-io/milvus-storage-format/proto/schema_proto;schema_proto"; + +enum LogicType { + NA = 0; + BOOL = 1; + UINT8 = 2; + INT8 = 3; + UINT16 = 4; + INT16 = 5; + UINT32 = 6; + INT32 = 7; + UINT64 = 8; + INT64 = 9; + HALF_FLOAT = 10; + FLOAT = 11; + DOUBLE = 12; + STRING = 13; + BINARY = 14; + FIXED_SIZE_BINARY = 15; + // DATE32 = 16; + // DATE64 = 17; + // TIMESTAMP = 18; + // TIME32 = 19; + // TIME64 = 20; + // INTERVAL_MONTHS = 21; + // INTERVAL_DAY_TIME = 22; + // DECIMAL128 = 23; + // option allow_alias = true; + // DECIMAL = 23; // DECIMAL==DECIMAL128 + // DECIMAL256 = 24; + LIST = 25; + STRUCT = 26; + // SPARSE_UNION = 27; + // DENSE_UNION = 28; + DICTIONARY = 29; + MAP = 30; + // EXTENSION = 31; + FIXED_SIZE_LIST = 32; + // DURATION = 33; + // LARGE_STRING = 34; + // LARGE_BINARY = 35; + // LARGE_LIST = 36; + // INTERVAL_MONTH_DAY_NANO = 37; + // RUN_END_ENCODED = 38; + MAX_ID = 39; +} + +enum Endianness { + Little = 0; + Big = 1; +} + +message FixedSizeBinaryType { + int32 byte_width = 1; +} + +message FixedSizeListType { + int32 
list_size = 1; +} + +message DictionaryType { + DataType index_type = 1; + DataType value_type = 2; + bool ordered = 3; +} + +message MapType { + bool keys_sorted = 1; +} + +message DataType { + oneof type_related_values { + FixedSizeBinaryType fixed_size_binary_type = 1; + FixedSizeListType fixed_size_list_type = 2; + DictionaryType dictionary_type = 3; + MapType map_type = 4; + } + LogicType logic_type = 100; + repeated Field children = 101; +} + +message KeyValueMetadata { + repeated string keys = 1; + repeated string values = 2; +} + +message Field { + string name = 1; + bool nullable = 2; + DataType data_type = 3; + KeyValueMetadata metadata = 4; +} + +message SchemaOptions { + string primary_column = 1; + string version_column = 2; + string vector_column = 3; +} + +message ArrowSchema { + repeated Field fields = 1; + Endianness endianness = 2; + KeyValueMetadata metadata = 3; +} + +message Schema { + ArrowSchema arrow_schema = 1; + SchemaOptions schema_options = 2; +} diff --git a/go/proto/schema_proto/schema.pb.go b/go/proto/schema_proto/schema.pb.go new file mode 100644 index 00000000..3c6dcbe3 --- /dev/null +++ b/go/proto/schema_proto/schema.pb.go @@ -0,0 +1,1156 @@ +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.30.0 +// protoc v3.19.4 +// source: schema.proto + +package schema_proto + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. 
+ _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +type LogicType int32 + +const ( + LogicType_NA LogicType = 0 + LogicType_BOOL LogicType = 1 + LogicType_UINT8 LogicType = 2 + LogicType_INT8 LogicType = 3 + LogicType_UINT16 LogicType = 4 + LogicType_INT16 LogicType = 5 + LogicType_UINT32 LogicType = 6 + LogicType_INT32 LogicType = 7 + LogicType_UINT64 LogicType = 8 + LogicType_INT64 LogicType = 9 + LogicType_HALF_FLOAT LogicType = 10 + LogicType_FLOAT LogicType = 11 + LogicType_DOUBLE LogicType = 12 + LogicType_STRING LogicType = 13 + LogicType_BINARY LogicType = 14 + LogicType_FIXED_SIZE_BINARY LogicType = 15 + // DATE32 = 16; + // DATE64 = 17; + // TIMESTAMP = 18; + // TIME32 = 19; + // TIME64 = 20; + // INTERVAL_MONTHS = 21; + // INTERVAL_DAY_TIME = 22; + // DECIMAL128 = 23; + // option allow_alias = true; + // DECIMAL = 23; // DECIMAL==DECIMAL128 + // DECIMAL256 = 24; + LogicType_LIST LogicType = 25 + LogicType_STRUCT LogicType = 26 + // SPARSE_UNION = 27; + // DENSE_UNION = 28; + LogicType_DICTIONARY LogicType = 29 + LogicType_MAP LogicType = 30 + // EXTENSION = 31; + LogicType_FIXED_SIZE_LIST LogicType = 32 + // DURATION = 33; + // LARGE_STRING = 34; + // LARGE_BINARY = 35; + // LARGE_LIST = 36; + // INTERVAL_MONTH_DAY_NANO = 37; + // RUN_END_ENCODED = 38; + LogicType_MAX_ID LogicType = 39 +) + +// Enum value maps for LogicType. 
+var ( + LogicType_name = map[int32]string{ + 0: "NA", + 1: "BOOL", + 2: "UINT8", + 3: "INT8", + 4: "UINT16", + 5: "INT16", + 6: "UINT32", + 7: "INT32", + 8: "UINT64", + 9: "INT64", + 10: "HALF_FLOAT", + 11: "FLOAT", + 12: "DOUBLE", + 13: "STRING", + 14: "BINARY", + 15: "FIXED_SIZE_BINARY", + 25: "LIST", + 26: "STRUCT", + 29: "DICTIONARY", + 30: "MAP", + 32: "FIXED_SIZE_LIST", + 39: "MAX_ID", + } + LogicType_value = map[string]int32{ + "NA": 0, + "BOOL": 1, + "UINT8": 2, + "INT8": 3, + "UINT16": 4, + "INT16": 5, + "UINT32": 6, + "INT32": 7, + "UINT64": 8, + "INT64": 9, + "HALF_FLOAT": 10, + "FLOAT": 11, + "DOUBLE": 12, + "STRING": 13, + "BINARY": 14, + "FIXED_SIZE_BINARY": 15, + "LIST": 25, + "STRUCT": 26, + "DICTIONARY": 29, + "MAP": 30, + "FIXED_SIZE_LIST": 32, + "MAX_ID": 39, + } +) + +func (x LogicType) Enum() *LogicType { + p := new(LogicType) + *p = x + return p +} + +func (x LogicType) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (LogicType) Descriptor() protoreflect.EnumDescriptor { + return file_schema_proto_enumTypes[0].Descriptor() +} + +func (LogicType) Type() protoreflect.EnumType { + return &file_schema_proto_enumTypes[0] +} + +func (x LogicType) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use LogicType.Descriptor instead. +func (LogicType) EnumDescriptor() ([]byte, []int) { + return file_schema_proto_rawDescGZIP(), []int{0} +} + +type Endianness int32 + +const ( + Endianness_Little Endianness = 0 + Endianness_Big Endianness = 1 +) + +// Enum value maps for Endianness. 
+var ( + Endianness_name = map[int32]string{ + 0: "Little", + 1: "Big", + } + Endianness_value = map[string]int32{ + "Little": 0, + "Big": 1, + } +) + +func (x Endianness) Enum() *Endianness { + p := new(Endianness) + *p = x + return p +} + +func (x Endianness) String() string { + return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) +} + +func (Endianness) Descriptor() protoreflect.EnumDescriptor { + return file_schema_proto_enumTypes[1].Descriptor() +} + +func (Endianness) Type() protoreflect.EnumType { + return &file_schema_proto_enumTypes[1] +} + +func (x Endianness) Number() protoreflect.EnumNumber { + return protoreflect.EnumNumber(x) +} + +// Deprecated: Use Endianness.Descriptor instead. +func (Endianness) EnumDescriptor() ([]byte, []int) { + return file_schema_proto_rawDescGZIP(), []int{1} +} + +type FixedSizeBinaryType struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ByteWidth int32 `protobuf:"varint,1,opt,name=byte_width,json=byteWidth,proto3" json:"byte_width,omitempty"` +} + +func (x *FixedSizeBinaryType) Reset() { + *x = FixedSizeBinaryType{} + if protoimpl.UnsafeEnabled { + mi := &file_schema_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *FixedSizeBinaryType) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FixedSizeBinaryType) ProtoMessage() {} + +func (x *FixedSizeBinaryType) ProtoReflect() protoreflect.Message { + mi := &file_schema_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FixedSizeBinaryType.ProtoReflect.Descriptor instead. 
+func (*FixedSizeBinaryType) Descriptor() ([]byte, []int) { + return file_schema_proto_rawDescGZIP(), []int{0} +} + +func (x *FixedSizeBinaryType) GetByteWidth() int32 { + if x != nil { + return x.ByteWidth + } + return 0 +} + +type FixedSizeListType struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ListSize int32 `protobuf:"varint,1,opt,name=list_size,json=listSize,proto3" json:"list_size,omitempty"` +} + +func (x *FixedSizeListType) Reset() { + *x = FixedSizeListType{} + if protoimpl.UnsafeEnabled { + mi := &file_schema_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *FixedSizeListType) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FixedSizeListType) ProtoMessage() {} + +func (x *FixedSizeListType) ProtoReflect() protoreflect.Message { + mi := &file_schema_proto_msgTypes[1] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FixedSizeListType.ProtoReflect.Descriptor instead. 
+func (*FixedSizeListType) Descriptor() ([]byte, []int) { + return file_schema_proto_rawDescGZIP(), []int{1} +} + +func (x *FixedSizeListType) GetListSize() int32 { + if x != nil { + return x.ListSize + } + return 0 +} + +type DictionaryType struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + IndexType *DataType `protobuf:"bytes,1,opt,name=index_type,json=indexType,proto3" json:"index_type,omitempty"` + ValueType *DataType `protobuf:"bytes,2,opt,name=value_type,json=valueType,proto3" json:"value_type,omitempty"` + Ordered bool `protobuf:"varint,3,opt,name=ordered,proto3" json:"ordered,omitempty"` +} + +func (x *DictionaryType) Reset() { + *x = DictionaryType{} + if protoimpl.UnsafeEnabled { + mi := &file_schema_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DictionaryType) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DictionaryType) ProtoMessage() {} + +func (x *DictionaryType) ProtoReflect() protoreflect.Message { + mi := &file_schema_proto_msgTypes[2] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DictionaryType.ProtoReflect.Descriptor instead. 
+func (*DictionaryType) Descriptor() ([]byte, []int) { + return file_schema_proto_rawDescGZIP(), []int{2} +} + +func (x *DictionaryType) GetIndexType() *DataType { + if x != nil { + return x.IndexType + } + return nil +} + +func (x *DictionaryType) GetValueType() *DataType { + if x != nil { + return x.ValueType + } + return nil +} + +func (x *DictionaryType) GetOrdered() bool { + if x != nil { + return x.Ordered + } + return false +} + +type MapType struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + KeysSorted bool `protobuf:"varint,1,opt,name=keys_sorted,json=keysSorted,proto3" json:"keys_sorted,omitempty"` +} + +func (x *MapType) Reset() { + *x = MapType{} + if protoimpl.UnsafeEnabled { + mi := &file_schema_proto_msgTypes[3] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *MapType) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*MapType) ProtoMessage() {} + +func (x *MapType) ProtoReflect() protoreflect.Message { + mi := &file_schema_proto_msgTypes[3] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use MapType.ProtoReflect.Descriptor instead. 
+func (*MapType) Descriptor() ([]byte, []int) { + return file_schema_proto_rawDescGZIP(), []int{3} +} + +func (x *MapType) GetKeysSorted() bool { + if x != nil { + return x.KeysSorted + } + return false +} + +type DataType struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Types that are assignable to TypeRelatedValues: + // + // *DataType_FixedSizeBinaryType + // *DataType_FixedSizeListType + // *DataType_DictionaryType + // *DataType_MapType + TypeRelatedValues isDataType_TypeRelatedValues `protobuf_oneof:"type_related_values"` + LogicType LogicType `protobuf:"varint,100,opt,name=logic_type,json=logicType,proto3,enum=schema_proto.LogicType" json:"logic_type,omitempty"` + Children []*Field `protobuf:"bytes,101,rep,name=children,proto3" json:"children,omitempty"` +} + +func (x *DataType) Reset() { + *x = DataType{} + if protoimpl.UnsafeEnabled { + mi := &file_schema_proto_msgTypes[4] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *DataType) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*DataType) ProtoMessage() {} + +func (x *DataType) ProtoReflect() protoreflect.Message { + mi := &file_schema_proto_msgTypes[4] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use DataType.ProtoReflect.Descriptor instead. 
+func (*DataType) Descriptor() ([]byte, []int) { + return file_schema_proto_rawDescGZIP(), []int{4} +} + +func (m *DataType) GetTypeRelatedValues() isDataType_TypeRelatedValues { + if m != nil { + return m.TypeRelatedValues + } + return nil +} + +func (x *DataType) GetFixedSizeBinaryType() *FixedSizeBinaryType { + if x, ok := x.GetTypeRelatedValues().(*DataType_FixedSizeBinaryType); ok { + return x.FixedSizeBinaryType + } + return nil +} + +func (x *DataType) GetFixedSizeListType() *FixedSizeListType { + if x, ok := x.GetTypeRelatedValues().(*DataType_FixedSizeListType); ok { + return x.FixedSizeListType + } + return nil +} + +func (x *DataType) GetDictionaryType() *DictionaryType { + if x, ok := x.GetTypeRelatedValues().(*DataType_DictionaryType); ok { + return x.DictionaryType + } + return nil +} + +func (x *DataType) GetMapType() *MapType { + if x, ok := x.GetTypeRelatedValues().(*DataType_MapType); ok { + return x.MapType + } + return nil +} + +func (x *DataType) GetLogicType() LogicType { + if x != nil { + return x.LogicType + } + return LogicType_NA +} + +func (x *DataType) GetChildren() []*Field { + if x != nil { + return x.Children + } + return nil +} + +type isDataType_TypeRelatedValues interface { + isDataType_TypeRelatedValues() +} + +type DataType_FixedSizeBinaryType struct { + FixedSizeBinaryType *FixedSizeBinaryType `protobuf:"bytes,1,opt,name=fixed_size_binary_type,json=fixedSizeBinaryType,proto3,oneof"` +} + +type DataType_FixedSizeListType struct { + FixedSizeListType *FixedSizeListType `protobuf:"bytes,2,opt,name=fixed_size_list_type,json=fixedSizeListType,proto3,oneof"` +} + +type DataType_DictionaryType struct { + DictionaryType *DictionaryType `protobuf:"bytes,3,opt,name=dictionary_type,json=dictionaryType,proto3,oneof"` +} + +type DataType_MapType struct { + MapType *MapType `protobuf:"bytes,4,opt,name=map_type,json=mapType,proto3,oneof"` +} + +func (*DataType_FixedSizeBinaryType) isDataType_TypeRelatedValues() {} + +func 
(*DataType_FixedSizeListType) isDataType_TypeRelatedValues() {} + +func (*DataType_DictionaryType) isDataType_TypeRelatedValues() {} + +func (*DataType_MapType) isDataType_TypeRelatedValues() {} + +type KeyValueMetadata struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Keys []string `protobuf:"bytes,1,rep,name=keys,proto3" json:"keys,omitempty"` + Values []string `protobuf:"bytes,2,rep,name=values,proto3" json:"values,omitempty"` +} + +func (x *KeyValueMetadata) Reset() { + *x = KeyValueMetadata{} + if protoimpl.UnsafeEnabled { + mi := &file_schema_proto_msgTypes[5] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *KeyValueMetadata) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*KeyValueMetadata) ProtoMessage() {} + +func (x *KeyValueMetadata) ProtoReflect() protoreflect.Message { + mi := &file_schema_proto_msgTypes[5] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use KeyValueMetadata.ProtoReflect.Descriptor instead. 
+func (*KeyValueMetadata) Descriptor() ([]byte, []int) { + return file_schema_proto_rawDescGZIP(), []int{5} +} + +func (x *KeyValueMetadata) GetKeys() []string { + if x != nil { + return x.Keys + } + return nil +} + +func (x *KeyValueMetadata) GetValues() []string { + if x != nil { + return x.Values + } + return nil +} + +type Field struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Name string `protobuf:"bytes,1,opt,name=name,proto3" json:"name,omitempty"` + Nullable bool `protobuf:"varint,2,opt,name=nullable,proto3" json:"nullable,omitempty"` + DataType *DataType `protobuf:"bytes,3,opt,name=data_type,json=dataType,proto3" json:"data_type,omitempty"` + Metadata *KeyValueMetadata `protobuf:"bytes,4,opt,name=metadata,proto3" json:"metadata,omitempty"` +} + +func (x *Field) Reset() { + *x = Field{} + if protoimpl.UnsafeEnabled { + mi := &file_schema_proto_msgTypes[6] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Field) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Field) ProtoMessage() {} + +func (x *Field) ProtoReflect() protoreflect.Message { + mi := &file_schema_proto_msgTypes[6] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Field.ProtoReflect.Descriptor instead. 
+func (*Field) Descriptor() ([]byte, []int) { + return file_schema_proto_rawDescGZIP(), []int{6} +} + +func (x *Field) GetName() string { + if x != nil { + return x.Name + } + return "" +} + +func (x *Field) GetNullable() bool { + if x != nil { + return x.Nullable + } + return false +} + +func (x *Field) GetDataType() *DataType { + if x != nil { + return x.DataType + } + return nil +} + +func (x *Field) GetMetadata() *KeyValueMetadata { + if x != nil { + return x.Metadata + } + return nil +} + +type SchemaOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + PrimaryColumn string `protobuf:"bytes,1,opt,name=primary_column,json=primaryColumn,proto3" json:"primary_column,omitempty"` + VersionColumn string `protobuf:"bytes,2,opt,name=version_column,json=versionColumn,proto3" json:"version_column,omitempty"` + VectorColumn string `protobuf:"bytes,3,opt,name=vector_column,json=vectorColumn,proto3" json:"vector_column,omitempty"` +} + +func (x *SchemaOptions) Reset() { + *x = SchemaOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_schema_proto_msgTypes[7] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *SchemaOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*SchemaOptions) ProtoMessage() {} + +func (x *SchemaOptions) ProtoReflect() protoreflect.Message { + mi := &file_schema_proto_msgTypes[7] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use SchemaOptions.ProtoReflect.Descriptor instead. 
+func (*SchemaOptions) Descriptor() ([]byte, []int) { + return file_schema_proto_rawDescGZIP(), []int{7} +} + +func (x *SchemaOptions) GetPrimaryColumn() string { + if x != nil { + return x.PrimaryColumn + } + return "" +} + +func (x *SchemaOptions) GetVersionColumn() string { + if x != nil { + return x.VersionColumn + } + return "" +} + +func (x *SchemaOptions) GetVectorColumn() string { + if x != nil { + return x.VectorColumn + } + return "" +} + +type ArrowSchema struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Fields []*Field `protobuf:"bytes,1,rep,name=fields,proto3" json:"fields,omitempty"` + Endianness Endianness `protobuf:"varint,2,opt,name=endianness,proto3,enum=schema_proto.Endianness" json:"endianness,omitempty"` + Metadata *KeyValueMetadata `protobuf:"bytes,3,opt,name=metadata,proto3" json:"metadata,omitempty"` +} + +func (x *ArrowSchema) Reset() { + *x = ArrowSchema{} + if protoimpl.UnsafeEnabled { + mi := &file_schema_proto_msgTypes[8] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ArrowSchema) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ArrowSchema) ProtoMessage() {} + +func (x *ArrowSchema) ProtoReflect() protoreflect.Message { + mi := &file_schema_proto_msgTypes[8] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ArrowSchema.ProtoReflect.Descriptor instead. 
+func (*ArrowSchema) Descriptor() ([]byte, []int) { + return file_schema_proto_rawDescGZIP(), []int{8} +} + +func (x *ArrowSchema) GetFields() []*Field { + if x != nil { + return x.Fields + } + return nil +} + +func (x *ArrowSchema) GetEndianness() Endianness { + if x != nil { + return x.Endianness + } + return Endianness_Little +} + +func (x *ArrowSchema) GetMetadata() *KeyValueMetadata { + if x != nil { + return x.Metadata + } + return nil +} + +type Schema struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + ArrowSchema *ArrowSchema `protobuf:"bytes,1,opt,name=arrow_schema,json=arrowSchema,proto3" json:"arrow_schema,omitempty"` + SchemaOptions *SchemaOptions `protobuf:"bytes,2,opt,name=schema_options,json=schemaOptions,proto3" json:"schema_options,omitempty"` +} + +func (x *Schema) Reset() { + *x = Schema{} + if protoimpl.UnsafeEnabled { + mi := &file_schema_proto_msgTypes[9] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *Schema) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*Schema) ProtoMessage() {} + +func (x *Schema) ProtoReflect() protoreflect.Message { + mi := &file_schema_proto_msgTypes[9] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use Schema.ProtoReflect.Descriptor instead. 
+func (*Schema) Descriptor() ([]byte, []int) { + return file_schema_proto_rawDescGZIP(), []int{9} +} + +func (x *Schema) GetArrowSchema() *ArrowSchema { + if x != nil { + return x.ArrowSchema + } + return nil +} + +func (x *Schema) GetSchemaOptions() *SchemaOptions { + if x != nil { + return x.SchemaOptions + } + return nil +} + +var File_schema_proto protoreflect.FileDescriptor + +var file_schema_proto_rawDesc = []byte{ + 0x0a, 0x0c, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, 0x0c, + 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x34, 0x0a, 0x13, + 0x46, 0x69, 0x78, 0x65, 0x64, 0x53, 0x69, 0x7a, 0x65, 0x42, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x54, + 0x79, 0x70, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x62, 0x79, 0x74, 0x65, 0x5f, 0x77, 0x69, 0x64, 0x74, + 0x68, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x09, 0x62, 0x79, 0x74, 0x65, 0x57, 0x69, 0x64, + 0x74, 0x68, 0x22, 0x30, 0x0a, 0x11, 0x46, 0x69, 0x78, 0x65, 0x64, 0x53, 0x69, 0x7a, 0x65, 0x4c, + 0x69, 0x73, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x6c, 0x69, 0x73, 0x74, 0x5f, + 0x73, 0x69, 0x7a, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x05, 0x52, 0x08, 0x6c, 0x69, 0x73, 0x74, + 0x53, 0x69, 0x7a, 0x65, 0x22, 0x98, 0x01, 0x0a, 0x0e, 0x44, 0x69, 0x63, 0x74, 0x69, 0x6f, 0x6e, + 0x61, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, 0x35, 0x0a, 0x0a, 0x69, 0x6e, 0x64, 0x65, 0x78, + 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x73, 0x63, + 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x54, + 0x79, 0x70, 0x65, 0x52, 0x09, 0x69, 0x6e, 0x64, 0x65, 0x78, 0x54, 0x79, 0x70, 0x65, 0x12, 0x35, + 0x0a, 0x0a, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x16, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x2e, 0x44, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x09, 0x76, 0x61, 0x6c, 0x75, + 
0x65, 0x54, 0x79, 0x70, 0x65, 0x12, 0x18, 0x0a, 0x07, 0x6f, 0x72, 0x64, 0x65, 0x72, 0x65, 0x64, + 0x18, 0x03, 0x20, 0x01, 0x28, 0x08, 0x52, 0x07, 0x6f, 0x72, 0x64, 0x65, 0x72, 0x65, 0x64, 0x22, + 0x2a, 0x0a, 0x07, 0x4d, 0x61, 0x70, 0x54, 0x79, 0x70, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x6b, 0x65, + 0x79, 0x73, 0x5f, 0x73, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, + 0x0a, 0x6b, 0x65, 0x79, 0x73, 0x53, 0x6f, 0x72, 0x74, 0x65, 0x64, 0x22, 0xb5, 0x03, 0x0a, 0x08, + 0x44, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x58, 0x0a, 0x16, 0x66, 0x69, 0x78, 0x65, + 0x64, 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x5f, 0x62, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x5f, 0x74, 0x79, + 0x70, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x21, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, + 0x61, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x46, 0x69, 0x78, 0x65, 0x64, 0x53, 0x69, 0x7a, + 0x65, 0x42, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x13, 0x66, + 0x69, 0x78, 0x65, 0x64, 0x53, 0x69, 0x7a, 0x65, 0x42, 0x69, 0x6e, 0x61, 0x72, 0x79, 0x54, 0x79, + 0x70, 0x65, 0x12, 0x52, 0x0a, 0x14, 0x66, 0x69, 0x78, 0x65, 0x64, 0x5f, 0x73, 0x69, 0x7a, 0x65, + 0x5f, 0x6c, 0x69, 0x73, 0x74, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x1f, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x46, 0x69, 0x78, 0x65, 0x64, 0x53, 0x69, 0x7a, 0x65, 0x4c, 0x69, 0x73, 0x74, 0x54, 0x79, 0x70, + 0x65, 0x48, 0x00, 0x52, 0x11, 0x66, 0x69, 0x78, 0x65, 0x64, 0x53, 0x69, 0x7a, 0x65, 0x4c, 0x69, + 0x73, 0x74, 0x54, 0x79, 0x70, 0x65, 0x12, 0x47, 0x0a, 0x0f, 0x64, 0x69, 0x63, 0x74, 0x69, 0x6f, + 0x6e, 0x61, 0x72, 0x79, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, + 0x1c, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x44, + 0x69, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x61, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, + 0x0e, 0x64, 0x69, 0x63, 0x74, 0x69, 
0x6f, 0x6e, 0x61, 0x72, 0x79, 0x54, 0x79, 0x70, 0x65, 0x12, + 0x32, 0x0a, 0x08, 0x6d, 0x61, 0x70, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, + 0x0b, 0x32, 0x15, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0x2e, 0x4d, 0x61, 0x70, 0x54, 0x79, 0x70, 0x65, 0x48, 0x00, 0x52, 0x07, 0x6d, 0x61, 0x70, 0x54, + 0x79, 0x70, 0x65, 0x12, 0x36, 0x0a, 0x0a, 0x6c, 0x6f, 0x67, 0x69, 0x63, 0x5f, 0x74, 0x79, 0x70, + 0x65, 0x18, 0x64, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x17, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, + 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4c, 0x6f, 0x67, 0x69, 0x63, 0x54, 0x79, 0x70, 0x65, + 0x52, 0x09, 0x6c, 0x6f, 0x67, 0x69, 0x63, 0x54, 0x79, 0x70, 0x65, 0x12, 0x2f, 0x0a, 0x08, 0x63, + 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x18, 0x65, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, + 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x46, 0x69, 0x65, + 0x6c, 0x64, 0x52, 0x08, 0x63, 0x68, 0x69, 0x6c, 0x64, 0x72, 0x65, 0x6e, 0x42, 0x15, 0x0a, 0x13, + 0x74, 0x79, 0x70, 0x65, 0x5f, 0x72, 0x65, 0x6c, 0x61, 0x74, 0x65, 0x64, 0x5f, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x73, 0x22, 0x3e, 0x0a, 0x10, 0x4b, 0x65, 0x79, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x4d, + 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x18, + 0x01, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x6b, 0x65, 0x79, 0x73, 0x12, 0x16, 0x0a, 0x06, 0x76, + 0x61, 0x6c, 0x75, 0x65, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x06, 0x76, 0x61, 0x6c, + 0x75, 0x65, 0x73, 0x22, 0xa8, 0x01, 0x0a, 0x05, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x12, 0x12, 0x0a, + 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, + 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x08, 0x52, 0x08, 0x6e, 0x75, 0x6c, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x33, 0x0a, + 0x09, 0x64, 0x61, 0x74, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x03, 
0x20, 0x01, 0x28, 0x0b, + 0x32, 0x16, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, + 0x44, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x52, 0x08, 0x64, 0x61, 0x74, 0x61, 0x54, 0x79, + 0x70, 0x65, 0x12, 0x3a, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x2e, 0x4b, 0x65, 0x79, 0x56, 0x61, 0x6c, 0x75, 0x65, 0x4d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x82, + 0x01, 0x0a, 0x0d, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, + 0x12, 0x25, 0x0a, 0x0e, 0x70, 0x72, 0x69, 0x6d, 0x61, 0x72, 0x79, 0x5f, 0x63, 0x6f, 0x6c, 0x75, + 0x6d, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x70, 0x72, 0x69, 0x6d, 0x61, 0x72, + 0x79, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, 0x25, 0x0a, 0x0e, 0x76, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x5f, 0x63, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0d, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x43, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x12, 0x23, + 0x0a, 0x0d, 0x76, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x5f, 0x63, 0x6f, 0x6c, 0x75, 0x6d, 0x6e, 0x18, + 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x76, 0x65, 0x63, 0x74, 0x6f, 0x72, 0x43, 0x6f, 0x6c, + 0x75, 0x6d, 0x6e, 0x22, 0xb0, 0x01, 0x0a, 0x0b, 0x41, 0x72, 0x72, 0x6f, 0x77, 0x53, 0x63, 0x68, + 0x65, 0x6d, 0x61, 0x12, 0x2b, 0x0a, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, 0x18, 0x01, 0x20, + 0x03, 0x28, 0x0b, 0x32, 0x13, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x2e, 0x46, 0x69, 0x65, 0x6c, 0x64, 0x52, 0x06, 0x66, 0x69, 0x65, 0x6c, 0x64, 0x73, + 0x12, 0x38, 0x0a, 0x0a, 0x65, 0x6e, 0x64, 0x69, 0x61, 0x6e, 0x6e, 0x65, 0x73, 0x73, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x0e, 0x32, 0x18, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, 0x72, + 0x6f, 0x74, 
0x6f, 0x2e, 0x45, 0x6e, 0x64, 0x69, 0x61, 0x6e, 0x6e, 0x65, 0x73, 0x73, 0x52, 0x0a, + 0x65, 0x6e, 0x64, 0x69, 0x61, 0x6e, 0x6e, 0x65, 0x73, 0x73, 0x12, 0x3a, 0x0a, 0x08, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1e, 0x2e, 0x73, + 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x4b, 0x65, 0x79, 0x56, + 0x61, 0x6c, 0x75, 0x65, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x22, 0x8a, 0x01, 0x0a, 0x06, 0x53, 0x63, 0x68, 0x65, 0x6d, + 0x61, 0x12, 0x3c, 0x0a, 0x0c, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x5f, 0x73, 0x63, 0x68, 0x65, 0x6d, + 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, + 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x41, 0x72, 0x72, 0x6f, 0x77, 0x53, 0x63, 0x68, 0x65, + 0x6d, 0x61, 0x52, 0x0b, 0x61, 0x72, 0x72, 0x6f, 0x77, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x12, + 0x42, 0x0a, 0x0e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, + 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1b, 0x2e, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, + 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2e, 0x53, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x4f, 0x70, 0x74, + 0x69, 0x6f, 0x6e, 0x73, 0x52, 0x0d, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x4f, 0x70, 0x74, 0x69, + 0x6f, 0x6e, 0x73, 0x2a, 0x9d, 0x02, 0x0a, 0x09, 0x4c, 0x6f, 0x67, 0x69, 0x63, 0x54, 0x79, 0x70, + 0x65, 0x12, 0x06, 0x0a, 0x02, 0x4e, 0x41, 0x10, 0x00, 0x12, 0x08, 0x0a, 0x04, 0x42, 0x4f, 0x4f, + 0x4c, 0x10, 0x01, 0x12, 0x09, 0x0a, 0x05, 0x55, 0x49, 0x4e, 0x54, 0x38, 0x10, 0x02, 0x12, 0x08, + 0x0a, 0x04, 0x49, 0x4e, 0x54, 0x38, 0x10, 0x03, 0x12, 0x0a, 0x0a, 0x06, 0x55, 0x49, 0x4e, 0x54, + 0x31, 0x36, 0x10, 0x04, 0x12, 0x09, 0x0a, 0x05, 0x49, 0x4e, 0x54, 0x31, 0x36, 0x10, 0x05, 0x12, + 0x0a, 0x0a, 0x06, 0x55, 0x49, 0x4e, 0x54, 0x33, 0x32, 0x10, 0x06, 0x12, 0x09, 0x0a, 0x05, 0x49, + 0x4e, 0x54, 0x33, 0x32, 0x10, 0x07, 0x12, 0x0a, 
0x0a, 0x06, 0x55, 0x49, 0x4e, 0x54, 0x36, 0x34, + 0x10, 0x08, 0x12, 0x09, 0x0a, 0x05, 0x49, 0x4e, 0x54, 0x36, 0x34, 0x10, 0x09, 0x12, 0x0e, 0x0a, + 0x0a, 0x48, 0x41, 0x4c, 0x46, 0x5f, 0x46, 0x4c, 0x4f, 0x41, 0x54, 0x10, 0x0a, 0x12, 0x09, 0x0a, + 0x05, 0x46, 0x4c, 0x4f, 0x41, 0x54, 0x10, 0x0b, 0x12, 0x0a, 0x0a, 0x06, 0x44, 0x4f, 0x55, 0x42, + 0x4c, 0x45, 0x10, 0x0c, 0x12, 0x0a, 0x0a, 0x06, 0x53, 0x54, 0x52, 0x49, 0x4e, 0x47, 0x10, 0x0d, + 0x12, 0x0a, 0x0a, 0x06, 0x42, 0x49, 0x4e, 0x41, 0x52, 0x59, 0x10, 0x0e, 0x12, 0x15, 0x0a, 0x11, + 0x46, 0x49, 0x58, 0x45, 0x44, 0x5f, 0x53, 0x49, 0x5a, 0x45, 0x5f, 0x42, 0x49, 0x4e, 0x41, 0x52, + 0x59, 0x10, 0x0f, 0x12, 0x08, 0x0a, 0x04, 0x4c, 0x49, 0x53, 0x54, 0x10, 0x19, 0x12, 0x0a, 0x0a, + 0x06, 0x53, 0x54, 0x52, 0x55, 0x43, 0x54, 0x10, 0x1a, 0x12, 0x0e, 0x0a, 0x0a, 0x44, 0x49, 0x43, + 0x54, 0x49, 0x4f, 0x4e, 0x41, 0x52, 0x59, 0x10, 0x1d, 0x12, 0x07, 0x0a, 0x03, 0x4d, 0x41, 0x50, + 0x10, 0x1e, 0x12, 0x13, 0x0a, 0x0f, 0x46, 0x49, 0x58, 0x45, 0x44, 0x5f, 0x53, 0x49, 0x5a, 0x45, + 0x5f, 0x4c, 0x49, 0x53, 0x54, 0x10, 0x20, 0x12, 0x0a, 0x0a, 0x06, 0x4d, 0x41, 0x58, 0x5f, 0x49, + 0x44, 0x10, 0x27, 0x2a, 0x21, 0x0a, 0x0a, 0x45, 0x6e, 0x64, 0x69, 0x61, 0x6e, 0x6e, 0x65, 0x73, + 0x73, 0x12, 0x0a, 0x0a, 0x06, 0x4c, 0x69, 0x74, 0x74, 0x6c, 0x65, 0x10, 0x00, 0x12, 0x07, 0x0a, + 0x03, 0x42, 0x69, 0x67, 0x10, 0x01, 0x42, 0x4c, 0x5a, 0x4a, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, + 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6d, 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2d, 0x69, 0x6f, 0x2f, 0x6d, + 0x69, 0x6c, 0x76, 0x75, 0x73, 0x2d, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2d, 0x66, 0x6f, + 0x72, 0x6d, 0x61, 0x74, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x73, 0x63, 0x68, 0x65, 0x6d, + 0x61, 0x5f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x3b, 0x73, 0x63, 0x68, 0x65, 0x6d, 0x61, 0x5f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, +} + +var ( + file_schema_proto_rawDescOnce sync.Once + file_schema_proto_rawDescData = file_schema_proto_rawDesc 
+) + +func file_schema_proto_rawDescGZIP() []byte { + file_schema_proto_rawDescOnce.Do(func() { + file_schema_proto_rawDescData = protoimpl.X.CompressGZIP(file_schema_proto_rawDescData) + }) + return file_schema_proto_rawDescData +} + +var file_schema_proto_enumTypes = make([]protoimpl.EnumInfo, 2) +var file_schema_proto_msgTypes = make([]protoimpl.MessageInfo, 10) +var file_schema_proto_goTypes = []interface{}{ + (LogicType)(0), // 0: schema_proto.LogicType + (Endianness)(0), // 1: schema_proto.Endianness + (*FixedSizeBinaryType)(nil), // 2: schema_proto.FixedSizeBinaryType + (*FixedSizeListType)(nil), // 3: schema_proto.FixedSizeListType + (*DictionaryType)(nil), // 4: schema_proto.DictionaryType + (*MapType)(nil), // 5: schema_proto.MapType + (*DataType)(nil), // 6: schema_proto.DataType + (*KeyValueMetadata)(nil), // 7: schema_proto.KeyValueMetadata + (*Field)(nil), // 8: schema_proto.Field + (*SchemaOptions)(nil), // 9: schema_proto.SchemaOptions + (*ArrowSchema)(nil), // 10: schema_proto.ArrowSchema + (*Schema)(nil), // 11: schema_proto.Schema +} +var file_schema_proto_depIdxs = []int32{ + 6, // 0: schema_proto.DictionaryType.index_type:type_name -> schema_proto.DataType + 6, // 1: schema_proto.DictionaryType.value_type:type_name -> schema_proto.DataType + 2, // 2: schema_proto.DataType.fixed_size_binary_type:type_name -> schema_proto.FixedSizeBinaryType + 3, // 3: schema_proto.DataType.fixed_size_list_type:type_name -> schema_proto.FixedSizeListType + 4, // 4: schema_proto.DataType.dictionary_type:type_name -> schema_proto.DictionaryType + 5, // 5: schema_proto.DataType.map_type:type_name -> schema_proto.MapType + 0, // 6: schema_proto.DataType.logic_type:type_name -> schema_proto.LogicType + 8, // 7: schema_proto.DataType.children:type_name -> schema_proto.Field + 6, // 8: schema_proto.Field.data_type:type_name -> schema_proto.DataType + 7, // 9: schema_proto.Field.metadata:type_name -> schema_proto.KeyValueMetadata + 8, // 10: 
schema_proto.ArrowSchema.fields:type_name -> schema_proto.Field + 1, // 11: schema_proto.ArrowSchema.endianness:type_name -> schema_proto.Endianness + 7, // 12: schema_proto.ArrowSchema.metadata:type_name -> schema_proto.KeyValueMetadata + 10, // 13: schema_proto.Schema.arrow_schema:type_name -> schema_proto.ArrowSchema + 9, // 14: schema_proto.Schema.schema_options:type_name -> schema_proto.SchemaOptions + 15, // [15:15] is the sub-list for method output_type + 15, // [15:15] is the sub-list for method input_type + 15, // [15:15] is the sub-list for extension type_name + 15, // [15:15] is the sub-list for extension extendee + 0, // [0:15] is the sub-list for field type_name +} + +func init() { file_schema_proto_init() } +func file_schema_proto_init() { + if File_schema_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_schema_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*FixedSizeBinaryType); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_schema_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*FixedSizeListType); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_schema_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DictionaryType); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_schema_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*MapType); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_schema_proto_msgTypes[4].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*DataType); i { + case 0: + return &v.state + case 
1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_schema_proto_msgTypes[5].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*KeyValueMetadata); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_schema_proto_msgTypes[6].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Field); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_schema_proto_msgTypes[7].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*SchemaOptions); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_schema_proto_msgTypes[8].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ArrowSchema); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_schema_proto_msgTypes[9].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*Schema); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + file_schema_proto_msgTypes[4].OneofWrappers = []interface{}{ + (*DataType_FixedSizeBinaryType)(nil), + (*DataType_FixedSizeListType)(nil), + (*DataType_DictionaryType)(nil), + (*DataType_MapType)(nil), + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_schema_proto_rawDesc, + NumEnums: 2, + NumMessages: 10, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_schema_proto_goTypes, + DependencyIndexes: file_schema_proto_depIdxs, + EnumInfos: file_schema_proto_enumTypes, + MessageInfos: file_schema_proto_msgTypes, + }.Build() + File_schema_proto = 
out.File + file_schema_proto_rawDesc = nil + file_schema_proto_goTypes = nil + file_schema_proto_depIdxs = nil +} diff --git a/go/storage/default_space.go b/go/storage/default_space.go new file mode 100644 index 00000000..5f336a04 --- /dev/null +++ b/go/storage/default_space.go @@ -0,0 +1,264 @@ +package storage + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + "github.com/milvus-io/milvus-storage-format/common/status" + "github.com/milvus-io/milvus-storage-format/common/utils" + "github.com/milvus-io/milvus-storage-format/file/fragment" + "github.com/milvus-io/milvus-storage-format/io/format" + "github.com/milvus-io/milvus-storage-format/io/format/parquet" + "github.com/milvus-io/milvus-storage-format/io/fs" + mnf "github.com/milvus-io/milvus-storage-format/storage/manifest" + "github.com/milvus-io/milvus-storage-format/storage/options" + "github.com/milvus-io/milvus-storage-format/storage/schema" +) + +type DefaultSpace struct { + basePath string + fs fs.Fs + schema *schema.Schema + + deleteFragments fragment.DeleteFragmentVector + manifest *mnf.Manifest + options *options.Options +} + +func NewSeparateVectorSpace(schema *schema.Schema, op *options.Options) *DefaultSpace { + fsFactory := fs.NewFsFactory() + f := fsFactory.Create(options.LocalFS) + // TODO: implement uri parser + uri := op.Uri + maniFest := mnf.NewManifest(schema, op) + // TODO: implement delete fragment + deleteFragments := fragment.DeleteFragmentVector{} + + return &DefaultSpace{ + basePath: uri, + fs: f, + schema: schema, + options: op, + manifest: maniFest, + deleteFragments: deleteFragments, + } +} + +func (s *DefaultSpace) Write(reader array.RecordReader, option *options.WriteOptions) error { + // check schema consistency + if !s.schema.Schema().Equal(reader.Schema()) { + return ErrSchemaNotMatch + } + + scalarSchema, vectorSchema := s.schema.ScalarSchema(), s.schema.VectorSchema() + + var ( + scalarWriter format.Writer + vectorWriter 
format.Writer + + scalarCols []arrow.Array + vectorCols []arrow.Array + + scalarFragment fragment.Fragment + vectorFragment fragment.Fragment + ) + + for reader.Next() { + rec := reader.Record() + + if rec.NumRows() == 0 { + continue + } + + for i := 0; i < int(rec.NumCols()); i++ { + if scalarSchema.HasField(rec.ColumnName(i)) { + scalarCols = append(scalarCols, rec.Column(i)) + } + if vectorSchema.HasField(rec.ColumnName(i)) { + vectorCols = append(vectorCols, rec.Column(i)) + } + } + + // add offset column + offsetValues := make([]int64, rec.NumRows()) + for i := 0; i < int(rec.NumRows()); i++ { + offsetValues[i] = int64(i) + } + builder := array.Int64Builder{} + builder.AppendValues(offsetValues, nil) + scalarCols = append(scalarCols, builder.NewArray()) + + scalarRecord := array.NewRecord(scalarSchema, scalarCols, rec.NumRows()) + vectorRecord := array.NewRecord(vectorSchema, vectorCols, rec.NumRows()) + + if scalarWriter == nil { + scalarFilePath := utils.GetNewParquetFilePath(s.manifest.SpaceOptions().Uri) + _, err := parquet.NewFileWriter(scalarSchema, s.fs, scalarFilePath) + if err != nil { + return err + } + + vectorFilePath := utils.GetNewParquetFilePath(s.manifest.SpaceOptions().Uri) + _, err = parquet.NewFileWriter(vectorSchema, s.fs, vectorFilePath) + if err != nil { + return err + } + + scalarFragment.AddFile(scalarFilePath) + vectorFragment.AddFile(vectorFilePath) + + } + + scalarWriter.Write(scalarRecord) + vectorWriter.Write(vectorRecord) + + if scalarWriter.Count() >= option.MaxRecordPerFile { + scalarWriter.Close() + vectorWriter.Close() + scalarWriter = nil + vectorWriter = nil + } + + } + + if scalarWriter != nil { + scalarWriter.Close() + vectorWriter.Close() + scalarWriter = nil + vectorWriter = nil + } + + copiedManifest := s.manifest.Copy() + oldVersion := s.manifest.Version() + scalarFragment.SetFragmentId(oldVersion + 1) + vectorFragment.SetFragmentId(oldVersion + 1) + copiedManifest.AddScalarFragment(scalarFragment) + 
copiedManifest.AddVectorFragment(vectorFragment) + copiedManifest.SetVersion(oldVersion + 1) + s.SafeSaveManifest(copiedManifest) + + s.manifest = new(mnf.Manifest) + return nil +} + +func (s *DefaultSpace) Write1(reader array.RecordReader, options *options.WriteOptions) error { + // check schema consistency + if !s.schema.Schema().Equal(reader.Schema()) { + return ErrSchemaNotMatch + } + + scalarSchema, vectorSchema := s.schema.ScalarSchema(), s.schema.VectorSchema() + var ( + scalarWriter format.Writer + vectorWriter format.Writer + scalarFragment *fragment.Fragment + vectorFragment *fragment.Fragment + ) + + for reader.Next() { + rec := reader.Record() + + if rec.NumRows() == 0 { + continue + } + + var ( + err error + ) + + scalarWriter, err = s.write(scalarSchema, rec, scalarWriter, scalarFragment, options, true) + if err != nil { + return err + } + + vectorWriter, err = s.write(vectorSchema, rec, vectorWriter, vectorFragment, options, false) + if err != nil { + return err + } + } + + if scalarWriter != nil { + if err := scalarWriter.Close(); err != nil { + return err + } + if err := vectorWriter.Close(); err != nil { + return err + } + } + + copiedManifest := s.manifest + oldVersion := s.manifest.Version() + scalarFragment.SetFragmentId(oldVersion + 1) + vectorFragment.SetFragmentId(oldVersion + 1) + copiedManifest.AddScalarFragment(*scalarFragment) + copiedManifest.AddVectorFragment(*vectorFragment) + copiedManifest.SetVersion(oldVersion + 1) + + s.SafeSaveManifest(copiedManifest) + s.manifest = new(mnf.Manifest) + + return nil +} + +func (s *DefaultSpace) SafeSaveManifest(manifest *mnf.Manifest) status.Status { + tmpManifestFilePath := utils.GetManifestTmpFilePath(manifest.SpaceOptions().Uri) + manifestFilePath := utils.GetManifestFilePath(manifest.SpaceOptions().Uri) + output, _ := s.fs.OpenFile(tmpManifestFilePath) + mnf.WriteManifestFile(manifest, output) + s.fs.Rename(tmpManifestFilePath, manifestFilePath) + s.fs.DeleteFile(tmpManifestFilePath) + return 
status.OK() +} + +func (s *DefaultSpace) write( + scalarSchema *arrow.Schema, + rec arrow.Record, + writer format.Writer, + scalarFragment *fragment.Fragment, + opt *options.WriteOptions, + isScalar bool, +) (format.Writer, error) { + + var scalarCols []arrow.Array + for i := 0; i < int(rec.NumCols()); i++ { + if scalarSchema.HasField(rec.ColumnName(i)) { + scalarCols = append(scalarCols, rec.Column(i)) + } + } + + if isScalar { + // add offset column for scalar + offsetValues := make([]int64, rec.NumRows()) + for i := 0; i < int(rec.NumRows()); i++ { + offsetValues[i] = int64(i) + } + builder := array.Int64Builder{} + builder.AppendValues(offsetValues, nil) + scalarCols = append(scalarCols, builder.NewArray()) + } + + var err error + + scalarRecord := array.NewRecord(scalarSchema, scalarCols, rec.NumRows()) + + if writer == nil { + scalarFilePath := utils.GetNewParquetFilePath(s.manifest.SpaceOptions().Uri) + writer, err = parquet.NewFileWriter(scalarSchema, s.fs, scalarFilePath) + if err != nil { + return nil, err + } + scalarFragment.AddFile(scalarFilePath) + } + + writer.Write(scalarRecord) + + if writer.Count() >= opt.MaxRecordPerFile { + writer.Close() + writer = nil + } + + return writer, nil +} + +func (s *DefaultSpace) Read(options *options.ReadOptions) (array.RecordReader, error) { + panic("not implemented") // TODO: Implement +} diff --git a/go/storage/manifest/manifest.go b/go/storage/manifest/manifest.go new file mode 100644 index 00000000..d1a50b2e --- /dev/null +++ b/go/storage/manifest/manifest.go @@ -0,0 +1,173 @@ +package manifest + +import ( + "github.com/milvus-io/milvus-storage-format/common/result" + "github.com/milvus-io/milvus-storage-format/common/status" + "github.com/milvus-io/milvus-storage-format/file/fragment" + "github.com/milvus-io/milvus-storage-format/io/fs" + "github.com/milvus-io/milvus-storage-format/io/fs/file" + "github.com/milvus-io/milvus-storage-format/proto/manifest_proto" + 
"github.com/milvus-io/milvus-storage-format/storage/options" + "github.com/milvus-io/milvus-storage-format/storage/schema" + "google.golang.org/protobuf/proto" +) + +type Manifest struct { + schema *schema.Schema + options *options.Options + ScalarFragments fragment.FragmentVector + vectorFragments fragment.FragmentVector + deleteFragments fragment.FragmentVector + + version int64 +} + +func NewManifest(schema *schema.Schema, options *options.Options) *Manifest { + + return &Manifest{ + schema: schema, + options: options, + } +} + +func (m *Manifest) Copy() *Manifest { + copied := *m + return &copied +} + +func (m *Manifest) GetSchema() *schema.Schema { + return m.schema +} + +func (m *Manifest) AddScalarFragment(fragment fragment.Fragment) { + m.ScalarFragments = append(m.ScalarFragments, fragment) +} + +func (m *Manifest) AddVectorFragment(fragment fragment.Fragment) { + m.vectorFragments = append(m.vectorFragments, fragment) +} + +func (m *Manifest) AddDeleteFragment(fragment fragment.Fragment) { + m.deleteFragments = append(m.deleteFragments, fragment) +} + +func (m *Manifest) GetScalarFragments() fragment.FragmentVector { + return m.ScalarFragments +} + +func (m *Manifest) GetVectorFragments() fragment.FragmentVector { + return m.vectorFragments +} + +func (m *Manifest) GetDeleteFragments() fragment.FragmentVector { + return m.deleteFragments +} + +func (m *Manifest) Version() int64 { + return m.version +} + +func (m *Manifest) SetVersion(version int64) { + m.version = version +} + +func (m *Manifest) SpaceOptions() *options.Options { + return m.options +} + +func (m *Manifest) ToProtobuf() *result.Result[*manifest_proto.Manifest] { + manifest := &manifest_proto.Manifest{} + manifest.Version = m.version + manifest.Options = m.options.ToProtobuf() + for _, vectorFragment := range m.vectorFragments { + manifest.VectorFragments = append(manifest.VectorFragments, vectorFragment.ToProtobuf()) + } + for _, scalarFragment := range m.ScalarFragments { + 
manifest.ScalarFragments = append(manifest.ScalarFragments, scalarFragment.ToProtobuf()) + } + for _, deleteFragment := range m.deleteFragments { + manifest.DeleteFragments = append(manifest.DeleteFragments, deleteFragment.ToProtobuf()) + } + + schemaProto := m.schema.ToProtobuf() + if !schemaProto.Ok() { + return result.NewResultFromStatus[*manifest_proto.Manifest](*schemaProto.Status()) + } + manifest.Schema = schemaProto.Value() + return result.NewResult[*manifest_proto.Manifest](manifest) +} + +func (m *Manifest) FromProtobuf(manifest *manifest_proto.Manifest) { + + m.options.FromProtobuf(manifest.Options) + + m.schema.FromProtobuf(manifest.Schema) + + for _, vectorFragment := range manifest.VectorFragments { + m.vectorFragments = append(m.vectorFragments, *fragment.FromProtobuf(vectorFragment)) + } + + for _, scalarFragment := range manifest.ScalarFragments { + m.ScalarFragments = append(m.ScalarFragments, *fragment.FromProtobuf(scalarFragment)) + } + + for _, deleteFragment := range manifest.DeleteFragments { + m.deleteFragments = append(m.deleteFragments, *fragment.FromProtobuf(deleteFragment)) + } + + m.version = manifest.Version +} + +func WriteManifestFile(manifest *Manifest, output file.File) status.Status { + protoManifestTmp := manifest.ToProtobuf() + + if !protoManifestTmp.Ok() { + return *protoManifestTmp.Status() + } + protoManifest := protoManifestTmp.Value() + + bytes, err := proto.Marshal(protoManifest) + if err != nil { + return status.InternalStateError("Failed to marshal manifest proto") + } + output.Write(bytes) + + return status.OK() + +} + +// TODO REMOVE BELOW CODE + +type DataFile struct { + path string + cols []string +} + +func (d *DataFile) Path() string { + return d.path +} + +func NewDataFile(path string) *DataFile { + return &DataFile{path: path} +} + +type ManifestV1 struct { + dataFiles []*DataFile +} + +func (m *ManifestV1) AddDataFiles(files ...*DataFile) { + m.dataFiles = append(m.dataFiles, files...) 
+} + +func (m *ManifestV1) DataFiles() []*DataFile { + return m.dataFiles +} + +func NewManifestV1() *ManifestV1 { + return &ManifestV1{} +} + +func WriteManifestFileV1(fs fs.Fs, manifest *ManifestV1) error { + // TODO + return nil +} diff --git a/go/storage/options/options.go b/go/storage/options/options.go new file mode 100644 index 00000000..0a58448a --- /dev/null +++ b/go/storage/options/options.go @@ -0,0 +1,102 @@ +package options + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/milvus-io/milvus-storage-format/common/status" + "github.com/milvus-io/milvus-storage-format/filter" + "github.com/milvus-io/milvus-storage-format/proto/manifest_proto" + "github.com/milvus-io/milvus-storage-format/proto/schema_proto" +) + +type Options struct { + Uri string +} + +func (o *Options) ToProtobuf() *manifest_proto.Options { + options := &manifest_proto.Options{} + options.Uri = o.Uri + return options +} + +func (o *Options) FromProtobuf(options *manifest_proto.Options) { + o.Uri = options.Uri +} + +type SchemaOptions struct { + PrimaryColumn string + VersionColumn string + VectorColumn string +} + +func (o *SchemaOptions) ToProtobuf() *schema_proto.SchemaOptions { + options := &schema_proto.SchemaOptions{} + options.PrimaryColumn = o.PrimaryColumn + options.VersionColumn = o.VersionColumn + options.VectorColumn = o.VectorColumn + return options +} + +func (o *SchemaOptions) FromProtobuf(options *schema_proto.SchemaOptions) { + o.PrimaryColumn = options.PrimaryColumn + o.VersionColumn = options.VersionColumn + o.VectorColumn = options.VectorColumn +} + +func (o *SchemaOptions) Validate(schema *arrow.Schema) status.Status { + if o.PrimaryColumn != "" { + primaryField, b := schema.FieldsByName(o.PrimaryColumn) + if !b { + return status.InvalidArgument("primary column not found") + } else if primaryField[0].Type.ID() != arrow.STRING && primaryField[0].Type.ID() != arrow.INT64 { + return status.InvalidArgument("primary column is not int64 or string") + } + } 
else { + return status.InvalidArgument("primary column is empty") + } + if o.VersionColumn != "" { + versionField, b := schema.FieldsByName(o.VersionColumn) + if !b { + return status.InvalidArgument("version column not found") + } else if versionField[0].Type.ID() != arrow.INT64 { + return status.InvalidArgument("version column is not int64") + } + } + if o.VectorColumn != "" { + vectorField, b := schema.FieldsByName(o.VectorColumn) + if !b { + return status.InvalidArgument("vector column not found") + } else if vectorField[0].Type.ID() != arrow.FIXED_SIZE_BINARY { + return status.InvalidArgument("vector column is not fixed size binary") + } + } else { + return status.InvalidArgument("vector column is empty") + } + return status.OK() +} + +type WriteOptions struct { + MaxRecordPerFile int64 +} + +func NewWriteOption() *WriteOptions { + return &WriteOptions{ + MaxRecordPerFile: 1024, + } +} + +type FsType int8 + +const ( + InMemory FsType = iota + LocalFS +) + +type SpaceOptions struct { + Fs FsType + VectorColumns []string +} + +type ReadOptions struct { + Filters map[string]filter.Filter + Columns []string +} diff --git a/go/storage/record_reader.go b/go/storage/record_reader.go index 39e9238b..6732c75b 100644 --- a/go/storage/record_reader.go +++ b/go/storage/record_reader.go @@ -5,14 +5,14 @@ import ( "sync/atomic" "github.com/apache/arrow/go/v12/arrow" - "github.com/milvus-io/milvus-storage-format/internal/format" - "github.com/milvus-io/milvus-storage-format/internal/format/parquet" - "github.com/milvus-io/milvus-storage-format/options" + "github.com/milvus-io/milvus-storage-format/io/format" + "github.com/milvus-io/milvus-storage-format/io/format/parquet" + "github.com/milvus-io/milvus-storage-format/storage/options" ) type DefaultRecordReader struct { ref int64 - space *DefaultSpace + space *ReferenceSpace options *options.ReadOptions curReader format.Reader nextPos int @@ -20,7 +20,7 @@ type DefaultRecordReader struct { err error } -func 
NewDefaultRecordReader(space *DefaultSpace, options *options.ReadOptions) *DefaultRecordReader { +func NewDefaultRecordReader(space *ReferenceSpace, options *options.ReadOptions) *DefaultRecordReader { return &DefaultRecordReader{ space: space, options: options, diff --git a/go/storage/reference_space.go b/go/storage/reference_space.go new file mode 100644 index 00000000..226e20ec --- /dev/null +++ b/go/storage/reference_space.go @@ -0,0 +1,98 @@ +package storage + +import ( + "fmt" + "github.com/apache/arrow/go/v12/arrow" + "github.com/apache/arrow/go/v12/arrow/array" + "github.com/google/uuid" + "github.com/milvus-io/milvus-storage-format/io/format" + "github.com/milvus-io/milvus-storage-format/io/format/parquet" + "github.com/milvus-io/milvus-storage-format/io/fs" + "github.com/milvus-io/milvus-storage-format/storage/manifest" + "github.com/milvus-io/milvus-storage-format/storage/options" +) + +type ReferenceSpace struct { + schema *arrow.Schema + fs fs.Fs + options *options.SpaceOptions + manifest *manifest.ManifestV1 +} + +func (s *ReferenceSpace) Write(reader array.RecordReader, options *options.WriteOptions) error { + // check schema consistency + if !s.schema.Equal(reader.Schema()) { + return ErrSchemaNotMatch + } + + var dataFiles []*manifest.DataFile + var writer format.Writer + var err error + // write data + for reader.Next() { + rec := reader.Record() + + if rec.NumRows() == 0 { + continue + } + + if writer == nil { + filePath := uuid.NewString() + ".parquet" + writer, err = parquet.NewFileWriter(s.schema, s.fs, filePath) + if err != nil { + return err + } + dataFiles = append(dataFiles, manifest.NewDataFile(filePath)) + } + + if err := writer.Write(rec); err != nil { + return err + } + + if writer.Count() >= options.MaxRecordPerFile { + if err := writer.Close(); err != nil { + return err + } + writer = nil + } + } + + if writer != nil { + if err := writer.Close(); err != nil { + return err + } + } + + // update manifest + if len(dataFiles) != 0 { + 
s.manifest.AddDataFiles(dataFiles...) + if err := manifest.WriteManifestFileV1(s.fs, s.manifest); err != nil { + return err + } + } + + return nil +} + +// Read returns a RecordReader. Remember to call Release after using the RecordReader. +func (s *ReferenceSpace) Read(options *options.ReadOptions) (array.RecordReader, error) { + // check read options + for _, col := range options.Columns { + if !s.schema.HasField(col) { + return nil, fmt.Errorf("%w: %s", ErrColumnNotExist, col) + } + } + + return NewDefaultRecordReader(s, options), nil +} + +func NewReferenceSpace(schema *arrow.Schema, options *options.SpaceOptions) *ReferenceSpace { + fsFactory := fs.NewFsFactory() + fs := fsFactory.Create(options.Fs) + return &ReferenceSpace{ + schema: schema, + fs: fs, + options: options, + manifest: manifest.NewManifestV1(), + } +} diff --git a/go/storage/schema/schema.go b/go/storage/schema/schema.go new file mode 100644 index 00000000..aac92002 --- /dev/null +++ b/go/storage/schema/schema.go @@ -0,0 +1,113 @@ +package schema + +import ( + "github.com/apache/arrow/go/v12/arrow" + "github.com/milvus-io/milvus-storage-format/common/result" + "github.com/milvus-io/milvus-storage-format/common/status" + "github.com/milvus-io/milvus-storage-format/common/utils" + "github.com/milvus-io/milvus-storage-format/proto/schema_proto" + "github.com/milvus-io/milvus-storage-format/storage/options" +) + +type Schema struct { + schema *arrow.Schema + scalarSchema *arrow.Schema + vectorSchema *arrow.Schema + deleteSchema *arrow.Schema + + options *options.SchemaOptions +} + +func (s *Schema) Schema() *arrow.Schema { + return s.schema +} + +func (s *Schema) Options() *options.SchemaOptions { + return s.options +} + +func NewSchema(schema *arrow.Schema, options *options.SchemaOptions) *Schema { + return &Schema{ + schema: schema, + options: options, + } +} + +func (s *Schema) ScalarSchema() *arrow.Schema { + return s.scalarSchema +} + +func (s *Schema) VectorSchema() *arrow.Schema { + return
s.vectorSchema +} + +func (s *Schema) DeleteSchema() *arrow.Schema { + return s.deleteSchema +} + +func (s *Schema) FromProtobuf(schema *schema_proto.Schema) status.Status { + schemaType := utils.FromProtobufSchema(schema.ArrowSchema) + if !schemaType.Ok() { + return status.ArrowError("invalid schema") + } + s.schema = schemaType.Value() + s.options.FromProtobuf(schema.GetSchemaOptions()) + s.BuildScalarSchema() + s.BuildVectorSchema() + s.BuildDeleteSchema() + return status.OK() +} + +func (s *Schema) ToProtobuf() *result.Result[*schema_proto.Schema] { + schema := &schema_proto.Schema{} + arrowSchema := utils.ToProtobufSchema(s.schema) + if !arrowSchema.Ok() { + return result.NewResultFromStatus[*schema_proto.Schema](*arrowSchema.Status()) + } + schema.ArrowSchema = arrowSchema.Value() + schema.SchemaOptions = s.options.ToProtobuf() + return result.NewResult[*schema_proto.Schema](schema) +} + +func (s *Schema) BuildScalarSchema() status.Status { + fields := make([]arrow.Field, 0, len(s.schema.Fields())) + for _, field := range s.schema.Fields() { + if field.Name == s.options.VectorColumn { + continue + } + fields = append(fields, field) + } + s.scalarSchema = arrow.NewSchema(fields, nil) + + return status.OK() +} + +func (s *Schema) BuildVectorSchema() status.Status { + fields := make([]arrow.Field, 0, len(s.schema.Fields())) + for _, field := range s.schema.Fields() { + if field.Name == s.options.VectorColumn || + field.Name == s.options.PrimaryColumn || + field.Name == s.options.VersionColumn { + fields = append(fields, field) + } + } + s.vectorSchema = arrow.NewSchema(fields, nil) + + return status.OK() +} + +func (s *Schema) BuildDeleteSchema() status.Status { + pkColumn, b := s.schema.FieldsByName(s.options.PrimaryColumn) + if !b { + return status.InvalidArgument("primary column not found") + } + versionField, b := s.schema.FieldsByName(s.options.VersionColumn) + if !b { + return status.InvalidArgument("version column not found") + } + fields := 
make([]arrow.Field, 0, 2) + fields = append(fields, pkColumn[0]) + fields = append(fields, versionField[0]) + s.deleteSchema = arrow.NewSchema(fields, nil) + return status.OK() +} diff --git a/go/storage/separate_vector_space.go b/go/storage/separate_vector_space.go deleted file mode 100644 index ce59f013..00000000 --- a/go/storage/separate_vector_space.go +++ /dev/null @@ -1,120 +0,0 @@ -package storage - -import ( - "github.com/apache/arrow/go/v12/arrow" - "github.com/apache/arrow/go/v12/arrow/array" - "github.com/google/uuid" - "github.com/milvus-io/milvus-storage-format/internal/format" - "github.com/milvus-io/milvus-storage-format/internal/format/parquet" - "github.com/milvus-io/milvus-storage-format/internal/fs" - "github.com/milvus-io/milvus-storage-format/internal/manifest" - "github.com/milvus-io/milvus-storage-format/options" -) - -type SeparateVectorSpace struct { - manifest *manifest.ManifestV2 - fs fs.Fs - options *options.SpaceOptions -} - -func (s *SeparateVectorSpace) Write(reader array.RecordReader, options *options.WriteOptions) error { - // check schema consistency - if !s.manifest.Schema().Equal(reader.Schema()) { - return ErrSchemaNotMatch - } - - scalarSchema, vectorSchema := s.manifest.ScalarSchema(), s.manifest.VectorSchema() - var ( - scalarWriter format.Writer - vectorWriter format.Writer - scalarFiles []*manifest.DataFile - vectorFiles []*manifest.DataFile - ) - - for reader.Next() { - rec := reader.Record() - - if rec.NumRows() == 0 { - continue - } - - var ( - err error - scalarDataFile *manifest.DataFile - vectorDataFile *manifest.DataFile - ) - - scalarWriter, scalarDataFile, err = s.write(scalarSchema, rec, scalarWriter, options) - if err != nil { - return err - } - if scalarDataFile != nil { - scalarFiles = append(scalarFiles, scalarDataFile) - } - - vectorWriter, vectorDataFile, err = s.write(vectorSchema, rec, vectorWriter, options) - if err != nil { - return err - } - if vectorDataFile != nil { - vectorFiles = append(vectorFiles, 
vectorDataFile) - } - } - - if scalarWriter != nil { - if err := scalarWriter.Close(); err != nil { - return err - } - if err := vectorWriter.Close(); err != nil { - return err - } - } - - if len(scalarFiles) != 0 { - s.manifest.AddScalarDataFiles(scalarFiles...) - s.manifest.AddVectorDataFiles(vectorFiles...) - if err := manifest.WriteManifestV2File(s.fs, s.manifest); err != nil { - return err - } - } - - return nil -} - -func (s *SeparateVectorSpace) write(schema *arrow.Schema, rec arrow.Record, writer format.Writer, opt *options.WriteOptions) (format.Writer, *manifest.DataFile, error) { - var arrs []arrow.Array - for i := 0; i < int(rec.NumCols()); i++ { - if schema.HasField(rec.ColumnName(i)) { - arrs = append(arrs, rec.Column(i)) - } - } - - var err error - var dataFile *manifest.DataFile - if writer == nil { - filePath := uuid.NewString() + ".parquet" - writer, err = parquet.NewFileWriter(schema, s.fs, filePath) - if err != nil { - return nil, nil, err - } - dataFile = manifest.NewDataFile(filePath) - } - - rec = array.NewRecord(schema, arrs, int64(rec.NumRows())) - if err := writer.Write(rec); err != nil { - return nil, nil, err - } - - if writer.Count() >= opt.MaxRowsPerFile { - if err := writer.Close(); err != nil { - return nil, nil, err - } - writer = nil - } - - return writer, dataFile, nil -} - -func (s *SeparateVectorSpace) Read(options *options.ReadOptions) (array.RecordReader, error) { - panic("not implemented") // TODO: Implement -} diff --git a/go/storage/space.go b/go/storage/space.go index 4eff0229..beecc9b8 100644 --- a/go/storage/space.go +++ b/go/storage/space.go @@ -2,16 +2,8 @@ package storage import ( "errors" - "fmt" - - "github.com/apache/arrow/go/v12/arrow" "github.com/apache/arrow/go/v12/arrow/array" - "github.com/google/uuid" - "github.com/milvus-io/milvus-storage-format/internal/format" - "github.com/milvus-io/milvus-storage-format/internal/format/parquet" - "github.com/milvus-io/milvus-storage-format/internal/fs" - 
"github.com/milvus-io/milvus-storage-format/internal/manifest" - "github.com/milvus-io/milvus-storage-format/options" + "github.com/milvus-io/milvus-storage-format/storage/options" ) var ( @@ -23,88 +15,3 @@ type Space interface { Write(reader array.RecordReader, options *options.WriteOptions) error Read(options *options.ReadOptions) (array.RecordReader, error) } - -type DefaultSpace struct { - schema *arrow.Schema - fs fs.Fs - options *options.SpaceOptions - manifest *manifest.ManifestV1 -} - -func (s *DefaultSpace) Write(reader array.RecordReader, options *options.WriteOptions) error { - // check schema consistency - if !s.schema.Equal(reader.Schema()) { - return ErrSchemaNotMatch - } - - var dataFiles []*manifest.DataFile - var writer format.Writer - var err error - // write data - for reader.Next() { - rec := reader.Record() - - if rec.NumRows() == 0 { - continue - } - - if writer == nil { - filePath := uuid.NewString() + ".parquet" - writer, err = parquet.NewFileWriter(s.schema, s.fs, filePath) - if err != nil { - return err - } - dataFiles = append(dataFiles, manifest.NewDataFile(filePath)) - } - - if err := writer.Write(rec); err != nil { - return err - } - - if writer.Count() >= options.MaxRowsPerFile { - if err := writer.Close(); err != nil { - return err - } - writer = nil - } - } - - if writer != nil { - if err := writer.Close(); err != nil { - return err - } - } - - // update manifest - if len(dataFiles) != 0 { - s.manifest.AddDataFiles(dataFiles...) - if err := manifest.WriteManifestFile(s.fs, s.manifest); err != nil { - return err - } - } - - return nil -} - -// Read return a RecordReader. 
Remember to call Release after using the RecordReader -func (s *DefaultSpace) Read(options *options.ReadOptions) (array.RecordReader, error) { - // check read options - for _, col := range options.Columns { - if !s.schema.HasField(col) { - return nil, fmt.Errorf("%w: %s", ErrColumnNotExist, col) - } - } - - return NewDefaultRecordReader(s, options), nil -} - -func NewDefaultSpace(schema *arrow.Schema, options *options.SpaceOptions) *DefaultSpace { - fsFactory := fs.NewFsFactory() - fs := fsFactory.Create(options.Fs) - return &DefaultSpace{ - schema: schema, - fs: fs, - options: options, - manifest: manifest.NewManifest(), - } -} diff --git a/go/storage/space_test.go b/go/storage/space_test.go index 8b2e577c..0135950f 100644 --- a/go/storage/space_test.go +++ b/go/storage/space_test.go @@ -1,13 +1,13 @@ package storage_test import ( + "github.com/milvus-io/milvus-storage-format/storage/options" "testing" "github.com/apache/arrow/go/v12/arrow" "github.com/apache/arrow/go/v12/arrow/array" "github.com/apache/arrow/go/v12/arrow/memory" "github.com/milvus-io/milvus-storage-format/filter" - "github.com/milvus-io/milvus-storage-format/options" "github.com/milvus-io/milvus-storage-format/storage" "github.com/stretchr/testify/suite" ) @@ -31,8 +31,8 @@ func (suite *DefaultSpaceTestSuite) TestSpaceReadWrite() { recReader, err := array.NewRecordReader(schema, []arrow.Record{rec}) suite.NoError(err) - space := storage.NewDefaultSpace(schema, &options.SpaceOptions{Fs: options.InMemory}) - writeOpt := &options.WriteOptions{MaxRowsPerFile: 10} + space := storage.NewReferenceSpace(schema, &options.SpaceOptions{Fs: options.InMemory}) + writeOpt := &options.WriteOptions{MaxRecordPerFile: 10} space.Write(recReader, writeOpt) f := filter.NewConstantFilter(filter.GreaterThan, int32(3)) From 6c70338bb013dbd67d167b6bd86a06b6615f1c6d Mon Sep 17 00:00:00 2001 From: Xwg Date: Thu, 22 Jun 2023 14:25:40 +0800 Subject: [PATCH 5/5] feat(storage): Remove redundant code --- 
go/storage/default_space.go | 99 +------------------------------------ 1 file changed, 1 insertion(+), 98 deletions(-) diff --git a/go/storage/default_space.go b/go/storage/default_space.go index 5f336a04..f89d5071 100644 --- a/go/storage/default_space.go +++ b/go/storage/default_space.go @@ -43,104 +43,7 @@ func NewSeparateVectorSpace(schema *schema.Schema, op *options.Options) *Default } } -func (s *DefaultSpace) Write(reader array.RecordReader, option *options.WriteOptions) error { - // check schema consistency - if !s.schema.Schema().Equal(reader.Schema()) { - return ErrSchemaNotMatch - } - - scalarSchema, vectorSchema := s.schema.ScalarSchema(), s.schema.VectorSchema() - - var ( - scalarWriter format.Writer - vectorWriter format.Writer - - scalarCols []arrow.Array - vectorCols []arrow.Array - - scalarFragment fragment.Fragment - vectorFragment fragment.Fragment - ) - - for reader.Next() { - rec := reader.Record() - - if rec.NumRows() == 0 { - continue - } - - for i := 0; i < int(rec.NumCols()); i++ { - if scalarSchema.HasField(rec.ColumnName(i)) { - scalarCols = append(scalarCols, rec.Column(i)) - } - if vectorSchema.HasField(rec.ColumnName(i)) { - vectorCols = append(vectorCols, rec.Column(i)) - } - } - - // add offset column - offsetValues := make([]int64, rec.NumRows()) - for i := 0; i < int(rec.NumRows()); i++ { - offsetValues[i] = int64(i) - } - builder := array.Int64Builder{} - builder.AppendValues(offsetValues, nil) - scalarCols = append(scalarCols, builder.NewArray()) - - scalarRecord := array.NewRecord(scalarSchema, scalarCols, rec.NumRows()) - vectorRecord := array.NewRecord(vectorSchema, vectorCols, rec.NumRows()) - - if scalarWriter == nil { - scalarFilePath := utils.GetNewParquetFilePath(s.manifest.SpaceOptions().Uri) - _, err := parquet.NewFileWriter(scalarSchema, s.fs, scalarFilePath) - if err != nil { - return err - } - - vectorFilePath := utils.GetNewParquetFilePath(s.manifest.SpaceOptions().Uri) - _, err = parquet.NewFileWriter(vectorSchema, 
s.fs, vectorFilePath) - if err != nil { - return err - } - - scalarFragment.AddFile(scalarFilePath) - vectorFragment.AddFile(vectorFilePath) - - } - - scalarWriter.Write(scalarRecord) - vectorWriter.Write(vectorRecord) - - if scalarWriter.Count() >= option.MaxRecordPerFile { - scalarWriter.Close() - vectorWriter.Close() - scalarWriter = nil - vectorWriter = nil - } - - } - - if scalarWriter != nil { - scalarWriter.Close() - vectorWriter.Close() - scalarWriter = nil - vectorWriter = nil - } - - copiedManifest := s.manifest.Copy() - oldVersion := s.manifest.Version() - scalarFragment.SetFragmentId(oldVersion + 1) - vectorFragment.SetFragmentId(oldVersion + 1) - copiedManifest.AddScalarFragment(scalarFragment) - copiedManifest.AddVectorFragment(vectorFragment) - copiedManifest.SetVersion(oldVersion + 1) - s.SafeSaveManifest(copiedManifest) - - s.manifest = new(mnf.Manifest) - return nil -} - -func (s *DefaultSpace) Write1(reader array.RecordReader, options *options.WriteOptions) error { +func (s *DefaultSpace) Write(reader array.RecordReader, options *options.WriteOptions) error { // check schema consistency if !s.schema.Schema().Equal(reader.Schema()) { return ErrSchemaNotMatch