From 733e539d20ae9fa1b67d6d808e0442317e87a292 Mon Sep 17 00:00:00 2001 From: Semisol Date: Fri, 29 Aug 2025 00:25:05 +0300 Subject: [PATCH 1/2] Improve Go bindings tuple support This commit: - Requires a minimum Go version 1.20 to allow using newer features in future commits - Adds support for the length-prefixed bytes type - Fixes certain panics detected by fuzzing on Unpack - Switches to binary.BigEndian methods for decoding integers which eliminates allocations and is faster - Switches to google/uuid for UUIDs to reduce conversion boilerplate as it seems to be the de-facto library for UUIDs - Does some other refactors for a future tuple "builder" to reduce allocations and eliminate type-casting overhead --- bindings/go/go.mod | 5 +- bindings/go/go.sum | 2 + bindings/go/src/fdb/tuple/tuple.go | 182 ++++++++++++++++++++++------- 3 files changed, 144 insertions(+), 45 deletions(-) create mode 100644 bindings/go/go.sum diff --git a/bindings/go/go.mod b/bindings/go/go.mod index 16e502baaf1..3a689dcd4da 100644 --- a/bindings/go/go.mod +++ b/bindings/go/go.mod @@ -1,4 +1,5 @@ module github.com/apple/foundationdb/bindings/go -// The FoundationDB go bindings currently have no external golang dependencies outside of -// the go standard library. 
+go 1.20 + +require github.com/google/uuid v1.6.0 diff --git a/bindings/go/go.sum b/bindings/go/go.sum new file mode 100644 index 00000000000..7790d7c3e03 --- /dev/null +++ b/bindings/go/go.sum @@ -0,0 +1,2 @@ +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= diff --git a/bindings/go/src/fdb/tuple/tuple.go b/bindings/go/src/fdb/tuple/tuple.go index dd6f7a61f4e..cc38654b67b 100644 --- a/bindings/go/src/fdb/tuple/tuple.go +++ b/bindings/go/src/fdb/tuple/tuple.go @@ -47,6 +47,7 @@ import ( "strings" "github.com/apple/foundationdb/bindings/go/src/fdb" + "github.com/google/uuid" ) // A TupleElement is one of the types that may be encoded in FoundationDB @@ -88,7 +89,7 @@ func printTuple(tuple Tuple, sb *strings.Builder) { sb.WriteString("") case string: sb.WriteString(strconv.Quote(t)) - case UUID: + case uuid.UUID: sb.WriteString("UUID(") sb.WriteString(t.String()) sb.WriteString(")") @@ -96,6 +97,10 @@ func printTuple(tuple Tuple, sb *strings.Builder) { sb.WriteString("b\"") sb.WriteString(fdb.Printable(t)) sb.WriteString("\"") + case FixedLen: + sb.WriteString("fb\"") + sb.WriteString(fdb.Printable(t)) + sb.WriteString("\"") default: // For user-defined and standard types, we use standard Go // printer, which itself uses Stringer interface. @@ -110,16 +115,16 @@ func printTuple(tuple Tuple, sb *strings.Builder) { sb.WriteString(")") } -// UUID wraps a basic byte array as a UUID. We do not provide any special -// methods for accessing or generating the UUID, but as Go does not provide -// a built-in UUID type, this simple wrapper allows for other libraries -// to write the output of their UUID type as a 16-byte array into -// an instance of this type.
-type UUID [16]byte - -func (uuid UUID) String() string { - return fmt.Sprintf("%x-%x-%x-%x-%x", uuid[0:4], uuid[4:6], uuid[6:8], uuid[8:10], uuid[10:]) -} +// FixedLen is a special type of byte slice that is encoded length-prefixed +// and without substitution. +// +// This is intended to store fixed-length identifiers like hashes and +// public keys, or for cases where variable-length strings are used and +// range-reads are not required. +// +// FixedLen only guarantees correct range read semantics for entries with the +// same length. +type FixedLen []byte // Versionstamp is struct for a FoundationDB verionstamp. Versionstamps are // 12 bytes long composed of a 10 byte transaction version and a 2 byte user @@ -174,6 +179,7 @@ const falseCode = 0x26 const trueCode = 0x27 const uuidCode = 0x30 const versionstampCode = 0x33 +const fixedLengthCode = 0x34 var sizeLimits = []uint64{ 1<<(0*8) - 1, @@ -340,11 +346,26 @@ func (p *packer) encodeDouble(d float64) { p.putBytes(scratch[:]) } -func (p *packer) encodeUUID(u UUID) { +func (p *packer) encodeUUID(u uuid.UUID) { p.putByte(uuidCode) p.putBytes(u[:]) } +func (p *packer) encodeFixedLen(f FixedLen) { + if len(f) < 256 { + p.putByte(fixedLengthCode) + p.putByte(byte(len(f))) + p.putBytes(f) + } else if len(f) < 65536 { + p.putByte(fixedLengthCode + 1) + p.putByte(byte(len(f) >> 8)) + p.putByte(byte(len(f) & 0xff)) + p.putBytes(f) + } else { + panic("FixedLen: too long") + } +} + func (p *packer) encodeVersionstamp(v Versionstamp) { p.putByte(versionstampCode) @@ -360,9 +381,17 @@ func (p *packer) encodeVersionstamp(v Versionstamp) { p.putBytes(v.Bytes()) } +func (p *packer) startTuple() { + p.putByte(nestedCode) +} + +func (p *packer) endTuple() { + p.putByte(0x00) +} + func (p *packer) encodeTuple(t Tuple, nested bool, versionstamps bool) { if nested { - p.putByte(nestedCode) + p.startTuple() } for i, e := range t { @@ -402,8 +431,10 @@ func (p *packer) encodeTuple(t Tuple, nested bool, versionstamps bool) { } else { 
p.putByte(falseCode) } - case UUID: + case uuid.UUID: p.encodeUUID(e) + case FixedLen: + p.encodeFixedLen(e) case Versionstamp: if versionstamps == false && e.TransactionVersion == incompleteTransactionVersion { panic(fmt.Sprintf("Incomplete Versionstamp included in vanilla tuple pack")) @@ -416,7 +447,7 @@ func (p *packer) encodeTuple(t Tuple, nested bool, versionstamps bool) { } if nested { - p.putByte(0x00) + p.endTuple() } } @@ -439,6 +470,14 @@ func (t Tuple) Pack() []byte { return p.buf } +// This acts the same as Pack() but adds the prefix. +func (t Tuple) PackWithPrefix(prefix []byte) []byte { + p := newPacker() + p.putBytes(prefix) + p.encodeTuple(t, false, false) + return p.buf +} + // PackWithVersionstamp packs the specified tuple into a key for versionstamp // operations. See Pack for more information. This function will return an error // if you attempt to pack a tuple with more than one versionstamp. This function will @@ -536,19 +575,22 @@ func findTerminator(b []byte) int { return length } -func decodeBytes(b []byte) ([]byte, int) { +func decodeBytes(b []byte) ([]byte, int, error) { idx := findTerminator(b[1:]) - return bytes.Replace(b[1:idx+1], []byte{0x00, 0xFF}, []byte{0x00}, -1), idx + 2 + if idx == -1 { + return nil, 0, errors.New("string does not have an end") + } + return bytes.Replace(b[1:idx+1], []byte{0x00, 0xFF}, []byte{0x00}, -1), idx + 2, nil } -func decodeString(b []byte) (string, int) { - bp, idx := decodeBytes(b) - return string(bp), idx +func decodeString(b []byte) (string, int, error) { + bp, idx, err := decodeBytes(b) + return string(bp), idx, err } -func decodeInt(b []byte) (interface{}, int) { +func decodeInt(b []byte) (interface{}, int, error) { if b[0] == intZeroCode { - return int64(0), 1 + return int64(0), 1, nil } var neg bool @@ -559,18 +601,21 @@ func decodeInt(b []byte) (interface{}, int) { neg = true } + if len(b) < (n + 1) { + return nil, 0, fmt.Errorf("insufficient bytes to decode int, need %d, have %d", n+1, len(b)) 
+ } + bp := make([]byte, 8) copy(bp[8-n:], b[1:n+1]) - var ret int64 - binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) + var ret int64 = int64(binary.BigEndian.Uint64(bp)) if neg { - return ret - int64(sizeLimits[n]), n + 1 + return ret - int64(sizeLimits[n]), n + 1, nil } if ret > 0 { - return ret, n + 1 + return ret, n + 1, nil } // The encoded value claimed to be positive yet when put in an int64 @@ -578,15 +623,18 @@ func decodeInt(b []byte) (interface{}, int) { // 64-bit value that uses the most significant bit. This can be fit in a // uint64, so return that. Note that this is the *only* time we return // a uint64. - return uint64(ret), n + 1 + return uint64(ret), n + 1, nil } -func decodeBigInt(b []byte) (interface{}, int) { +func decodeBigInt(b []byte) (interface{}, int, error) { val := new(big.Int) offset := 1 var length int if b[0] == negIntStart || b[0] == posIntEnd { + if len(b) <= 1 { + return nil, 0, fmt.Errorf("insufficient bytes to decode bigint, need 2 but have %d", len(b)) + } length = int(b[1]) if b[0] == negIntStart { length ^= 0xff @@ -598,6 +646,10 @@ func decodeBigInt(b []byte) (interface{}, int) { length = 8 } + if len(b) < (length + offset) { + return nil, 0, fmt.Errorf("insufficient bytes to decode bigint, need %d but have %d", length+offset, len(b)) + } + val.SetBytes(b[offset : length+offset]) if b[0] < intZeroCode { @@ -608,18 +660,17 @@ func decodeBigInt(b []byte) (interface{}, int) { // This is the only value that fits in an int64 or uint64 that is decoded with this function if val.Cmp(minInt64BigInt) == 0 { - return val.Int64(), length + offset + return val.Int64(), length + offset, nil } - return val, length + offset + return val, length + offset, nil } func decodeFloat(b []byte) (float32, int) { bp := make([]byte, 4) copy(bp, b[1:]) adjustFloatBytes(bp, false) - var ret float32 - binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) + var ret float32 = math.Float32frombits(binary.BigEndian.Uint32(bp)) return ret, 5 } @@
-627,17 +678,38 @@ func decodeDouble(b []byte) (float64, int) { bp := make([]byte, 8) copy(bp, b[1:]) adjustFloatBytes(bp, false) - var ret float64 - binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) + var ret float64 = math.Float64frombits(binary.BigEndian.Uint64(bp)) return ret, 9 } -func decodeUUID(b []byte) (UUID, int) { - var u UUID +func decodeUUID(b []byte) (uuid.UUID, int) { + var u uuid.UUID copy(u[:], b[1:]) return u, 17 } +func decodeFixedLen(b []byte) (FixedLen, int, error) { + off := 0 + length := 0 + if b[0] == fixedLengthCode { + if len(b) < 2 { + return nil, 0, errors.New("FixedLen: too short to decode") + } + length = int(b[1]) + off = 2 + } else if b[0] == fixedLengthCode+1 { + if len(b) < 3 { + return nil, 0, errors.New("FixedLen: too short to decode") + } + length = int(binary.BigEndian.Uint16(b[1:3])) + off = 3 + } + if len(b) < off+length { + return nil, 0, errors.New("FixedLen: too short to decode (content bytes)") + } + return b[off : off+length], off + length, nil +} + func decodeVersionstamp(b []byte) (Versionstamp, int) { var transactionVersion [10]byte var userVersion uint16 @@ -673,15 +745,33 @@ func decodeTuple(b []byte, nested bool) (Tuple, int, error) { return t, i + 1, nil } case b[i] == bytesCode: - el, off = decodeBytes(b[i:]) + var err error + el, off, err = decodeBytes(b[i:]) + if err != nil { + return nil, i, err + } case b[i] == stringCode: - el, off = decodeString(b[i:]) - case negIntStart+1 < b[i] && b[i] < posIntEnd: - el, off = decodeInt(b[i:]) - case negIntStart+1 == b[i] && (b[i+1]&0x80 != 0): - el, off = decodeInt(b[i:]) + var err error + el, off, err = decodeString(b[i:]) + if err != nil { + return nil, i, err + } + case negIntStart+1 == b[i] && len(b) <= (i+1): + return nil, i, fmt.Errorf("insufficient bytes to decode negative int at position %d", i) + case (negIntStart+1 == b[i] && (b[i+1]&0x80 != 0)) || + (negIntStart+1 < b[i] && b[i] < posIntEnd): + var err error + el, off, err = decodeInt(b[i:]) + if err != nil {
+ return nil, i, err + } + case negIntStart <= b[i] && b[i] <= posIntEnd: - el, off = decodeBigInt(b[i:]) + var err error + el, off, err = decodeBigInt(b[i:]) + if err != nil { + return nil, i, err + } case b[i] == floatCode: if i+5 > len(b) { return nil, i, fmt.Errorf("insufficient bytes to decode float starting at position %d of byte array for tuple", i) @@ -708,6 +798,12 @@ func decodeTuple(b []byte, nested bool) (Tuple, int, error) { return nil, i, fmt.Errorf("insufficient bytes to decode Versionstamp starting at position %d of byte array for tuple", i) } el, off = decodeVersionstamp(b[i:]) + case b[i] == fixedLengthCode || b[i] == fixedLengthCode+1: + var err error + el, off, err = decodeFixedLen(b[i:]) + if err != nil { + return nil, i, err + } case b[i] == nestedCode: var err error el, off, err = decodeTuple(b[i+1:], true) From a1dd772ebc3d586eb1e9a9d433c3b391380dd0d7 Mon Sep 17 00:00:00 2001 From: Semisol Date: Fri, 29 Aug 2025 00:30:11 +0300 Subject: [PATCH 2/2] Add new experimental tuple unpacker to Go bindings This commit adds an experimental tuple unpacker to the Go bindings which reduces unnecessary allocations and tries to optimize decoding. Tests for the FixedLen type and for negative numbers are also added, the existing tests are extended to ensure the unpacked value is identical to the starting value, and benchmarks are added. - Zero-copy is used for strings and byte slices whenever possible to eliminate allocations. - Surprisingly, a loop was faster than using standard library functions to locate the end of a string and check. - A function table was used as it could allow extensibility in the future and may be faster on certain platforms. - A custom Boxed type is used, which is larger than an interface{} but does not require contents to be allocated on the heap. This is effectively a tagged union. - A neat hack is used in the unpacking loop to only do 1 allocation in the majority of tuple decoding cases.
--- bindings/go/src/fdb/tuple/boxed.go | 383 ++++++++++++++++++ .../go/src/fdb/tuple/testdata/tuples.golden | Bin 1722 -> 2290 bytes bindings/go/src/fdb/tuple/tuple_test.go | 97 ++++- bindings/go/src/fdb/tuple/tuple_v2.go | 334 ++++++++++++++++ 4 files changed, 811 insertions(+), 3 deletions(-) create mode 100644 bindings/go/src/fdb/tuple/boxed.go create mode 100644 bindings/go/src/fdb/tuple/tuple_v2.go diff --git a/bindings/go/src/fdb/tuple/boxed.go b/bindings/go/src/fdb/tuple/boxed.go new file mode 100644 index 00000000000..edf3a983735 --- /dev/null +++ b/bindings/go/src/fdb/tuple/boxed.go @@ -0,0 +1,383 @@ +package tuple + +import ( + "encoding/binary" + "math" + "unsafe" + + "github.com/google/uuid" +) + +// A boxedType represents the value contained in a Boxed. +type boxedType byte + +const ( + // Zero value + boxedUnknown boxedType = iota + + // A control operator. The data being 1 = start of nested tuple, 2 = end of tuple. + boxedCtrl + + // A boxed nil value. + boxedNil + + // A boxed boolean value. The data being 1 = true, 0 = false. + boxedBool + + // A boxed byte string. The pointer contains the slice start, and the data contains the length. + boxedBytes + // A boxed UTF-8 string. The pointer contains the slice start, and the data contains the length. + boxedString + // A boxed nested tuple. The pointer contains the slice start, and the data contains the length. + boxedTuple + // A boxed fixed-length byte string. The pointer contains the slice start, and the data contains the length. + boxedFixedLen + + // A boxed int64. The data contains the value. + boxedInt64 + // A boxed uint64. The data contains the value. + boxedUint64 + // A boxed float32. The data contains the float32's bits in the lower 32 bits. + boxedFloat32 + // A boxed float64. The data contains the float64's bits. + boxedFloat64 + // A boxed UUID. The pointer points to the start of the 16 bytes of the UUID. + boxedUUID + // A boxed 12-byte versionstamp.
The pointer points to the start of the 12 bytes + // of the versionstamp. + boxedVersionstamp +) + +// A Boxed is a tagged union representing a tuple value that can be +// unboxed to retrieve the content. +// +// A boxed containing an integer can be cast to any integer type as long +// as it does not overflow. When using Unbox(), int64 is preferred over +// uint64. +type Boxed struct { + // The type contained in the box. + bt boxedType + // The pointer in the box. + ptr unsafe.Pointer + // The data in the box, or the length of the data. + data uint64 +} + +// A BoxedTuple is a tuple with values represented as Boxed. +type BoxedTuple []Boxed + +func newBoxedNil() Boxed { + return Boxed{bt: boxedNil} +} + +func newBoxedBool(b bool) Boxed { + val := Boxed{bt: boxedBool, data: 0} + if b { + val.data = 1 + } + return val +} + +func newBoxedBytes(b []byte) Boxed { + val := Boxed{bt: boxedBytes, ptr: unsafe.Pointer(unsafe.SliceData(b)), data: uint64(len(b))} + return val +} + +func newBoxedFixedLen(b []byte) Boxed { + val := Boxed{bt: boxedFixedLen, ptr: unsafe.Pointer(unsafe.SliceData(b)), data: uint64(len(b))} + return val +} + +func newBoxedTuple(b BoxedTuple) Boxed { + val := Boxed{bt: boxedTuple, ptr: unsafe.Pointer(unsafe.SliceData(b)), data: uint64(len(b))} + return val +} + +func newBoxedUUID(b []byte) Boxed { + val := Boxed{bt: boxedUUID, ptr: unsafe.Pointer(unsafe.SliceData(b)), data: 16} + return val +} + +func newBoxedVersionstamp(b []byte) Boxed { + val := Boxed{bt: boxedVersionstamp, ptr: unsafe.Pointer(unsafe.SliceData(b)), data: 12} + return val +} + +func newBoxedString(b []byte) Boxed { + val := Boxed{bt: boxedString, ptr: unsafe.Pointer(unsafe.SliceData(b)), data: uint64(len(b))} + return val +} + +func newBoxedInt64(b int64) Boxed { + return Boxed{bt: boxedInt64, data: uint64(b)} +} + +// newBoxedUInt64 boxes b as an int64 when it fits, so that Unbox() keeps +// preferring int64, and as a uint64 only when b exceeds math.MaxInt64. +func newBoxedUInt64(b uint64) Boxed { + if b > math.MaxInt64 { + return Boxed{bt: boxedUint64, data: b} + } + return Boxed{bt: boxedInt64, data: b} +} + +func newBoxedFloat64(b float64) Boxed { + return Boxed{bt: boxedFloat64, data:
math.Float64bits(b)} +} + +func newBoxedFloat32(b float32) Boxed { + return Boxed{bt: boxedFloat32, data: uint64(math.Float32bits(b))} +} + +func (b Boxed) assert(ok bool, msg string) { + if !ok { + panic(msg) + } +} + +// Checks if the given Boxed contains a nil value. +func (b Boxed) IsNil() bool { + return b.bt == boxedNil +} + +// Tries to cast the Boxed to a boolean, and returns the +// value and if the cast succeded. +func (b Boxed) SafeBool() (bool, bool) { + if b.bt != boxedBool { + return false, false + } + return b.data != 0, true +} + +// Tries to cast the Boxed to a boolean and panics if it fails. +func (b Boxed) Bool() bool { + sb, ok := b.SafeBool() + b.assert(ok, "cannot cast to bool") + return sb +} + +// Tries to cast the Boxed to a float64, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeFloat64() (float64, bool) { + if b.bt != boxedFloat64 { + return 0.0, false + } + return math.Float64frombits(b.data), true +} + +// Tries to cast the Boxed to a float64 and panics if it fails. +func (b Boxed) Float64() float64 { + v, ok := b.SafeFloat64() + b.assert(ok, "cannot cast to float64") + return v +} + +// Tries to cast the Boxed to a float32, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeFloat32() (float32, bool) { + if b.bt != boxedFloat32 { + return 0.0, false + } + return math.Float32frombits(uint32(b.data)), true +} + +// Tries to cast the Boxed to a float32 and panics if it fails. +func (b Boxed) Float32() float32 { + v, ok := b.SafeFloat32() + b.assert(ok, "cannot cast to float32") + return v +} + +// Tries to cast the Boxed to a BoxedTuple, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeTuple() (BoxedTuple, bool) { + if b.bt != boxedTuple { + return nil, false + } + return unsafe.Slice((*Boxed)(b.ptr), int(b.data)), true +} + +// Tries to cast the Boxed to a Tuple and panics if it fails. 
+func (b Boxed) Tuple() BoxedTuple { + v, ok := b.SafeTuple() + b.assert(ok, "cannot cast to BoxedTuple") + return v +} + +// Tries to cast the Boxed to a byte slice, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeBytes() ([]byte, bool) { + if b.bt != boxedBytes { + return nil, false + } + return unsafe.Slice((*byte)(b.ptr), int(b.data)), true +} + +// Tries to cast the Boxed to a byte slice and panics if it fails. +func (b Boxed) Bytes() []byte { + v, ok := b.SafeBytes() + b.assert(ok, "cannot cast to []byte") + return v +} + +// Tries to cast the Boxed to a string, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeString() (string, bool) { + if b.bt != boxedString { + return "", false + } + return unsafe.String((*byte)(b.ptr), int(b.data)), true +} + +// Tries to cast the Boxed to a string and panics if it fails. +// +// This is not named String() to not conflict with the function used to +// cast to a string for printing. +func (b Boxed) AsString() string { + v, ok := b.SafeString() + b.assert(ok, "cannot cast to string") + return v +} + +// Tries to cast the Boxed to a FixedLen, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeFixedLen() (FixedLen, bool) { + if b.bt != boxedFixedLen { + return nil, false + } + return unsafe.Slice((*byte)(b.ptr), int(b.data)), true +} + +// Tries to cast the Boxed to a FixedLen and panics if it fails. +func (b Boxed) FixedLen() FixedLen { + v, ok := b.SafeFixedLen() + b.assert(ok, "cannot cast to FixedLen") + return v +} + +// Tries to cast the Boxed to a UUID, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeUUID() (uuid.UUID, bool) { + if b.bt != boxedUUID { + return uuid.UUID{}, false + } + return uuid.UUID(unsafe.Slice((*byte)(b.ptr), 16)), true +} + +// Tries to cast the Boxed to a UUID and panics if it fails. 
+func (b Boxed) UUID() uuid.UUID { + v, ok := b.SafeUUID() + b.assert(ok, "cannot cast to UUID") + return v +} + +// Tries to cast the Boxed to a int64, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeInt64() (int64, bool) { + switch b.bt { + case boxedInt64: + return int64(b.data), true + case boxedUint64: + if b.data < 0x8000_0000_0000_0000 { + return int64(b.data), true + } + return 0, false + default: + return 0, false + } +} + +// Tries to cast the Boxed to a int64 and panics if it fails. +func (b Boxed) Int64() int64 { + v, ok := b.SafeInt64() + b.assert(ok, "cannot cast to int64") + return v +} + +// Tries to cast the Boxed to a Versionstamp, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeVersionstamp() (Versionstamp, bool) { + if b.bt != boxedVersionstamp { + return Versionstamp{}, false + } + slice := unsafe.Slice((*byte)(b.ptr), 12) + out := Versionstamp{} + out.TransactionVersion = [10]byte(slice[0:10]) + out.UserVersion = binary.BigEndian.Uint16(slice[10:12]) + return out, true +} + +// Tries to cast the Boxed to a Versionstamp and panics if it fails. +func (b Boxed) Versionstamp() Versionstamp { + v, ok := b.SafeVersionstamp() + b.assert(ok, "cannot cast to Versionstamp") + return v +} + +// Tries to cast the Boxed to a uint64, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeUint64() (uint64, bool) { + switch b.bt { + case boxedUint64: + return b.data, true + case boxedInt64: + if b.data < 0x8000_0000_0000_0000 { + return b.data, true + } + return 0, false + default: + return 0, false + } +} + +// Tries to cast the Boxed to a uint64 and panics if it fails. +func (b Boxed) Uint64() uint64 { + v, ok := b.SafeUint64() + b.assert(ok, "cannot cast to uint64") + return v +} + +// Unboxes the content of the Boxed to its value. 
+func (b Boxed) Unbox() any { + switch b.bt { + case boxedNil: + return nil + case boxedString: + return b.AsString() + case boxedBytes: + return b.Bytes() + case boxedFixedLen: + return b.FixedLen() + case boxedFloat32: + return b.Float32() + case boxedFloat64: + return b.Float64() + case boxedTuple: + return b.Tuple() + case boxedInt64: + return b.Int64() + case boxedUint64: + return b.Uint64() + case boxedBool: + return b.Bool() + case boxedUUID: + return b.UUID() + case boxedVersionstamp: + return b.Versionstamp() + default: + panic("unknown type") + } +} + +// Converts the BoxedTuple to a normal Tuple. +func (bt BoxedTuple) ToTuple() Tuple { + out := make(Tuple, len(bt)) + for i, entry := range bt { + if entry.bt == boxedTuple { + out[i] = entry.Tuple().ToTuple() + } else { + out[i] = entry.Unbox() + } + } + return out +} diff --git a/bindings/go/src/fdb/tuple/testdata/tuples.golden b/bindings/go/src/fdb/tuple/testdata/tuples.golden index 9f1c3f11dfe558388c950786656841ead933bfd5..9ddb85da788bb834882a8f04aa07849f0337908b 100644 GIT binary patch delta 652 zcmdnR`$3;(QBM&1N1H(U#hd{Q#WC51M$$wb))pNULR-~r*q~@iUWc*`d zG+|CmN;YNul1L{|PbYhDSu@h@7eNacCSde4OfF2FO8@`> delta 106 zcmV-w0G0pp5xNZ)4*!7!0RsPm009gE3IG892B`mn01lI-1Zn+a diff --git a/bindings/go/src/fdb/tuple/tuple_test.go b/bindings/go/src/fdb/tuple/tuple_test.go index 97383a4d336..0cca644962e 100644 --- a/bindings/go/src/fdb/tuple/tuple_test.go +++ b/bindings/go/src/fdb/tuple/tuple_test.go @@ -7,7 +7,11 @@ import ( "fmt" "math/rand" "os" + "reflect" + "strings" "testing" + + "github.com/google/uuid" ) var update = flag.Bool("update", false, "update .golden files") @@ -44,7 +48,7 @@ func writeGolden(t *testing.T, golden map[string][]byte) { } } -var testUUID = UUID{ +var testUUID = uuid.UUID{ 0x11, 0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, } @@ -81,7 +85,7 @@ var testCases = []struct { name string tuple Tuple }{ - {"Simple", Tuple{testUUID, "foobarbaz", 1234, nil}}, 
+ {"Simple", Tuple{testUUID, "foobarbaz", int64(1234), nil}}, {"Namespaces", Tuple{testUUID, "github", "com", "apple", "foundationdb", "tree"}}, {"ManyStrings", mktuple(genString, 8)}, {"ManyStringsNil", mktuple(genStringNil, 8)}, @@ -95,6 +99,8 @@ var testCases = []struct { {"UUIDs", Tuple{testUUID, true, testUUID, false, testUUID, true, testUUID, false, testUUID, true}}, {"NilCases", Tuple{"\x00", "\x00\xFF", "\x00\x00\x00", "\xFF\x00", ""}}, {"Nested", Tuple{testUUID, mktuple(genInt, 4), nil, mktuple(genBytes, 4), nil, mktuple(genDouble, 4), nil}}, + {"FixedLength", Tuple{FixedLen("abc"), FixedLen(strings.Repeat("a", 500)), "abc"}}, + {"Negatives", Tuple{int64(-1), int64(-100), int64(-1000), int64(-100000000), int64(-0x8000_0000_0000_0000), float64(-0.5)}}, } func TestTuplePacking(t *testing.T) { @@ -118,6 +124,17 @@ func TestTuplePacking(t *testing.T) { if !bytes.Equal(result, golden[tt.name]) { t.Errorf("packing mismatch: expected %v, got %v", golden[tt.name], result) } + + unpacked, err := Unpack(result) + if err != nil { + t.Errorf("unpack error: %v", err) + } else { + if tt.name == "Simple" { + } + if !reflect.DeepEqual(unpacked, tt.tuple) { + t.Errorf("unpack mismatch: expected %v, got %v", tt.tuple, unpacked) + } + } }) } @@ -126,17 +143,91 @@ func TestTuplePacking(t *testing.T) { } } +func TestTuplePackingV2(t *testing.T) { + var golden map[string][]byte + + if *update { + golden = make(map[string][]byte) + } else { + golden = loadGolden(t) + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + result := tt.tuple.Pack() + + if *update { + golden[tt.name] = result + return + } + + if !bytes.Equal(result, golden[tt.name]) { + t.Errorf("packing mismatch: expected %v, got %v", golden[tt.name], result) + } + + unpacked, err := UnpackToBoxed(result) + if err != nil { + t.Errorf("unpack error: %v", err) + } else { + if !reflect.DeepEqual(unpacked.ToTuple(), tt.tuple) { + t.Errorf("unpack mismatch: expected %v, got %v", tt.tuple, 
unpacked.ToTuple()) + } + } + }) + } +} + func BenchmarkTuplePacking(b *testing.B) { for _, bm := range testCases { b.Run(bm.name, func(b *testing.B) { + b.ReportAllocs() tuple := bm.tuple - for i := 0; i < b.N; i++ { + for b.Loop() { _ = tuple.Pack() } }) } } +func BenchmarkTupleUnpackV2(b *testing.B) { + for _, bm := range testCases { + b.Run(bm.name, func(b *testing.B) { + b.ReportAllocs() + t := bm.tuple + packed := t.Pack() + for b.Loop() { + _, _ = UnpackToBoxed(packed) + } + }) + } +} + +func BenchmarkTupleUnpackV2Normal(b *testing.B) { + for _, bm := range testCases { + b.Run(bm.name, func(b *testing.B) { + b.ReportAllocs() + t := bm.tuple + packed := t.Pack() + for b.Loop() { + _, _ = UnpackV2(packed) + } + }) + } +} + +func BenchmarkTupleUnpack(b *testing.B) { + for _, bm := range testCases { + b.Run(bm.name, func(b *testing.B) { + b.ReportAllocs() + t := bm.tuple + packed := t.Pack() + for b.Loop() { + _, _ = Unpack(packed) + } + }) + } +} + func TestTupleString(t *testing.T) { testCases := []struct { input Tuple diff --git a/bindings/go/src/fdb/tuple/tuple_v2.go b/bindings/go/src/fdb/tuple/tuple_v2.go new file mode 100644 index 00000000000..efb976b88bb --- /dev/null +++ b/bindings/go/src/fdb/tuple/tuple_v2.go @@ -0,0 +1,334 @@ +package tuple + +import ( + "bytes" + "encoding/binary" + "fmt" + "math" + "unsafe" +) + +// The reason we do not pass a streamingUnpacker is that the +// unpacker may escape to the heap, as all parameters to +// closures are assumed to escape to the heap. 
+type unpackerState struct { + buf []byte + ptr int + nesting int +} + +type unpackFunc func(unpackerState) (Boxed, int, error) + +type unpackFuncTab [256]unpackFunc + +func createByteDecoder(bt boxedType) unpackFunc { + return func(d unpackerState) (Boxed, int, error) { + end, err := findEnd(d.buf, d.ptr+1) + if err != nil { + return Boxed{}, 0, err + } + cptr := d.ptr + 1 + buf := d.buf[cptr : d.ptr+1+end.length] + if end.decoded != end.length { + buf = bytes.Replace(buf, []byte{0x00, 0xFF}, []byte{0x00}, end.length-end.decoded) + } + return Boxed{bt: bt, ptr: unsafe.Pointer(unsafe.SliceData(buf)), data: uint64(len(buf))}, 1 + 1 + end.length, nil + } +} + +func createIntDecoder(length int, negative bool) unpackFunc { + return func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + length + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for negative int64 (len %d)", length) + } + tmp := make([]byte, 8) + if negative { + for i := range tmp { + tmp[i] = 0xff + } + } + copy(tmp[8-length:], d.buf[d.ptr+1:d.ptr+1+length]) + if negative { + return newBoxedInt64(-int64(^binary.BigEndian.Uint64(tmp))), 1 + length, nil + } else { + return newBoxedInt64(int64(binary.BigEndian.Uint64(tmp))), 1 + length, nil + } + } +} + +func adjustFloatBytesDecode(b []byte) { + if b[0]&0x80 == 0x00 { + for i := 0; i < len(b); i++ { + b[i] = b[i] ^ 0xff + } + } else { + b[0] = b[0] ^ 0x80 + } +} + +var unpackFunctab unpackFuncTab = unpackFuncTab{ + nilCode: func(d unpackerState) (Boxed, int, error) { + if d.nesting > 0 { + if d.ptr+1 < len(d.buf) && + d.buf[d.ptr+1] == 0xff { + return newBoxedNil(), 2, nil + } else { + return Boxed{bt: boxedCtrl, data: 2}, 1, nil + } + } else { + return newBoxedNil(), 1, nil + } + }, + + bytesCode: createByteDecoder(boxedBytes), + stringCode: createByteDecoder(boxedString), + + trueCode: func(ds unpackerState) (Boxed, int, error) { + return newBoxedBool(true), 1, nil + }, + falseCode: func(ds unpackerState) (Boxed, int, error) { + 
return newBoxedBool(false), 1, nil + }, + + floatCode: func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + 4 + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for float32") + } + bytes := make([]byte, 4) + copy(bytes, d.buf[d.ptr+1:d.ptr+5]) + adjustFloatBytesDecode(bytes) + return newBoxedFloat32(math.Float32frombits(binary.BigEndian.Uint32(bytes))), 5, nil + }, + + doubleCode: func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + 8 + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for float64") + } + bytes := make([]byte, 8) + copy(bytes, d.buf[d.ptr+1:d.ptr+9]) + adjustFloatBytesDecode(bytes) + return newBoxedFloat64(math.Float64frombits(binary.BigEndian.Uint64(bytes))), 9, nil + }, + + uuidCode: func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + 16 + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for UUID") + } + u := newBoxedUUID(d.buf[d.ptr+1 : d.ptr+17]) + return u, 17, nil + }, + + versionstampCode: func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + 12 + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for Versionstamp") + } + vs := newBoxedVersionstamp(d.buf[d.ptr+1 : d.ptr+13]) + return vs, 13, nil + }, + + 0x0c: func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + 8 + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for negative int64 (len 8)") + } + buf := make([]byte, 8) + copy(buf, d.buf[d.ptr+1:d.ptr+9]) + inv := ^binary.BigEndian.Uint64(buf) + if inv > 0x8000_0000_0000_0000 { + return Boxed{}, 0, fmt.Errorf("bigint not supported") + } else { + return newBoxedInt64(-int64(inv)), 9, nil + } + }, + 0x0d: createIntDecoder(7, true), + 0x0e: createIntDecoder(6, true), + 0x0f: createIntDecoder(5, true), + 0x10: createIntDecoder(4, true), + 0x11: createIntDecoder(3, true), + 0x12: createIntDecoder(2, true), + 0x13: createIntDecoder(1, true), + // zero positive integer + 0x14: func(d unpackerState) (Boxed, int,
error) { + return newBoxedInt64(0), 1, nil + }, + 0x15: createIntDecoder(1, false), + 0x16: createIntDecoder(2, false), + 0x17: createIntDecoder(3, false), + 0x18: createIntDecoder(4, false), + 0x19: createIntDecoder(5, false), + 0x1a: createIntDecoder(6, false), + 0x1b: createIntDecoder(7, false), + 0x1c: func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + 8 + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for uint64 (len 8)") + } + buf := make([]byte, 8) + copy(buf, d.buf[d.ptr+1:d.ptr+9]) + return newBoxedUInt64(binary.BigEndian.Uint64(buf)), 9, nil + }, + + fixedLengthCode: func(d unpackerState) (Boxed, int, error) { + if d.ptr+1 >= len(d.buf) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for FixedLen") + } + dataLength := int(d.buf[d.ptr+1]) + if len(d.buf) < d.ptr+2+dataLength { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for FixedLen") + } + fl := newBoxedFixedLen(d.buf[d.ptr+2 : d.ptr+2+dataLength]) + return fl, 2 + dataLength, nil + }, + fixedLengthCode + 1: func(d unpackerState) (Boxed, int, error) { + if d.ptr+2 >= len(d.buf) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for FixedLen") + } + dataLength := int(binary.BigEndian.Uint16(d.buf[d.ptr+1 : d.ptr+3])) + if len(d.buf) < d.ptr+3+dataLength { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for FixedLen") + } + fl := newBoxedFixedLen(d.buf[d.ptr+3 : d.ptr+3+dataLength]) + return fl, 3 + dataLength, nil + }, + + nestedCode: func(d unpackerState) (Boxed, int, error) { + return Boxed{bt: boxedCtrl, data: 1}, 1, nil + }, +} + +type streamingUnpacker struct { + buf []byte + ptr int + nesting int +} + +type endInfo struct { + length int + decoded int +} + +//go:nosplit +func findEnd(buf []byte, start int) (ei endInfo, err error) { + ptr := unsafe.Pointer(&buf[start]) + rem := len(buf) - start + for rem > 0 { + if *(*byte)(ptr) == 0 { + ptr = unsafe.Add(ptr, 1) + rem-- + if rem > 0 && *(*byte)(ptr) == 255 { + ei.length++ + } else { + return + } + } + 
ei.length++ + ei.decoded++ + ptr = unsafe.Add(ptr, 1) + rem-- + } + err = fmt.Errorf("tuple byte string is not terminated") + return +} + +func (su *streamingUnpacker) Next() (Boxed, error) { + if len(su.buf) <= su.ptr { + return Boxed{}, nil + } + code := su.buf[su.ptr] + f := unpackFunctab[code] + if f == nil { + return Boxed{}, fmt.Errorf("unknown tuple type code %2x", code) + } + bv, n, err := f(unpackerState{buf: su.buf, ptr: su.ptr, nesting: su.nesting}) + if err != nil { + return Boxed{}, err + } + su.ptr += n + if bv.bt == boxedCtrl { + switch bv.data { + case 1: + su.nesting++ + case 2: + su.nesting-- + } + } + return bv, nil +} + +func (su *streamingUnpacker) HasMore() bool { + return len(su.buf) > su.ptr +} + +func unpackV2Internal(up *streamingUnpacker) (BoxedTuple, error) { + // This allows implementing "perfect" allocation of the tuple + // even though we don't know its length by: + // 1. Allocating the output slice on the stack with a hard to reach capacity + // 2. Appending to the output slice only without allowing it to escape + // 3. Copying the slice to a new heap-allocated slice + // + // If we did not copy the tuple in the end, the allocation would + // escape immediately on step 1, and we would allocate the entire + // 64x 24B slice, even if we don't come anywhere close. + // + // Starting with an empty slice would result in append gradually + // expanding the slice, which would result in multiple slices. + // + // This may spill to heap if we exceed 64 elements. This is very + // unlikely. 
+ n := make(BoxedTuple, 0, 64) + + // unpacking loop +loop: + for up.HasMore() { + next, err := up.Next() + if err != nil { + return nil, err + } + if next.bt == boxedCtrl { + switch next.data { + case 1: + v, err := unpackV2Internal(up) + if err != nil { + return nil, err + } + n = append(n, newBoxedTuple(v)) + case 2: + break loop + default: + panic("unknown action") + } + } else { + n = append(n, next) + } + } + c := make(BoxedTuple, len(n)) + copy(c, n) + return c, nil +} + +// UnpackToBoxed unpacks a byte slice into a FoundationDB +// tuple using BoxedTuple. +// +// The provided byte slice must not be modified while the +// tuple is being used, as zero-copy decoding is attempted. +func UnpackToBoxed(b []byte) (BoxedTuple, error) { + if len(b) == 0 { + return BoxedTuple{}, nil + } + unpacker := &streamingUnpacker{buf: b} + return unpackV2Internal(unpacker) +} + +// Unpack unpacks a byte slice into a FoundationDB tuple. +// +// The provided byte slice must not be modified while the +// tuple is being used, as zero-copy decoding is attempted. +func UnpackV2(b []byte) (Tuple, error) { + if len(b) == 0 { + return Tuple{}, nil + } + unpacker := &streamingUnpacker{buf: b} + u, err := unpackV2Internal(unpacker) + if err != nil { + return nil, err + } + return u.ToTuple(), nil +}