diff --git a/bindings/go/go.mod b/bindings/go/go.mod index 16e502baaf1..3a689dcd4da 100644 --- a/bindings/go/go.mod +++ b/bindings/go/go.mod @@ -1,4 +1,5 @@ module github.com/apple/foundationdb/bindings/go -// The FoundationDB go bindings currently have no external golang dependencies outside of -// the go standard library. +go 1.20 + +require github.com/google/uuid v1.6.0 // indirect diff --git a/bindings/go/go.sum b/bindings/go/go.sum new file mode 100644 index 00000000000..7790d7c3e03 --- /dev/null +++ b/bindings/go/go.sum @@ -0,0 +1,2 @@ +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= diff --git a/bindings/go/src/fdb/tuple/boxed.go b/bindings/go/src/fdb/tuple/boxed.go new file mode 100644 index 00000000000..edf3a983735 --- /dev/null +++ b/bindings/go/src/fdb/tuple/boxed.go @@ -0,0 +1,378 @@ +package tuple + +import ( + "encoding/binary" + "math" + "unsafe" + + "github.com/google/uuid" +) + +// A boxedType represents the value contained in a Boxed. +type boxedType byte + +const ( + // Zero value + boxedUnknown boxedType = iota + + // A control operator. The data being 1 = start of nested tuple, 2 = end of tuple. + boxedCtrl + + // A boxed nil value. + boxedNil + + // A boxed boolean value. The data being 1 = true, 0 = false. + boxedBool + + // A boxed byte string. The pointer contains the slice start, and the data contains the length. + boxedBytes + // A boxed UTF-8 string. The pointer contains the slice start, and the data contains the length. + boxedString + // A boxed nested tuple. The pointer contains the slice start, and the data contains the length. + boxedTuple + // A boxed fixed-length byte string. The pointer contains the slice start, and the data contains the length. + boxedFixedLen + + // A boxed int64. The data contains the value. + boxedInt64 + // A boxed uint64. The data contains the value. + boxedUint64 + // A boxed float32. The data contains the float32's bits in the lower 32 bits. + boxedFloat32 + // A boxed float64. The data contains the float64's bits. + boxedFloat64 + // A boxed UUID. The pointer points to the start of the 16 bytes of the UUID. + boxedUUID + // A boxed 12-byte versionstamp. The pointer points to the start of the 12 bytes + // of the versionstamp. + boxedVersionstamp +) + +// A Boxed is a tagged union representing a tuple value that can be +// unboxed to retrieve the content. +// +// A boxed containing an integer can be cast to any integer type as long +// as it does not overflow. When using Unbox(), int64 is preferred over +// uint64. +type Boxed struct { + // The type contained in the box. + bt boxedType + // The pointer in the box. + ptr unsafe.Pointer + // The data in the box, or the length of the data. + data uint64 +} + +// A BoxedTuple is a tuple with values represented as Boxed. +type BoxedTuple []Boxed + +func newBoxedNil() Boxed { + return Boxed{bt: boxedNil} +} + +func newBoxedBool(b bool) Boxed { + val := Boxed{bt: boxedBool, data: 0} + if b { + val.data = 1 + } + return val +} + +func newBoxedBytes(b []byte) Boxed { + val := Boxed{bt: boxedBytes, ptr: unsafe.Pointer(unsafe.SliceData(b)), data: uint64(len(b))} + return val +} + +func newBoxedFixedLen(b []byte) Boxed { + val := Boxed{bt: boxedFixedLen, ptr: unsafe.Pointer(unsafe.SliceData(b)), data: uint64(len(b))} + return val +} + +func newBoxedTuple(b BoxedTuple) Boxed { + val := Boxed{bt: boxedTuple, ptr: unsafe.Pointer(unsafe.SliceData(b)), data: uint64(len(b))} + return val +} + +func newBoxedUUID(b []byte) Boxed { + val := Boxed{bt: boxedUUID, ptr: unsafe.Pointer(unsafe.SliceData(b)), data: 16} + return val +} + +func newBoxedVersionstamp(b []byte) Boxed { + val := Boxed{bt: boxedVersionstamp, ptr: unsafe.Pointer(unsafe.SliceData(b)), data: 12} + return val +} + +func newBoxedString(b []byte) Boxed { + val := Boxed{bt: boxedString, ptr: unsafe.Pointer(unsafe.SliceData(b)), data: uint64(len(b))} + return val +} + +func newBoxedInt64(b int64) Boxed { + return Boxed{bt: boxedInt64, data: uint64(b)} +} + +func newBoxedUInt64(b uint64) Boxed { + return Boxed{bt: boxedInt64, data: b} +} + +func newBoxedFloat64(b float64) Boxed { + return Boxed{bt: boxedFloat64, data: math.Float64bits(b)} +} + +func newBoxedFloat32(b float32) Boxed { + return Boxed{bt: boxedFloat32, data: uint64(math.Float32bits(b))} +} + +func (b Boxed) assert(ok bool, msg string) { + if !ok { + panic(msg) + } +} + +// Checks if the given Boxed contains a nil value. +func (b Boxed) IsNil() bool { + return b.bt == boxedNil +} + +// Tries to cast the Boxed to a boolean, and returns the +// value and if the cast succeded. +func (b Boxed) SafeBool() (bool, bool) { + if b.bt != boxedBool { + return false, false + } + return b.data != 0, true +} + +// Tries to cast the Boxed to a boolean and panics if it fails. +func (b Boxed) Bool() bool { + sb, ok := b.SafeBool() + b.assert(ok, "cannot cast to bool") + return sb +} + +// Tries to cast the Boxed to a float64, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeFloat64() (float64, bool) { + if b.bt != boxedFloat64 { + return 0.0, false + } + return math.Float64frombits(b.data), true +} + +// Tries to cast the Boxed to a float64 and panics if it fails. +func (b Boxed) Float64() float64 { + v, ok := b.SafeFloat64() + b.assert(ok, "cannot cast to float64") + return v +} + +// Tries to cast the Boxed to a float32, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeFloat32() (float32, bool) { + if b.bt != boxedFloat32 { + return 0.0, false + } + return math.Float32frombits(uint32(b.data)), true +} + +// Tries to cast the Boxed to a float32 and panics if it fails. +func (b Boxed) Float32() float32 { + v, ok := b.SafeFloat32() + b.assert(ok, "cannot cast to float32") + return v +} + +// Tries to cast the Boxed to a BoxedTuple, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeTuple() (BoxedTuple, bool) { + if b.bt != boxedTuple { + return nil, false + } + return unsafe.Slice((*Boxed)(b.ptr), int(b.data)), true +} + +// Tries to cast the Boxed to a Tuple and panics if it fails. +func (b Boxed) Tuple() BoxedTuple { + v, ok := b.SafeTuple() + b.assert(ok, "cannot cast to BoxedTuple") + return v +} + +// Tries to cast the Boxed to a byte slice, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeBytes() ([]byte, bool) { + if b.bt != boxedBytes { + return nil, false + } + return unsafe.Slice((*byte)(b.ptr), int(b.data)), true +} + +// Tries to cast the Boxed to a byte slice and panics if it fails. +func (b Boxed) Bytes() []byte { + v, ok := b.SafeBytes() + b.assert(ok, "cannot cast to []byte") + return v +} + +// Tries to cast the Boxed to a string, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeString() (string, bool) { + if b.bt != boxedString { + return "", false + } + return unsafe.String((*byte)(b.ptr), int(b.data)), true +} + +// Tries to cast the Boxed to a string and panics if it fails. +// +// This is not named String() to not conflict with the function used to +// cast to a string for printing. +func (b Boxed) AsString() string { + v, ok := b.SafeString() + b.assert(ok, "cannot cast to string") + return v +} + +// Tries to cast the Boxed to a FixedLen, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeFixedLen() (FixedLen, bool) { + if b.bt != boxedFixedLen { + return nil, false + } + return unsafe.Slice((*byte)(b.ptr), int(b.data)), true +} + +// Tries to cast the Boxed to a FixedLen and panics if it fails. +func (b Boxed) FixedLen() FixedLen { + v, ok := b.SafeFixedLen() + b.assert(ok, "cannot cast to FixedLen") + return v +} + +// Tries to cast the Boxed to a UUID, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeUUID() (uuid.UUID, bool) { + if b.bt != boxedUUID { + return uuid.UUID{}, false + } + return uuid.UUID(unsafe.Slice((*byte)(b.ptr), 16)), true +} + +// Tries to cast the Boxed to a UUID and panics if it fails. +func (b Boxed) UUID() uuid.UUID { + v, ok := b.SafeUUID() + b.assert(ok, "cannot cast to UUID") + return v +} + +// Tries to cast the Boxed to a int64, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeInt64() (int64, bool) { + switch b.bt { + case boxedInt64: + return int64(b.data), true + case boxedUint64: + if b.data < 0x8000_0000_0000_0000 { + return int64(b.data), true + } + return 0, false + default: + return 0, false + } +} + +// Tries to cast the Boxed to a int64 and panics if it fails. +func (b Boxed) Int64() int64 { + v, ok := b.SafeInt64() + b.assert(ok, "cannot cast to int64") + return v +} + +// Tries to cast the Boxed to a Versionstamp, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeVersionstamp() (Versionstamp, bool) { + if b.bt != boxedVersionstamp { + return Versionstamp{}, false + } + slice := unsafe.Slice((*byte)(b.ptr), 12) + out := Versionstamp{} + out.TransactionVersion = [10]byte(slice[0:10]) + out.UserVersion = binary.BigEndian.Uint16(slice[10:12]) + return out, true +} + +// Tries to cast the Boxed to a Versionstamp and panics if it fails. +func (b Boxed) Versionstamp() Versionstamp { + v, ok := b.SafeVersionstamp() + b.assert(ok, "cannot cast to Versionstamp") + return v +} + +// Tries to cast the Boxed to a uint64, and returns the +// cast value and if the cast succeded. +func (b Boxed) SafeUint64() (uint64, bool) { + switch b.bt { + case boxedUint64: + return b.data, true + case boxedInt64: + if b.data < 0x8000_0000_0000_0000 { + return b.data, true + } + return 0, false + default: + return 0, false + } +} + +// Tries to cast the Boxed to a uint64 and panics if it fails. +func (b Boxed) Uint64() uint64 { + v, ok := b.SafeUint64() + b.assert(ok, "cannot cast to uint64") + return v +} + +// Unboxes the content of the Boxed to its value. +func (b Boxed) Unbox() any { + switch b.bt { + case boxedNil: + return nil + case boxedString: + return b.AsString() + case boxedBytes: + return b.Bytes() + case boxedFixedLen: + return b.FixedLen() + case boxedFloat32: + return b.Float32() + case boxedFloat64: + return b.Float64() + case boxedTuple: + return b.Tuple() + case boxedInt64: + return b.Int64() + case boxedUint64: + return b.Uint64() + case boxedBool: + return b.Bool() + case boxedUUID: + return b.UUID() + case boxedVersionstamp: + return b.Versionstamp() + default: + panic("unknown type") + } +} + +// Converts the BoxedTuple to a normal Tuple. +func (bt BoxedTuple) ToTuple() Tuple { + out := make(Tuple, len(bt)) + for i, entry := range bt { + if entry.bt == boxedTuple { + out[i] = entry.Tuple().ToTuple() + } else { + out[i] = entry.Unbox() + } + } + return out +} diff --git a/bindings/go/src/fdb/tuple/testdata/tuples.golden b/bindings/go/src/fdb/tuple/testdata/tuples.golden index 9f1c3f11dfe..9ddb85da788 100644 Binary files a/bindings/go/src/fdb/tuple/testdata/tuples.golden and b/bindings/go/src/fdb/tuple/testdata/tuples.golden differ diff --git a/bindings/go/src/fdb/tuple/tuple.go b/bindings/go/src/fdb/tuple/tuple.go index dd6f7a61f4e..cc38654b67b 100644 --- a/bindings/go/src/fdb/tuple/tuple.go +++ b/bindings/go/src/fdb/tuple/tuple.go @@ -47,6 +47,7 @@ import ( "strings" "github.com/apple/foundationdb/bindings/go/src/fdb" + "github.com/google/uuid" ) // A TupleElement is one of the types that may be encoded in FoundationDB @@ -88,7 +89,7 @@ func printTuple(tuple Tuple, sb *strings.Builder) { sb.WriteString("") case string: sb.WriteString(strconv.Quote(t)) - case UUID: + case uuid.UUID: sb.WriteString("UUID(") sb.WriteString(t.String()) sb.WriteString(")") @@ -96,6 +97,10 @@ func printTuple(tuple Tuple, sb *strings.Builder) { sb.WriteString("b\"") sb.WriteString(fdb.Printable(t)) sb.WriteString("\"") + case FixedLen: + sb.WriteString("fb\"") + sb.WriteString(fdb.Printable(t)) + sb.WriteString("\"") default: // For user-defined and standard types, we use standard Go // printer, which itself uses Stringer interface. @@ -110,16 +115,16 @@ func printTuple(tuple Tuple, sb *strings.Builder) { sb.WriteString(")") } -// UUID wraps a basic byte array as a UUID. We do not provide any special -// methods for accessing or generating the UUID, but as Go does not provide -// a built-in UUID type, this simple wrapper allows for other libraries -// to write the output of their UUID type as a 16-byte array into -// an instance of this type. -type UUID [16]byte - -func (uuid UUID) String() string { - return fmt.Sprintf("%x-%x-%x-%x-%x", uuid[0:4], uuid[4:6], uuid[6:8], uuid[8:10], uuid[10:]) -} +// FixedLen is a special type of byte slice that is encoded length-prefixed +// and without substitution. +// +// This is intended to store fixed-length identifiers like hashes and +// public keys, or for cases where variable-length strings are used and +// range-reads are not required. +// +// FixedLen only guarantees correct range read semantics for entries with the +// same length. +type FixedLen []byte // Versionstamp is struct for a FoundationDB verionstamp. Versionstamps are // 12 bytes long composed of a 10 byte transaction version and a 2 byte user @@ -174,6 +179,7 @@ const falseCode = 0x26 const trueCode = 0x27 const uuidCode = 0x30 const versionstampCode = 0x33 +const fixedLengthCode = 0x34 var sizeLimits = []uint64{ 1<<(0*8) - 1, @@ -340,11 +346,26 @@ func (p *packer) encodeDouble(d float64) { p.putBytes(scratch[:]) } -func (p *packer) encodeUUID(u UUID) { +func (p *packer) encodeUUID(u uuid.UUID) { p.putByte(uuidCode) p.putBytes(u[:]) } +func (p *packer) encodeFixedLen(f FixedLen) { + if len(f) < 256 { + p.putByte(fixedLengthCode) + p.putByte(byte(len(f))) + p.putBytes(f) + } else if len(f) < 65536 { + p.putByte(fixedLengthCode + 1) + p.putByte(byte(len(f) >> 8)) + p.putByte(byte(len(f) & 0xff)) + p.putBytes(f) + } else { + panic("FixedLen: too long") + } +} + func (p *packer) encodeVersionstamp(v Versionstamp) { p.putByte(versionstampCode) @@ -360,9 +381,17 @@ func (p *packer) encodeVersionstamp(v Versionstamp) { p.putBytes(v.Bytes()) } +func (p *packer) startTuple() { + p.putByte(nestedCode) +} + +func (p *packer) endTuple() { + p.putByte(0x00) +} + func (p *packer) encodeTuple(t Tuple, nested bool, versionstamps bool) { if nested { - p.putByte(nestedCode) + p.startTuple() } for i, e := range t { @@ -402,8 +431,10 @@ func (p *packer) encodeTuple(t Tuple, nested bool, versionstamps bool) { } else { p.putByte(falseCode) } - case UUID: + case uuid.UUID: p.encodeUUID(e) + case FixedLen: + p.encodeFixedLen(e) case Versionstamp: if versionstamps == false && e.TransactionVersion == incompleteTransactionVersion { panic(fmt.Sprintf("Incomplete Versionstamp included in vanilla tuple pack")) @@ -416,7 +447,7 @@ func (p *packer) encodeTuple(t Tuple, nested bool, versionstamps bool) { } if nested { - p.putByte(0x00) + p.endTuple() } } @@ -439,6 +470,14 @@ func (t Tuple) Pack() []byte { return p.buf } +// This acts the same as Pack() but adds the prefix. +func (t Tuple) PackWithPrefix(prefix []byte) []byte { + p := newPacker() + p.putBytes(prefix) + p.encodeTuple(t, false, false) + return p.buf +} + // PackWithVersionstamp packs the specified tuple into a key for versionstamp // operations. See Pack for more information. This function will return an error // if you attempt to pack a tuple with more than one versionstamp. This function will @@ -536,19 +575,22 @@ func findTerminator(b []byte) int { return length } -func decodeBytes(b []byte) ([]byte, int) { +func decodeBytes(b []byte) ([]byte, int, error) { idx := findTerminator(b[1:]) - return bytes.Replace(b[1:idx+1], []byte{0x00, 0xFF}, []byte{0x00}, -1), idx + 2 + if idx == -1 { + return nil, 0, errors.New("string does not have an end") + } + return bytes.Replace(b[1:idx+1], []byte{0x00, 0xFF}, []byte{0x00}, -1), idx + 2, nil } -func decodeString(b []byte) (string, int) { - bp, idx := decodeBytes(b) - return string(bp), idx +func decodeString(b []byte) (string, int, error) { + bp, idx, err := decodeBytes(b) + return string(bp), idx, err } -func decodeInt(b []byte) (interface{}, int) { +func decodeInt(b []byte) (interface{}, int, error) { if b[0] == intZeroCode { - return int64(0), 1 + return int64(0), 1, nil } var neg bool @@ -559,18 +601,21 @@ func decodeInt(b []byte) (interface{}, int) { neg = true } + if len(b) < (n + 1) { + return nil, 0, fmt.Errorf("insufficient bytes to decode int, need %d, have %d", n+1, len(b)) + } + bp := make([]byte, 8) copy(bp[8-n:], b[1:n+1]) - var ret int64 - binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) + var ret int64 = int64(binary.BigEndian.Uint64(bp)) if neg { - return ret - int64(sizeLimits[n]), n + 1 + return ret - int64(sizeLimits[n]), n + 1, nil } if ret > 0 { - return ret, n + 1 + return ret, n + 1, nil } // The encoded value claimed to be positive yet when put in an int64 @@ -578,15 +623,18 @@ func decodeInt(b []byte) (interface{}, int) { // 64-bit value that uses the most significant bit. This can be fit in a // uint64, so return that. Note that this is the *only* time we return // a uint64. - return uint64(ret), n + 1 + return uint64(ret), n + 1, nil } -func decodeBigInt(b []byte) (interface{}, int) { +func decodeBigInt(b []byte) (interface{}, int, error) { val := new(big.Int) offset := 1 var length int if b[0] == negIntStart || b[0] == posIntEnd { + if len(b) <= 1 { + return nil, 0, fmt.Errorf("insufficient bytes to decode bigint, need 2 but have %d", len(b)) + } length = int(b[1]) if b[0] == negIntStart { length ^= 0xff @@ -598,6 +646,10 @@ func decodeBigInt(b []byte) (interface{}, int) { length = 8 } + if len(b) <= (length + offset) { + return nil, 0, fmt.Errorf("insufficient bytes to decode bigint, need %d but have %d", length+offset, len(b)) + } + val.SetBytes(b[offset : length+offset]) if b[0] < intZeroCode { @@ -608,18 +660,17 @@ func decodeBigInt(b []byte) (interface{}, int) { // This is the only value that fits in an int64 or uint64 that is decoded with this function if val.Cmp(minInt64BigInt) == 0 { - return val.Int64(), length + offset + return val.Int64(), length + offset, nil } - return val, length + offset + return val, length + offset, nil } func decodeFloat(b []byte) (float32, int) { bp := make([]byte, 4) copy(bp, b[1:]) adjustFloatBytes(bp, false) - var ret float32 - binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) + var ret float32 = math.Float32frombits(binary.BigEndian.Uint32(bp)) return ret, 5 } @@ -627,17 +678,38 @@ func decodeDouble(b []byte) (float64, int) { bp := make([]byte, 8) copy(bp, b[1:]) adjustFloatBytes(bp, false) - var ret float64 - binary.Read(bytes.NewBuffer(bp), binary.BigEndian, &ret) + var ret float64 = math.Float64frombits(binary.BigEndian.Uint64(bp)) return ret, 9 } -func decodeUUID(b []byte) (UUID, int) { - var u UUID +func decodeUUID(b []byte) (uuid.UUID, int) { + var u uuid.UUID copy(u[:], b[1:]) return u, 17 } +func decodeFixedLen(b []byte) (FixedLen, int, error) { + off := 0 + length := 0 + if b[0] == fixedLengthCode { + if len(b) < 2 { + return nil, 0, errors.New("FixedLen: too short to decode") + } + length = int(b[1]) + off = 2 + } else if b[0] == fixedLengthCode+1 { + if len(b) < 3 { + return nil, 0, errors.New("FixedLen: too short to decode") + } + length = int(binary.BigEndian.Uint16(b[1:3])) + off = 3 + } + if len(b) < off+length { + return nil, 0, errors.New("FixedLen: too short to decode (content bytes)") + } + return b[off : off+length], off + length, nil +} + func decodeVersionstamp(b []byte) (Versionstamp, int) { var transactionVersion [10]byte var userVersion uint16 @@ -673,15 +745,33 @@ func decodeTuple(b []byte, nested bool) (Tuple, int, error) { return t, i + 1, nil } case b[i] == bytesCode: - el, off = decodeBytes(b[i:]) + var err error + el, off, err = decodeBytes(b[i:]) + if err != nil { + return nil, i, err + } case b[i] == stringCode: - el, off = decodeString(b[i:]) - case negIntStart+1 < b[i] && b[i] < posIntEnd: - el, off = decodeInt(b[i:]) - case negIntStart+1 == b[i] && (b[i+1]&0x80 != 0): - el, off = decodeInt(b[i:]) + var err error + el, off, err = decodeString(b[i:]) + if err != nil { + return nil, i, err + } + case negIntStart+1 == b[i] && len(b) <= (i+1): + return nil, i, fmt.Errorf("insufficient bytes to negative int at position %d", i) + case (negIntStart+1 == b[i] && (b[i+1]&0x80 == 0)) || + (negIntStart+1 < b[i] && b[i] < posIntEnd): + var err error + el, off, err = decodeInt(b[i:]) + if err != nil { + return nil, i, err + } + case negIntStart <= b[i] && b[i] <= posIntEnd: - el, off = decodeBigInt(b[i:]) + var err error + el, off, err = decodeBigInt(b[i:]) + if err != nil { + return nil, i, err + } case b[i] == floatCode: if i+5 > len(b) { return nil, i, fmt.Errorf("insufficient bytes to decode float starting at position %d of byte array for tuple", i) @@ -708,6 +798,12 @@ func decodeTuple(b []byte, nested bool) (Tuple, int, error) { return nil, i, fmt.Errorf("insufficient bytes to decode Versionstamp starting at position %d of byte array for tuple", i) } el, off = decodeVersionstamp(b[i:]) + case b[i] == fixedLengthCode || b[i] == fixedLengthCode+1: + var err error + el, off, err = decodeFixedLen(b[i:]) + if err != nil { + return nil, i, err + } case b[i] == nestedCode: var err error el, off, err = decodeTuple(b[i+1:], true) diff --git a/bindings/go/src/fdb/tuple/tuple_test.go b/bindings/go/src/fdb/tuple/tuple_test.go index 97383a4d336..0cca644962e 100644 --- a/bindings/go/src/fdb/tuple/tuple_test.go +++ b/bindings/go/src/fdb/tuple/tuple_test.go @@ -7,7 +7,11 @@ import ( "fmt" "math/rand" "os" + "reflect" + "strings" "testing" + + "github.com/google/uuid" ) var update = flag.Bool("update", false, "update .golden files") @@ -44,7 +48,7 @@ func writeGolden(t *testing.T, golden map[string][]byte) { } } -var testUUID = UUID{ +var testUUID = uuid.UUID{ 0x11, 0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, 0x11, 0x00, 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF, } @@ -81,7 +85,7 @@ var testCases = []struct { name string tuple Tuple }{ - {"Simple", Tuple{testUUID, "foobarbaz", 1234, nil}}, + {"Simple", Tuple{testUUID, "foobarbaz", int64(1234), nil}}, {"Namespaces", Tuple{testUUID, "github", "com", "apple", "foundationdb", "tree"}}, {"ManyStrings", mktuple(genString, 8)}, {"ManyStringsNil", mktuple(genStringNil, 8)}, @@ -95,6 +99,8 @@ var testCases = []struct { {"UUIDs", Tuple{testUUID, true, testUUID, false, testUUID, true, testUUID, false, testUUID, true}}, {"NilCases", Tuple{"\x00", "\x00\xFF", "\x00\x00\x00", "\xFF\x00", ""}}, {"Nested", Tuple{testUUID, mktuple(genInt, 4), nil, mktuple(genBytes, 4), nil, mktuple(genDouble, 4), nil}}, + {"FixedLength", Tuple{FixedLen("abc"), FixedLen(strings.Repeat("a", 500)), "abc"}}, + {"Negatives", Tuple{int64(-1), int64(-100), int64(-1000), int64(-100000000), int64(-0x8000_0000_0000_0000), float64(-0.5)}}, } func TestTuplePacking(t *testing.T) { @@ -118,6 +124,17 @@ func TestTuplePacking(t *testing.T) { if !bytes.Equal(result, golden[tt.name]) { t.Errorf("packing mismatch: expected %v, got %v", golden[tt.name], result) } + + unpacked, err := Unpack(result) + if err != nil { + t.Errorf("unpack error: %v", err) + } else { + if tt.name == "Simple" { + } + if !reflect.DeepEqual(unpacked, tt.tuple) { + t.Errorf("unpack mismatch: expected %v, got %v", tt.tuple, unpacked) + } + } }) } @@ -126,17 +143,91 @@ func TestTuplePacking(t *testing.T) { } } +func TestTuplePackingV2(t *testing.T) { + var golden map[string][]byte + + if *update { + golden = make(map[string][]byte) + } else { + golden = loadGolden(t) + } + + for _, tt := range testCases { + t.Run(tt.name, func(t *testing.T) { + result := tt.tuple.Pack() + + if *update { + golden[tt.name] = result + return + } + + if !bytes.Equal(result, golden[tt.name]) { + t.Errorf("packing mismatch: expected %v, got %v", golden[tt.name], result) + } + + unpacked, err := UnpackToBoxed(result) + if err != nil { + t.Errorf("unpack error: %v", err) + } else { + if !reflect.DeepEqual(unpacked.ToTuple(), tt.tuple) { + t.Errorf("unpack mismatch: expected %v, got %v", tt.tuple, unpacked.ToTuple()) + } + } + }) + } +} + func BenchmarkTuplePacking(b *testing.B) { for _, bm := range testCases { b.Run(bm.name, func(b *testing.B) { + b.ReportAllocs() tuple := bm.tuple - for i := 0; i < b.N; i++ { + for b.Loop() { _ = tuple.Pack() } }) } } +func BenchmarkTupleUnpackV2(b *testing.B) { + for _, bm := range testCases { + b.Run(bm.name, func(b *testing.B) { + b.ReportAllocs() + t := bm.tuple + packed := t.Pack() + for b.Loop() { + _, _ = UnpackToBoxed(packed) + } + }) + } +} + +func BenchmarkTupleUnpackV2Normal(b *testing.B) { + for _, bm := range testCases { + b.Run(bm.name, func(b *testing.B) { + b.ReportAllocs() + t := bm.tuple + packed := t.Pack() + for b.Loop() { + _, _ = UnpackV2(packed) + } + }) + } +} + +func BenchmarkTupleUnpack(b *testing.B) { + for _, bm := range testCases { + b.Run(bm.name, func(b *testing.B) { + b.ReportAllocs() + t := bm.tuple + packed := t.Pack() + for b.Loop() { + _, _ = Unpack(packed) + } + }) + } +} + func TestTupleString(t *testing.T) { testCases := []struct { input Tuple diff --git a/bindings/go/src/fdb/tuple/tuple_v2.go b/bindings/go/src/fdb/tuple/tuple_v2.go new file mode 100644 index 00000000000..efb976b88bb --- /dev/null +++ b/bindings/go/src/fdb/tuple/tuple_v2.go @@ -0,0 +1,334 @@ +package tuple + +import ( + "bytes" + "encoding/binary" + "fmt" + "math" + "unsafe" +) + +// The reason we do not pass a streamingUnpacker is that the +// unpacker may escape to the heap, as all parameters to +// closures are assumed to escape to the heap. +type unpackerState struct { + buf []byte + ptr int + nesting int +} + +type unpackFunc func(unpackerState) (Boxed, int, error) + +type unpackFuncTab [256]unpackFunc + +func createByteDecoder(bt boxedType) unpackFunc { + return func(d unpackerState) (Boxed, int, error) { + end, err := findEnd(d.buf, d.ptr+1) + if err != nil { + return Boxed{}, 0, err + } + cptr := d.ptr + 1 + buf := d.buf[cptr : d.ptr+1+end.length] + if end.decoded != end.length { + buf = bytes.Replace(buf, []byte{0x00, 0xFF}, []byte{0x00}, end.length-end.decoded) + } + return Boxed{bt: bt, ptr: unsafe.Pointer(unsafe.SliceData(buf)), data: uint64(len(buf))}, 1 + 1 + end.length, nil + } +} + +func createIntDecoder(length int, negative bool) unpackFunc { + return func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + length + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for negative int64 (len %d)", length) + } + tmp := make([]byte, 8) + if negative { + for i := range tmp { + tmp[i] = 0xff + } + } + copy(tmp[8-length:], d.buf[d.ptr+1:d.ptr+1+length]) + if negative { + return newBoxedInt64(-int64(^binary.BigEndian.Uint64(tmp))), 1 + length, nil + } else { + return newBoxedInt64(int64(binary.BigEndian.Uint64(tmp))), 1 + length, nil + } + } +} + +func adjustFloatBytesDecode(b []byte) { + if b[0]&0x80 == 0x00 { + for i := 0; i < len(b); i++ { + b[i] = b[i] ^ 0xff + } + } else { + b[0] = b[0] ^ 0x80 + } +} + +var unpackFunctab unpackFuncTab = unpackFuncTab{ + nilCode: func(d unpackerState) (Boxed, int, error) { + if d.nesting > 0 { + if d.ptr+1 < len(d.buf) && + d.buf[d.ptr+1] == 0xff { + return newBoxedNil(), 2, nil + } else { + return Boxed{bt: boxedCtrl, data: 2}, 1, nil + } + } else { + return newBoxedNil(), 1, nil + } + }, + + bytesCode: createByteDecoder(boxedBytes), + stringCode: createByteDecoder(boxedString), + + trueCode: func(ds unpackerState) (Boxed, int, error) { + return newBoxedBool(true), 1, nil + }, + falseCode: func(ds unpackerState) (Boxed, int, error) { + return newBoxedBool(false), 1, nil + }, + + floatCode: func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + 4 + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for float32") + } + bytes := make([]byte, 4) + copy(bytes, d.buf[d.ptr+1:d.ptr+5]) + adjustFloatBytesDecode(bytes) + return newBoxedFloat32(math.Float32frombits(binary.BigEndian.Uint32(bytes))), 5, nil + }, + + doubleCode: func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + 8 + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for float64") + } + bytes := make([]byte, 8) + copy(bytes, d.buf[d.ptr+1:d.ptr+9]) + adjustFloatBytesDecode(bytes) + return newBoxedFloat64(math.Float64frombits(binary.BigEndian.Uint64(bytes))), 9, nil + }, + + uuidCode: func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + 16 + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for UUID") + } + u := newBoxedUUID(d.buf[d.ptr+1 : d.ptr+17]) + return u, 17, nil + }, + + versionstampCode: func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + 12 + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for Versionstamp") + } + vs := newBoxedVersionstamp(d.buf[d.ptr+1 : d.ptr+13]) + return vs, 13, nil + }, + + 0x0c: func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + 8 + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for negative int64 (len 8)") + } + buf := make([]byte, 8) + copy(buf, d.buf[d.ptr+1:d.ptr+9]) + inv := ^binary.BigEndian.Uint64(buf) + if inv < ^uint64(0x8000_0000_0000_0000) { + return Boxed{}, 0, fmt.Errorf("bigint not supported") + } else { + return newBoxedInt64(-int64(inv)), 9, nil + } + }, + 0x0d: createIntDecoder(7, true), + 0x0e: createIntDecoder(6, true), + 0x0f: createIntDecoder(5, true), + 0x10: createIntDecoder(4, true), + 0x11: createIntDecoder(3, true), + 0x12: createIntDecoder(2, true), + 0x13: createIntDecoder(1, true), + // zero positive integer + 0x14: func(d unpackerState) (Boxed, int, error) { + return newBoxedInt64(0), 1, nil + }, + 0x15: createIntDecoder(1, false), + 0x16: createIntDecoder(2, false), + 0x17: createIntDecoder(3, false), + 0x18: createIntDecoder(4, false), + 0x19: createIntDecoder(5, false), + 0x1a: createIntDecoder(6, false), + 0x1b: createIntDecoder(7, false), + 0x1c: func(d unpackerState) (Boxed, int, error) { + if len(d.buf) < (d.ptr + 8 + 1) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for uint64 (len 8)") + } + buf := make([]byte, 8) + copy(buf, d.buf[d.ptr+1:d.ptr+9]) + return newBoxedUInt64(binary.BigEndian.Uint64(buf)), 9, nil + }, + + fixedLengthCode: func(d unpackerState) (Boxed, int, error) { + if d.ptr+1 >= len(d.buf) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for FixedLen") + } + dataLength := int(d.buf[d.ptr+1]) + if len(d.buf) < d.ptr+2+dataLength { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for FixedLen") + } + fl := newBoxedFixedLen(d.buf[d.ptr+2 : d.ptr+2+dataLength]) + return fl, 2 + dataLength, nil + }, + fixedLengthCode + 1: func(d unpackerState) (Boxed, int, error) { + if d.ptr+2 >= len(d.buf) { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for FixedLen") + } + dataLength := int(binary.BigEndian.Uint16(d.buf[d.ptr+1 : d.ptr+3])) + if len(d.buf) < d.ptr+3+dataLength { + return Boxed{}, 0, fmt.Errorf("insufficient bytes for FixedLen") + } + fl := newBoxedFixedLen(d.buf[d.ptr+3 : d.ptr+3+dataLength]) + return fl, 3 + dataLength, nil + }, + + nestedCode: func(d unpackerState) (Boxed, int, error) { + return Boxed{bt: boxedCtrl, data: 1}, 1, nil + }, +} + +type streamingUnpacker struct { + buf []byte + ptr int + nesting int +} + +type endInfo struct { + length int + decoded int +} + +//go:nosplit +func findEnd(buf []byte, start int) (ei endInfo, err error) { + ptr := unsafe.Pointer(&buf[start]) + rem := len(buf) - start + for rem > 0 { + if *(*byte)(ptr) == 0 { + ptr = unsafe.Add(ptr, 1) + rem-- + if rem > 0 && *(*byte)(ptr) == 255 { + ei.length++ + } else { + return + } + } + ei.length++ + ei.decoded++ + ptr = unsafe.Add(ptr, 1) + rem-- + } + err = fmt.Errorf("tuple byte string is not terminated") + return +} + +func (su *streamingUnpacker) Next() (Boxed, error) { + if len(su.buf) <= su.ptr { + return Boxed{}, nil + } + code := su.buf[su.ptr] + f := unpackFunctab[code] + if f == nil { + return Boxed{}, fmt.Errorf("unknown tuple type code %2x", code) + } + bv, n, err := f(unpackerState{buf: su.buf, ptr: su.ptr, nesting: su.nesting}) + if err != nil { + return Boxed{}, err + } + su.ptr += n + if bv.bt == boxedCtrl { + switch bv.data { + case 1: + su.nesting++ + case 2: + su.nesting-- + } + } + return bv, nil +} + +func (su *streamingUnpacker) HasMore() bool { + return len(su.buf) > su.ptr +} + +func unpackV2Internal(up *streamingUnpacker) (BoxedTuple, error) { + // This allows implementing "perfect" allocation of the tuple + // even though we don't know its length by: + // 1. Allocating the output slice on the stack with a hard to reach capacity + // 2. Appending to the output slice only without allowing it to escape + // 3. Copying the slice to a new heap-allocated slice + // + // If we did not copy the tuple in the end, the allocation would + // escape immediately on step 1, and we would allocate the entire + // 64x 24B slice, even if we don't come anywhere close. + // + // Starting with an empty slice would result in append gradually + // expanding the slice, which would result in multiple slices. + // + // This may spill to heap if we exceed 64 elements. This is very + // unlikely. + n := make(BoxedTuple, 0, 64) + + // unpacking loop +loop: + for up.HasMore() { + next, err := up.Next() + if err != nil { + return nil, err + } + if next.bt == boxedCtrl { + switch next.data { + case 1: + v, err := unpackV2Internal(up) + if err != nil { + return nil, err + } + n = append(n, newBoxedTuple(v)) + case 2: + break loop + default: + panic("unknown action") + } + } else { + n = append(n, next) + } + } + c := make(BoxedTuple, len(n)) + copy(c, n) + return c, nil +} + +// UnpackToBoxed unpacks a byte slice into a FoundationDB +// tuple using BoxedTuple. +// +// The provided byte slice must not be modified while the +// tuple is being used, as zero-copy decoding is attempted. +func UnpackToBoxed(b []byte) (BoxedTuple, error) { + if len(b) == 0 { + return BoxedTuple{}, nil + } + unpacker := &streamingUnpacker{buf: b} + return unpackV2Internal(unpacker) +} + +// Unpack unpacks a byte slice into a FoundationDB tuple. +// +// The provided byte slice must not be modified while the +// tuple is being used, as zero-copy decoding is attempted. +func UnpackV2(b []byte) (Tuple, error) { + if len(b) == 0 { + return Tuple{}, nil + } + unpacker := &streamingUnpacker{buf: b} + u, err := unpackV2Internal(unpacker) + if err != nil { + return nil, err + } + return u.ToTuple(), nil +}