diff --git a/codec.go b/codec.go index 192bcf6..28440fc 100644 --- a/codec.go +++ b/codec.go @@ -49,6 +49,14 @@ type CodecOption struct { // When true, the string literal "null" in textual Avro data will be coerced to Go's nil. // Primarily used to handle edge cases where some Avro implementations allow string representations of null. EnableStringNull bool + + // EnableDecimalBinarySpecCompliantEncoding selects the JSON textual encoding of + // decimal values. Binary encoding is identical in both modes: the two's-complement + // representation of the unscaled integer. + // - true: Avro 1.10.2 spec-compliant mode; textual values are human-readable decimal strings like "40.20" + // - false (default): legacy mode; textual values use the escaped-bytes form, + // kept for backwards compatibility. + EnableDecimalBinarySpecCompliantEncoding bool } // Codec supports decoding binary and text Avro data to Go native data types, @@ -82,7 +90,8 @@ type codecBuilder struct { // DefaultCodecOption returns a CodecOption with recommended default settings. 
func DefaultCodecOption() *CodecOption { return &CodecOption{ - EnableStringNull: true, + EnableStringNull: true, + EnableDecimalBinarySpecCompliantEncoding: false, } } @@ -739,9 +748,9 @@ func buildCodecForTypeDescribedByString(st map[string]*Codec, enclosingNamespace case "record": return makeRecordCodec(st, enclosingNamespace, schemaMap, cb) case "bytes.decimal": - return makeDecimalBytesCodec(st, enclosingNamespace, schemaMap) + return makeDecimalBytesCodec(st, enclosingNamespace, schemaMap, cb) case "fixed.decimal": - return makeDecimalFixedCodec(st, enclosingNamespace, schemaMap) + return makeDecimalFixedCodec(st, enclosingNamespace, schemaMap, cb) case "string.validated-string": return makeValidatedStringCodec(st, enclosingNamespace, schemaMap) default: diff --git a/logical_type.go b/logical_type.go index 80e9dc2..4311b1c 100644 --- a/logical_type.go +++ b/logical_type.go @@ -258,7 +258,7 @@ func precisionAndScaleFromSchemaMap(schemaMap map[string]interface{}) (int, int, var one = big.NewInt(1) -func makeDecimalBytesCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) { +func makeDecimalBytesCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}, cb *codecBuilder) (*Codec, error) { precision, scale, err := precisionAndScaleFromSchemaMap(schemaMap) if err != nil { return nil, err @@ -275,14 +275,27 @@ func makeDecimalBytesCodec(st map[string]*Codec, enclosingNamespace string, sche decimalSearchType := fmt.Sprintf("bytes.decimal.%d.%d", precision, scale) st[decimalSearchType] = c - c.binaryFromNative = decimalBytesFromNative(bytesBinaryFromNative, toSignedBytes, precision, scale) - c.textualFromNative = decimalBytesFromNative(bytesTextualFromNative, toSignedBytes, precision, scale) - c.nativeFromBinary = nativeFromDecimalBytes(bytesNativeFromBinary, precision, scale) - c.nativeFromTextual = nativeFromDecimalBytes(bytesNativeFromTextual, precision, scale) + // Check if 
spec-compliant encoding is enabled + specCompliant := cb != nil && cb.option != nil && cb.option.EnableDecimalBinarySpecCompliantEncoding + + if specCompliant { + // Spec-compliant encoding: two's complement binary, human-readable textual + c.binaryFromNative = decimalBytesFromNative(bytesBinaryFromNative, toSignedBytes, precision, scale) + c.textualFromNative = decimalTextualFromNative(scale) + c.nativeFromBinary = nativeFromDecimalBytes(bytesNativeFromBinary, scale) + c.nativeFromTextual = nativeFromDecimalTextual() + } else { + // Legacy encoding (default): for backwards compatibility + c.binaryFromNative = decimalBytesFromNative(bytesBinaryFromNative, toSignedBytes, precision, scale) + c.textualFromNative = decimalBytesFromNative(bytesTextualFromNative, toSignedBytes, precision, scale) + c.nativeFromBinary = nativeFromDecimalBytes(bytesNativeFromBinary, scale) + c.nativeFromTextual = nativeFromDecimalBytes(bytesNativeFromTextual, scale) + } return c, nil } -func nativeFromDecimalBytes(fn toNativeFn, precision, scale int) toNativeFn { +// nativeFromDecimalBytes decodes bytes to *big.Rat using two's-complement representation. 
+func nativeFromDecimalBytes(fn toNativeFn, scale int) toNativeFn { return func(bytes []byte) (interface{}, []byte, error) { d, b, err := fn(bytes) if err != nil { @@ -292,15 +305,16 @@ func nativeFromDecimalBytes(fn toNativeFn, precision, scale int) toNativeFn { if !ok { return nil, bytes, fmt.Errorf("cannot transform to native decimal, expected []byte, received %T", d) } + + // Two's-complement decoding num := big.NewInt(0) fromSignedBytes(num, bs) denom := new(big.Int).Exp(big.NewInt(10), big.NewInt(int64(scale)), nil) - r := new(big.Rat).SetFrac(num, denom) - return r, b, nil + return new(big.Rat).SetFrac(num, denom), b, nil } } -func decimalBytesFromNative(fromNativeFn fromNativeFn, toBytesFn toBytesFn, precision, scale int) fromNativeFn { +func decimalBytesFromNative(fromNativeFn fromNativeFn, toBytesFn toBytesFn, _, scale int) fromNativeFn { return func(b []byte, d interface{}) ([]byte, error) { r, ok := d.(*big.Rat) if !ok { @@ -320,7 +334,36 @@ func decimalBytesFromNative(fromNativeFn fromNativeFn, toBytesFn toBytesFn, prec } } -func makeDecimalFixedCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) { +// decimalTextualFromNative encodes a *big.Rat to a JSON string representation +// like "40.20" according to the Avro 1.10.2 spec. +func decimalTextualFromNative(scale int) fromNativeFn { + return func(b []byte, d interface{}) ([]byte, error) { + r, ok := d.(*big.Rat) + if !ok { + return nil, fmt.Errorf("cannot transform to textual decimal, expected *big.Rat, received %T", d) + } + // Format as decimal string with proper scale + return stringTextualFromNative(b, r.FloatString(scale)) + } +} + +// nativeFromDecimalTextual decodes a JSON string like "40.20" to a *big.Rat +// according to the Avro 1.10.2 spec. 
+func nativeFromDecimalTextual() toNativeFn { + return func(buf []byte) (interface{}, []byte, error) { + s, remaining, err := stringNativeFromTextual(buf) + if err != nil { + return nil, nil, fmt.Errorf("cannot decode textual decimal: %s", err) + } + r := new(big.Rat) + if _, ok := r.SetString(s.(string)); !ok { + return nil, nil, fmt.Errorf("cannot parse decimal string: %q", s) + } + return r, remaining, nil + } +} + +func makeDecimalFixedCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}, cb *codecBuilder) (*Codec, error) { precision, scale, err := precisionAndScaleFromSchemaMap(schemaMap) if err != nil { return nil, err @@ -336,10 +379,23 @@ func makeDecimalFixedCodec(st map[string]*Codec, enclosingNamespace string, sche if err != nil { return nil, err } - c.binaryFromNative = decimalBytesFromNative(c.binaryFromNative, toSignedFixedBytes(size), precision, scale) - c.textualFromNative = decimalBytesFromNative(c.textualFromNative, toSignedFixedBytes(size), precision, scale) - c.nativeFromBinary = nativeFromDecimalBytes(c.nativeFromBinary, precision, scale) - c.nativeFromTextual = nativeFromDecimalBytes(c.nativeFromTextual, precision, scale) + + // Check if spec-compliant encoding is enabled + specCompliant := cb != nil && cb.option != nil && cb.option.EnableDecimalBinarySpecCompliantEncoding + + if specCompliant { + // Spec-compliant encoding: two's complement binary, human-readable textual + c.binaryFromNative = decimalBytesFromNative(c.binaryFromNative, toSignedFixedBytes(size), precision, scale) + c.textualFromNative = decimalTextualFromNative(scale) + c.nativeFromBinary = nativeFromDecimalBytes(c.nativeFromBinary, scale) + c.nativeFromTextual = nativeFromDecimalTextual() + } else { + // Legacy encoding (default): for backwards compatibility + c.binaryFromNative = decimalBytesFromNative(c.binaryFromNative, toSignedFixedBytes(size), precision, scale) + c.textualFromNative = decimalBytesFromNative(c.textualFromNative, 
toSignedFixedBytes(size), precision, scale) + c.nativeFromBinary = nativeFromDecimalBytes(c.nativeFromBinary, scale) + c.nativeFromTextual = nativeFromDecimalBytes(c.nativeFromTextual, scale) + } return c, nil } diff --git a/logical_type_test.go b/logical_type_test.go index a52fc1d..7170ccb 100644 --- a/logical_type_test.go +++ b/logical_type_test.go @@ -183,6 +183,275 @@ func TestDecimalBytesLogicalTypeInRecordDecodeWithDefault(t *testing.T) { testBinaryCodecPass(t, schema, map[string]interface{}{"mydecimal": big.NewRat(617, 50)}, []byte("\x04\x04\xd2")) } +func TestDecimalBytesSpecCompliantTextualRoundTrip(t *testing.T) { + // Test spec-compliant textual encoding with human-readable decimal strings + schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}` + + // Create codec with spec-compliant encoding enabled + opt := &CodecOption{EnableDecimalBinarySpecCompliantEncoding: true} + codec, err := NewCodecWithOptions(schema, opt) + if err != nil { + t.Fatal(err) + } + + testCases := []struct { + textual string + expected *big.Rat + }{ + {`"40.20"`, big.NewRat(4020, 100)}, + {`"12.34"`, big.NewRat(1234, 100)}, + {`"-12.34"`, big.NewRat(-1234, 100)}, + {`"0.00"`, big.NewRat(0, 1)}, + {`"99.99"`, big.NewRat(9999, 100)}, + } + + for _, tc := range testCases { + // Decode textual to native + native, _, err := codec.NativeFromTextual([]byte(tc.textual)) + if err != nil { + t.Fatalf("NativeFromTextual(%s): %v", tc.textual, err) + } + + rat, ok := native.(*big.Rat) + if !ok { + t.Fatalf("NativeFromTextual(%s): expected *big.Rat, got %T", tc.textual, native) + } + + if rat.Cmp(tc.expected) != 0 { + t.Errorf("NativeFromTextual(%s): got %v, want %v", tc.textual, rat, tc.expected) + } + + // Encode native to textual + textual, err := codec.TextualFromNative(nil, rat) + if err != nil { + t.Fatalf("TextualFromNative(%v): %v", rat, err) + } + + if string(textual) != tc.textual { + t.Errorf("TextualFromNative(%v): got %s, want %s", rat, textual, 
tc.textual) + } + } +} + +func TestDecimalFixedSpecCompliantTextualRoundTrip(t *testing.T) { + // Test spec-compliant textual encoding with human-readable decimal strings + schema := `{"type": "fixed", "size": 12, "logicalType": "decimal", "precision": 4, "scale": 2}` + + // Create codec with spec-compliant encoding enabled + opt := &CodecOption{EnableDecimalBinarySpecCompliantEncoding: true} + codec, err := NewCodecWithOptions(schema, opt) + if err != nil { + t.Fatal(err) + } + + testCases := []struct { + textual string + expected *big.Rat + }{ + {`"40.20"`, big.NewRat(4020, 100)}, + {`"12.34"`, big.NewRat(1234, 100)}, + {`"-12.34"`, big.NewRat(-1234, 100)}, + {`"0.00"`, big.NewRat(0, 1)}, + } + + for _, tc := range testCases { + // Decode textual to native + native, _, err := codec.NativeFromTextual([]byte(tc.textual)) + if err != nil { + t.Fatalf("NativeFromTextual(%s): %v", tc.textual, err) + } + + rat, ok := native.(*big.Rat) + if !ok { + t.Fatalf("NativeFromTextual(%s): expected *big.Rat, got %T", tc.textual, native) + } + + if rat.Cmp(tc.expected) != 0 { + t.Errorf("NativeFromTextual(%s): got %v, want %v", tc.textual, rat, tc.expected) + } + + // Encode native to textual + textual, err := codec.TextualFromNative(nil, rat) + if err != nil { + t.Fatalf("TextualFromNative(%v): %v", rat, err) + } + + if string(textual) != tc.textual { + t.Errorf("TextualFromNative(%v): got %s, want %s", rat, textual, tc.textual) + } + } +} + +func TestDecimalBytesLegacyTextualRoundTrip(t *testing.T) { + // Test legacy (default) textual encoding with escaped bytes format + schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}` + + // Create codec with default options (legacy encoding) + codec, err := NewCodec(schema) + if err != nil { + t.Fatal(err) + } + + testCases := []struct { + native *big.Rat + expected string // escaped bytes format + }{ + {big.NewRat(4020, 100), `"\u000F\u00B4"`}, // 4020 = 0x0FB4 + {big.NewRat(1234, 100), `"\u0004\u00D2"`}, 
// 1234 = 0x04D2 + {big.NewRat(-1234, 100), `"\u00FB."`}, // -1234 in two's complement + {big.NewRat(0, 1), `"\u0000"`}, // 0 + {big.NewRat(9999, 100), `"'\u000F"`}, // 9999 = 0x270F + } + + for _, tc := range testCases { + // Encode native to textual + textual, err := codec.TextualFromNative(nil, tc.native) + if err != nil { + t.Fatalf("TextualFromNative(%v): %v", tc.native, err) + } + + if string(textual) != tc.expected { + t.Errorf("TextualFromNative(%v): got %s, want %s", tc.native, textual, tc.expected) + } + + // Decode textual back to native + native, _, err := codec.NativeFromTextual(textual) + if err != nil { + t.Fatalf("NativeFromTextual(%s): %v", textual, err) + } + + rat, ok := native.(*big.Rat) + if !ok { + t.Fatalf("NativeFromTextual(%s): expected *big.Rat, got %T", textual, native) + } + + if rat.Cmp(tc.native) != 0 { + t.Errorf("NativeFromTextual(%s): got %v, want %v", textual, rat, tc.native) + } + } +} + +func TestDecimalFixedLegacyTextualRoundTrip(t *testing.T) { + // Test legacy (default) textual encoding with escaped bytes format for fixed type + schema := `{"type": "fixed", "size": 12, "logicalType": "decimal", "precision": 4, "scale": 2}` + + // Create codec with default options (legacy encoding) + codec, err := NewCodec(schema) + if err != nil { + t.Fatal(err) + } + + testCases := []struct { + native *big.Rat + }{ + {big.NewRat(4020, 100)}, + {big.NewRat(1234, 100)}, + {big.NewRat(-1234, 100)}, + // Note: 0 is not tested here due to fixed size constraints + } + + for _, tc := range testCases { + // Encode native to textual + textual, err := codec.TextualFromNative(nil, tc.native) + if err != nil { + t.Fatalf("TextualFromNative(%v): %v", tc.native, err) + } + + // Decode textual back to native - should round-trip correctly + native, _, err := codec.NativeFromTextual(textual) + if err != nil { + t.Fatalf("NativeFromTextual(%s): %v", textual, err) + } + + rat, ok := native.(*big.Rat) + if !ok { + t.Fatalf("NativeFromTextual(%s): expected 
*big.Rat, got %T", textual, native) + } + + if rat.Cmp(tc.native) != 0 { + t.Errorf("Round-trip failed for %v: got %v", tc.native, rat) + } + } +} + +func TestDecimalBytesCorrectBinaryEncoding(t *testing.T) { + // Test that binary encoding uses two's complement (same for both legacy and spec-compliant) + schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}` + codec, err := NewCodec(schema) + if err != nil { + t.Fatal(err) + } + + // 40.20 = 4020 with scale 2 + // 4020 in two's complement = 0x0FB4 (big-endian) + // Avro bytes: length prefix 0x04 (zig-zag varint encoding of length 2) + 0x0F, 0xB4 + correctlyEncodedBytes := []byte{0x04, 0x0f, 0xb4} + + native, _, err := codec.NativeFromBinary(correctlyEncodedBytes) + if err != nil { + t.Fatalf("NativeFromBinary: %v", err) + } + + rat, ok := native.(*big.Rat) + if !ok { + t.Fatalf("NativeFromBinary: expected *big.Rat, got %T", native) + } + + expected := big.NewRat(4020, 100) + if rat.Cmp(expected) != 0 { + t.Errorf("NativeFromBinary: got %v, want %v", rat, expected) + } +} + +func TestDecimalSpecCompliantTextualToBinaryRoundTrip(t *testing.T) { + // Test the full flow with spec-compliant encoding: textual -> native -> binary -> native -> textual + schema := `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}` + + // Create codec with spec-compliant encoding enabled + opt := &CodecOption{EnableDecimalBinarySpecCompliantEncoding: true} + codec, err := NewCodecWithOptions(schema, opt) + if err != nil { + t.Fatal(err) + } + + originalTextual := []byte(`"40.20"`) + + // Step 1: Textual -> Native + native1, _, err := codec.NativeFromTextual(originalTextual) + if err != nil { + t.Fatalf("NativeFromTextual: %v", err) + } + + // Step 2: Native -> Binary + binary, err := codec.BinaryFromNative(nil, native1) + if err != nil { + t.Fatalf("BinaryFromNative: %v", err) + } + + // Verify binary is two's complement + // 4020 = 0x0FB4 in hex + expectedBinary := []byte{0x04, 0x0f, 0xb4} + if string(binary) != 
string(expectedBinary) { + t.Errorf("BinaryFromNative: got %x, want %x", binary, expectedBinary) + } + + // Step 3: Binary -> Native + native2, _, err := codec.NativeFromBinary(binary) + if err != nil { + t.Fatalf("NativeFromBinary: %v", err) + } + + // Step 4: Native -> Textual + textual, err := codec.TextualFromNative(nil, native2) + if err != nil { + t.Fatalf("TextualFromNative: %v", err) + } + + if string(textual) != string(originalTextual) { + t.Errorf("Round-trip failed: got %s, want %s", textual, originalTextual) + } +} + func TestValidatedStringLogicalTypeInRecordEncode(t *testing.T) { schema := `{ "type": "record",