Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 79 additions & 10 deletions logical_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -276,13 +276,15 @@ func makeDecimalBytesCodec(st map[string]*Codec, enclosingNamespace string, sche
st[decimalSearchType] = c

c.binaryFromNative = decimalBytesFromNative(bytesBinaryFromNative, toSignedBytes, precision, scale)
c.textualFromNative = decimalBytesFromNative(bytesTextualFromNative, toSignedBytes, precision, scale)
c.nativeFromBinary = nativeFromDecimalBytes(bytesNativeFromBinary, precision, scale)
c.nativeFromTextual = nativeFromDecimalBytes(bytesNativeFromTextual, precision, scale)
c.textualFromNative = decimalTextualFromNative(scale)
c.nativeFromBinary = nativeFromDecimalBytes(bytesNativeFromBinary, scale)
c.nativeFromTextual = nativeFromDecimalTextual()
return c, nil
}

func nativeFromDecimalBytes(fn toNativeFn, precision, scale int) toNativeFn {
// nativeFromDecimalBytes decodes bytes to *big.Rat with backwards compatibility
// for incorrectly encoded ASCII decimal strings.
func nativeFromDecimalBytes(fn toNativeFn, scale int) toNativeFn {
return func(bytes []byte) (interface{}, []byte, error) {
d, b, err := fn(bytes)
if err != nil {
Expand All @@ -292,15 +294,24 @@ func nativeFromDecimalBytes(fn toNativeFn, precision, scale int) toNativeFn {
if !ok {
return nil, bytes, fmt.Errorf("cannot transform to native decimal, expected []byte, received %T", d)
}

// Check if bytes look like ASCII decimal string (backwards compat)
if looksLikeASCIIDecimal(bs) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think this is backwards compatible. Are we sure there is never a valid encoding of a number that would also look like valid ASCII? I don't believe we can be. We need an option for this unless you have proof that it is never wrong.

Copy link
Contributor Author

@passuied passuied Jan 3, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right. I have added an option and narrowed the heuristic that decides whether the bytes are valid ASCII.

Alternatively, we could have a compatibility mode (enabled by default) that can be set to false so that the encoding is modified.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rockwotj OK, I decided to go with a fully backwards-compatible option and NOT try to recover badly encoded data (since that logic is imperfect). Enabling the spec-compliant option will switch to the new mode.

r := new(big.Rat)
if _, ok := r.SetString(string(bs)); ok {
return r, b, nil
}
}

// Normal two's-complement decoding
num := big.NewInt(0)
fromSignedBytes(num, bs)
denom := new(big.Int).Exp(big.NewInt(10), big.NewInt(int64(scale)), nil)
r := new(big.Rat).SetFrac(num, denom)
return r, b, nil
return new(big.Rat).SetFrac(num, denom), b, nil
}
}

func decimalBytesFromNative(fromNativeFn fromNativeFn, toBytesFn toBytesFn, precision, scale int) fromNativeFn {
func decimalBytesFromNative(fromNativeFn fromNativeFn, toBytesFn toBytesFn, _, scale int) fromNativeFn {
return func(b []byte, d interface{}) ([]byte, error) {
r, ok := d.(*big.Rat)
if !ok {
Expand All @@ -320,6 +331,64 @@ func decimalBytesFromNative(fromNativeFn fromNativeFn, toBytesFn toBytesFn, prec
}
}

// decimalTextualFromNative returns an encoder that appends a decimal value to
// the output buffer as a JSON string such as "40.20".
//
// The datum handed to the returned function must be a non-nil *big.Rat. It is
// rendered with exactly scale digits after the decimal point using
// big.Rat.FloatString, so values carrying more fractional digits than scale
// are rounded by that method. Any other datum type, or a nil *big.Rat, yields
// an error instead of a panic.
//
// NOTE(review): the original comment attributes this textual form to the Avro
// 1.10.2 spec — confirm against the spec.
func decimalTextualFromNative(scale int) fromNativeFn {
	return func(b []byte, d interface{}) ([]byte, error) {
		r, ok := d.(*big.Rat)
		if !ok {
			return nil, fmt.Errorf("cannot transform to textual decimal, expected *big.Rat, received %T", d)
		}
		if r == nil {
			// A typed nil pointer satisfies the assertion above but would
			// dereference nil inside FloatString; surface it as an error.
			return nil, fmt.Errorf("cannot transform to textual decimal, received nil *big.Rat")
		}
		// Delegate quoting/escaping to the string encoder so the result is a
		// JSON string appended to b.
		return stringTextualFromNative(b, r.FloatString(scale))
	}
}

// nativeFromDecimalTextual returns a decoder that reads a JSON string such as
// "40.20" from the front of the buffer and converts it to a *big.Rat.
//
// The returned function yields the parsed value together with the bytes that
// follow the string. On any failure it returns a nil value, a nil remainder,
// and an error: when the string cannot be read, when the string decoder hands
// back something other than a string, or when big.Rat.SetString rejects the
// text.
//
// Parsing is delegated entirely to big.Rat.SetString, so every form that
// method accepts (including fraction and exponent notation) decodes
// successfully here as well.
//
// NOTE(review): the original comment attributes this textual form to the Avro
// 1.10.2 spec — confirm against the spec.
func nativeFromDecimalTextual() toNativeFn {
	return func(buf []byte) (interface{}, []byte, error) {
		datum, remaining, err := stringNativeFromTextual(buf)
		if err != nil {
			return nil, nil, fmt.Errorf("cannot decode textual decimal: %s", err)
		}
		// Checked assertion: an unexpected datum type becomes an error rather
		// than a runtime panic.
		s, ok := datum.(string)
		if !ok {
			return nil, nil, fmt.Errorf("cannot decode textual decimal, expected string, received %T", datum)
		}
		r, ok := new(big.Rat).SetString(s)
		if !ok {
			return nil, nil, fmt.Errorf("cannot parse decimal string: %q", s)
		}
		return r, remaining, nil
	}
}

// looksLikeASCIIDecimal reports whether bs consists solely of the characters
// of a plain ASCII decimal literal: an optional leading '+' or '-', the digits
// '0'-'9', and at most one '.'. At least one digit is required, so "", "-" and
// "." are rejected, while ".5" and "5." are accepted. Exponent notation and
// any other byte cause a false result.
//
// This is a purely syntactic check: any byte sequence made up only of digit
// bytes (for example {0x30, 0x30}) is reported as a decimal string regardless
// of how it was produced.
func looksLikeASCIIDecimal(bs []byte) bool {
	sawDigit, sawDot := false, false
	for pos := 0; pos < len(bs); pos++ {
		ch := bs[pos]
		if ch >= '0' && ch <= '9' {
			sawDigit = true
			continue
		}
		if ch == '.' {
			if sawDot {
				return false // a second decimal point
			}
			sawDot = true
			continue
		}
		if (ch == '-' || ch == '+') && pos == 0 {
			continue // a sign is only legal as the very first byte
		}
		return false // misplaced sign or non-decimal character
	}
	// An empty slice never enters the loop and ends up here with no digit.
	return sawDigit
}

func makeDecimalFixedCodec(st map[string]*Codec, enclosingNamespace string, schemaMap map[string]interface{}) (*Codec, error) {
precision, scale, err := precisionAndScaleFromSchemaMap(schemaMap)
if err != nil {
Expand All @@ -337,9 +406,9 @@ func makeDecimalFixedCodec(st map[string]*Codec, enclosingNamespace string, sche
return nil, err
}
c.binaryFromNative = decimalBytesFromNative(c.binaryFromNative, toSignedFixedBytes(size), precision, scale)
c.textualFromNative = decimalBytesFromNative(c.textualFromNative, toSignedFixedBytes(size), precision, scale)
c.nativeFromBinary = nativeFromDecimalBytes(c.nativeFromBinary, precision, scale)
c.nativeFromTextual = nativeFromDecimalBytes(c.nativeFromTextual, precision, scale)
c.textualFromNative = decimalTextualFromNative(scale)
c.nativeFromBinary = nativeFromDecimalBytes(c.nativeFromBinary, scale)
c.nativeFromTextual = nativeFromDecimalTextual()
return c, nil
}

Expand Down
253 changes: 253 additions & 0 deletions logical_type_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,259 @@ func TestDecimalBytesLogicalTypeInRecordDecodeWithDefault(t *testing.T) {
testBinaryCodecPass(t, schema, map[string]interface{}{"mydecimal": big.NewRat(617, 50)}, []byte("\x04\x04\xd2"))
}

// TestDecimalBytesTextualRoundTrip verifies that a bytes-backed decimal codec
// (precision 4, scale 2) decodes quoted decimal strings into the expected
// *big.Rat and encodes that value back to the very same quoted string.
func TestDecimalBytesTextualRoundTrip(t *testing.T) {
	codec, err := NewCodec(`{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`)
	if err != nil {
		t.Fatal(err)
	}

	type fixture struct {
		textual  string
		expected *big.Rat
	}
	fixtures := []fixture{
		{textual: `"40.20"`, expected: big.NewRat(4020, 100)},
		{textual: `"12.34"`, expected: big.NewRat(1234, 100)},
		{textual: `"-12.34"`, expected: big.NewRat(-1234, 100)},
		{textual: `"0.00"`, expected: big.NewRat(0, 1)},
		{textual: `"99.99"`, expected: big.NewRat(9999, 100)},
	}

	for i := range fixtures {
		input, want := fixtures[i].textual, fixtures[i].expected

		// Textual -> native: the decoded datum must be a *big.Rat equal to
		// the expected value.
		decoded, _, decodeErr := codec.NativeFromTextual([]byte(input))
		if decodeErr != nil {
			t.Fatalf("NativeFromTextual(%s): %v", input, decodeErr)
		}
		rat, isRat := decoded.(*big.Rat)
		if !isRat {
			t.Fatalf("NativeFromTextual(%s): expected *big.Rat, got %T", input, decoded)
		}
		if rat.Cmp(want) != 0 {
			t.Errorf("NativeFromTextual(%s): got %v, want %v", input, rat, want)
		}

		// Native -> textual: the output must reproduce the input exactly,
		// trailing zeros included.
		encoded, encodeErr := codec.TextualFromNative(nil, rat)
		if encodeErr != nil {
			t.Fatalf("TextualFromNative(%v): %v", rat, encodeErr)
		}
		if string(encoded) != input {
			t.Errorf("TextualFromNative(%v): got %s, want %s", rat, encoded, input)
		}
	}
}

// TestDecimalFixedTextualRoundTrip is the fixed-size (12 byte) counterpart of
// the bytes round-trip test: each quoted decimal string must decode to the
// expected *big.Rat and encode back to the identical text.
func TestDecimalFixedTextualRoundTrip(t *testing.T) {
	const schema = `{"type": "fixed", "size": 12, "logicalType": "decimal", "precision": 4, "scale": 2}`
	codec, err := NewCodec(schema)
	if err != nil {
		t.Fatal(err)
	}

	table := []struct {
		textual  string
		expected *big.Rat
	}{
		{textual: `"40.20"`, expected: big.NewRat(4020, 100)},
		{textual: `"12.34"`, expected: big.NewRat(1234, 100)},
		{textual: `"-12.34"`, expected: big.NewRat(-1234, 100)},
		{textual: `"0.00"`, expected: big.NewRat(0, 1)},
	}

	for idx := 0; idx < len(table); idx++ {
		row := table[idx]

		// Parse the JSON text into the native representation.
		value, _, err := codec.NativeFromTextual([]byte(row.textual))
		if err != nil {
			t.Fatalf("NativeFromTextual(%s): %v", row.textual, err)
		}
		rat, ok := value.(*big.Rat)
		if !ok {
			t.Fatalf("NativeFromTextual(%s): expected *big.Rat, got %T", row.textual, value)
		}
		if rat.Cmp(row.expected) != 0 {
			t.Errorf("NativeFromTextual(%s): got %v, want %v", row.textual, rat, row.expected)
		}

		// Render the native value again and compare with the original text.
		out, err := codec.TextualFromNative(nil, rat)
		if err != nil {
			t.Fatalf("TextualFromNative(%v): %v", rat, err)
		}
		if got := string(out); got != row.textual {
			t.Errorf("TextualFromNative(%v): got %s, want %s", rat, out, row.textual)
		}
	}
}

// TestDecimalBytesBackwardsCompatibility checks that a binary payload whose
// bytes are the ASCII text "40.20" (rather than a two's-complement integer)
// still decodes to the decimal value 40.20.
func TestDecimalBytesBackwardsCompatibility(t *testing.T) {
	codec, err := NewCodec(`{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`)
	if err != nil {
		t.Fatal(err)
	}

	// Incorrectly encoded datum: the length prefix 0x0a (the zig-zag encoding
	// of the byte count 5) followed by the five ASCII bytes of "40.20".
	payload := []byte{0x0a}
	payload = append(payload, "40.20"...)

	decoded, _, err := codec.NativeFromBinary(payload)
	if err != nil {
		t.Fatalf("NativeFromBinary (backwards compat): %v", err)
	}

	rat, isRat := decoded.(*big.Rat)
	if !isRat {
		t.Fatalf("NativeFromBinary: expected *big.Rat, got %T", decoded)
	}

	if want := big.NewRat(4020, 100); rat.Cmp(want) != 0 {
		t.Errorf("NativeFromBinary (backwards compat): got %v, want %v", rat, want)
	}
}

// TestDecimalBytesCorrectBinaryEncoding checks that a properly encoded
// two's-complement payload still decodes to the right decimal value.
func TestDecimalBytesCorrectBinaryEncoding(t *testing.T) {
	const schema = `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`
	codec, err := NewCodec(schema)
	if err != nil {
		t.Fatal(err)
	}

	// 40.20 at scale 2 is the unscaled integer 4020 = 0x0FB4 (big-endian).
	// The leading 0x04 is the length prefix (the zig-zag encoding of the byte
	// count 2), followed by the two payload bytes 0x0F 0xB4.
	payload := []byte{0x04, 0x0f, 0xb4}

	decoded, _, err := codec.NativeFromBinary(payload)
	if err != nil {
		t.Fatalf("NativeFromBinary: %v", err)
	}

	rat, isRat := decoded.(*big.Rat)
	if !isRat {
		t.Fatalf("NativeFromBinary: expected *big.Rat, got %T", decoded)
	}

	if want := big.NewRat(4020, 100); rat.Cmp(want) != 0 {
		t.Errorf("NativeFromBinary: got %v, want %v", rat, want)
	}
}

// TestDecimalTextualToBinaryRoundTrip drives one value through the whole
// pipeline (textual -> native -> binary -> native -> textual) and checks both
// the intermediate binary form and the final text.
func TestDecimalTextualToBinaryRoundTrip(t *testing.T) {
	codec, err := NewCodec(`{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`)
	if err != nil {
		t.Fatal(err)
	}

	source := []byte(`"40.20"`)

	// Textual -> native.
	fromText, _, err := codec.NativeFromTextual(source)
	if err != nil {
		t.Fatalf("NativeFromTextual: %v", err)
	}

	// Native -> binary.
	encoded, err := codec.BinaryFromNative(nil, fromText)
	if err != nil {
		t.Fatalf("BinaryFromNative: %v", err)
	}

	// The binary form must be the two's-complement integer 4020 (0x0FB4)
	// behind its length prefix, not the ASCII text of the number.
	wantBinary := []byte{0x04, 0x0f, 0xb4}
	if string(encoded) != string(wantBinary) {
		t.Errorf("BinaryFromNative: got %x, want %x", encoded, wantBinary)
	}

	// Binary -> native.
	fromBinary, _, err := codec.NativeFromBinary(encoded)
	if err != nil {
		t.Fatalf("NativeFromBinary: %v", err)
	}

	// Native -> textual; this must reproduce the starting text.
	final, err := codec.TextualFromNative(nil, fromBinary)
	if err != nil {
		t.Fatalf("TextualFromNative: %v", err)
	}
	if string(final) != string(source) {
		t.Errorf("Round-trip failed: got %s, want %s", final, source)
	}
}

// TestLooksLikeASCIIDecimal pins the classification of byte slices as ASCII
// decimal text: accepted spellings first, then the rejected ones.
func TestLooksLikeASCIIDecimal(t *testing.T) {
	type probe struct {
		input    []byte
		expected bool
	}
	probes := []probe{
		// Accepted: optional sign, digits, at most one dot.
		{input: []byte("40.20"), expected: true},
		{input: []byte("-40.20"), expected: true},
		{input: []byte("+40.20"), expected: true},
		{input: []byte("0"), expected: true},
		{input: []byte("123456"), expected: true},
		{input: []byte(".5"), expected: true},
		{input: []byte("5."), expected: true},
		// Rejected.
		{input: []byte(""), expected: false},
		{input: []byte("-"), expected: false},
		{input: []byte("40.20.30"), expected: false}, // multiple dots
		{input: []byte("40-20"), expected: false},    // sign not at start
		{input: []byte("40a20"), expected: false},    // non-decimal char
		{input: []byte("\x0f\xb4"), expected: false}, // binary data (two's complement)
		{input: []byte{0x00}, expected: false},       // null byte
		{input: []byte{0xff, 0xff}, expected: false}, // high bytes (negative two's complement)
		{input: []byte("12.34e5"), expected: false},  // scientific notation not supported
	}

	for i := range probes {
		got := looksLikeASCIIDecimal(probes[i].input)
		if got == probes[i].expected {
			continue
		}
		t.Errorf("looksLikeASCIIDecimal(%q): got %v, want %v", probes[i].input, got, probes[i].expected)
	}
}

// TestDecimalNegativeBackwardsCompatibility checks that a binary payload
// holding the ASCII text "-40.20" decodes to the decimal value -40.20.
func TestDecimalNegativeBackwardsCompatibility(t *testing.T) {
	const schema = `{"type": "bytes", "logicalType": "decimal", "precision": 4, "scale": 2}`
	codec, err := NewCodec(schema)
	if err != nil {
		t.Fatal(err)
	}

	// Incorrectly encoded datum: the length prefix 0x0c (the zig-zag encoding
	// of the byte count 6) followed by the six ASCII bytes of "-40.20".
	payload := []byte{0x0c}
	payload = append(payload, "-40.20"...)

	decoded, _, err := codec.NativeFromBinary(payload)
	if err != nil {
		t.Fatalf("NativeFromBinary (backwards compat): %v", err)
	}

	rat, isRat := decoded.(*big.Rat)
	if !isRat {
		t.Fatalf("NativeFromBinary: expected *big.Rat, got %T", decoded)
	}

	if want := big.NewRat(-4020, 100); rat.Cmp(want) != 0 {
		t.Errorf("NativeFromBinary (backwards compat): got %v, want %v", rat, want)
	}
}

func TestValidatedStringLogicalTypeInRecordEncode(t *testing.T) {
schema := `{
"type": "record",
Expand Down
Loading