diff --git a/codec.go b/codec.go index 28440fc..cb38f7a 100644 --- a/codec.go +++ b/codec.go @@ -50,6 +50,12 @@ type CodecOption struct { // Primarily used to handle edge cases where some Avro implementations allow string representations of null. EnableStringNull bool + // IgnoreExtraFieldsFromTextual controls how unknown fields are handled during textual (JSON) decoding. + // When true, fields in the JSON input that are not defined in the Avro schema will be + // silently skipped. This enables forward-compatible schema evolution where consumers + // can process messages from producers using newer schemas with additional fields. + // When false (default), unknown fields cause a "cannot determine codec" error. + IgnoreExtraFieldsFromTextual bool // EnableDecimalBinarySpecCompliantEncoding controls whether decimal values use // Avro 1.10.2 spec-compliant encoding. When true: // - Binary encoding uses two's-complement representation of the unscaled integer @@ -91,6 +97,7 @@ type codecBuilder struct { func DefaultCodecOption() *CodecOption { return &CodecOption{ EnableStringNull: true, + IgnoreExtraFieldsFromTextual: false, EnableDecimalBinarySpecCompliantEncoding: false, } } diff --git a/json_utils.go b/json_utils.go new file mode 100644 index 0000000..6bba4dc --- /dev/null +++ b/json_utils.go @@ -0,0 +1,234 @@ +// Copyright [2019] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "fmt" + "io" +) + +// skipJSONValue advances the buffer past a single JSON value (object, array, string, number, bool, null). +// This is used when IgnoreExtraFieldsFromTextual is enabled to skip over values of unknown fields. +func skipJSONValue(buf []byte) ([]byte, error) { + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, io.ErrShortBuffer + } + + switch buf[0] { + case '{': + // Skip object: find matching closing brace + return skipJSONObject(buf) + case '[': + // Skip array: find matching closing bracket + return skipJSONArray(buf) + case '"': + // Skip string: find closing quote (handling escapes) + return skipJSONString(buf) + case 't': + // true + if len(buf) >= 4 && string(buf[:4]) == "true" { + return buf[4:], nil + } + return nil, fmt.Errorf("cannot skip JSON value: invalid literal starting with 't'") + case 'f': + // false + if len(buf) >= 5 && string(buf[:5]) == "false" { + return buf[5:], nil + } + return nil, fmt.Errorf("cannot skip JSON value: invalid literal starting with 'f'") + case 'n': + // null + if len(buf) >= 4 && string(buf[:4]) == "null" { + return buf[4:], nil + } + return nil, fmt.Errorf("cannot skip JSON value: invalid literal starting with 'n'") + default: + // Must be a number (or invalid) + if buf[0] == '-' || (buf[0] >= '0' && buf[0] <= '9') { + return skipJSONNumber(buf) + } + return nil, fmt.Errorf("cannot skip JSON value: unexpected character: %q", buf[0]) + } +} + +// skipJSONObject skips a JSON object starting with '{' and returns the buffer after the closing '}'. +func skipJSONObject(buf []byte) ([]byte, error) { + if len(buf) == 0 || buf[0] != '{' { + return nil, fmt.Errorf("cannot skip JSON object: expected '{'") + } + buf = buf[1:] // consume '{' + var err error + + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, io.ErrShortBuffer + } + + // Handle empty object + if buf[0] == '}' { + return buf[1:], nil + } + + for len(buf) > 0 { + // Skip key (string) + if buf, err = skipJSONString(buf); err != nil { + return nil, err + } + // Skip colon + if buf, err = advanceAndConsume(buf, ':'); err != nil { + return nil, err + } + // Skip value + if buf, err = skipJSONValue(buf); err != nil { + return nil, err + } + // Check for comma or closing brace + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, io.ErrShortBuffer + } + switch buf[0] { + case '}': + return buf[1:], nil + case ',': + buf = buf[1:] + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, io.ErrShortBuffer + } + default: + return nil, fmt.Errorf("cannot skip JSON object: expected ',' or '}'; received: %q", buf[0]) + } + } + return nil, io.ErrShortBuffer +} + +// skipJSONArray skips a JSON array starting with '[' and returns the buffer after the closing ']'. +func skipJSONArray(buf []byte) ([]byte, error) { + if len(buf) == 0 || buf[0] != '[' { + return nil, fmt.Errorf("cannot skip JSON array: expected '['") + } + buf = buf[1:] // consume '[' + var err error + + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, io.ErrShortBuffer + } + + // Handle empty array + if buf[0] == ']' { + return buf[1:], nil + } + + for len(buf) > 0 { + // Skip value + if buf, err = skipJSONValue(buf); err != nil { + return nil, err + } + // Check for comma or closing bracket + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, io.ErrShortBuffer + } + switch buf[0] { + case ']': + return buf[1:], nil + case ',': + buf = buf[1:] + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, io.ErrShortBuffer + } + default: + return nil, fmt.Errorf("cannot skip JSON array: expected ',' or ']'; received: %q", buf[0]) + } + } + return nil, io.ErrShortBuffer +} + +// skipJSONString skips a JSON string starting with '"' and returns the buffer after the closing '"'. +func skipJSONString(buf []byte) ([]byte, error) { + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, io.ErrShortBuffer + } + if buf[0] != '"' { + return nil, fmt.Errorf("cannot skip JSON string: expected '\"'") + } + buf = buf[1:] // consume opening quote + + for i := 0; i < len(buf); i++ { + switch buf[i] { + case '\\': + // Skip the next character (escaped) + i++ + case '"': + // Found closing quote + return buf[i+1:], nil + } + } + return nil, io.ErrShortBuffer +} + +// skipJSONNumber skips a JSON number and returns the buffer after the number. +func skipJSONNumber(buf []byte) ([]byte, error) { + if len(buf) == 0 { + return nil, io.ErrShortBuffer + } + + i := 0 + + // Optional minus sign + if buf[i] == '-' { + i++ + if i >= len(buf) { + return nil, io.ErrShortBuffer + } + } + + // Integer part + switch { + case buf[i] == '0': + i++ + case buf[i] >= '1' && buf[i] <= '9': + for i < len(buf) && buf[i] >= '0' && buf[i] <= '9' { + i++ + } + default: + return nil, fmt.Errorf("cannot skip JSON number: invalid character: %q", buf[i]) + } + + // Optional fractional part + if i < len(buf) && buf[i] == '.' { + i++ + if i >= len(buf) || buf[i] < '0' || buf[i] > '9' { + return nil, fmt.Errorf("cannot skip JSON number: expected digit after decimal point") + } + for i < len(buf) && buf[i] >= '0' && buf[i] <= '9' { + i++ + } + } + + // Optional exponent part + if i < len(buf) && (buf[i] == 'e' || buf[i] == 'E') { + i++ + if i >= len(buf) { + return nil, io.ErrShortBuffer + } + if buf[i] == '+' || buf[i] == '-' { + i++ + if i >= len(buf) { + return nil, io.ErrShortBuffer + } + } + if buf[i] < '0' || buf[i] > '9' { + return nil, fmt.Errorf("cannot skip JSON number: expected digit in exponent") + } + for i < len(buf) && buf[i] >= '0' && buf[i] <= '9' { + i++ + } + } + + return buf[i:], nil +} diff --git a/json_utils_test.go b/json_utils_test.go new file mode 100644 index 0000000..1bef7b8 --- /dev/null +++ b/json_utils_test.go @@ -0,0 +1,327 @@ +// Copyright [2019] LinkedIn Corp. Licensed under the Apache License, Version +// 2.0 (the "License"); you may not use this file except in compliance with the +// License. You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +package goavro + +import ( + "io" + "testing" +) + +func TestSkipJSONString(t *testing.T) { + tests := []struct { + name string + input string + wantRest string + wantError bool + }{ + // Basic strings + {name: "empty string", input: `""`, wantRest: ""}, + {name: "simple string", input: `"hello"`, wantRest: ""}, + {name: "string with trailing", input: `"hello",next`, wantRest: ",next"}, + {name: "string with spaces", input: `"hello world"`, wantRest: ""}, + + // Escaped characters + {name: "escaped quote", input: `"hello\"world"`, wantRest: ""}, + {name: "escaped backslash", input: `"hello\\world"`, wantRest: ""}, + {name: "escaped newline", input: `"hello\nworld"`, wantRest: ""}, + {name: "escaped tab", input: `"hello\tworld"`, wantRest: ""}, + {name: "escaped unicode", input: `"hello\u0041world"`, wantRest: ""}, + {name: "multiple escapes", input: `"a\"b\\c\nd"`, wantRest: ""}, + {name: "escaped at end", input: `"test\\"`, wantRest: ""}, + + // Whitespace handling + {name: "leading whitespace", input: ` "hello"`, wantRest: ""}, + {name: "leading tabs", input: "\t\"hello\"", wantRest: ""}, + + // Error cases + {name: "missing open quote", input: `hello"`, wantError: true}, + {name: "missing close quote", input: `"hello`, wantError: true}, + {name: "escape at end", input: `"hello\`, wantError: true}, + {name: "empty input", input: ``, wantError: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rest, err := skipJSONString([]byte(tt.input)) + if tt.wantError { + if err == nil { + t.Errorf("expected error, got nil") + } + return + } + if err != nil { + t.Errorf("unexpected error: %v", err) + return + } + if string(rest) != tt.wantRest { + t.Errorf("got rest=%q, want=%q", string(rest), tt.wantRest) + } + }) + } +} + +func TestSkipJSONNumber(t *testing.T) { + tests := []struct { + name string + input string + wantRest string + wantError bool + }{ + // Integers + {name: "zero", input: "0", wantRest: ""}, + {name: "single digit", input: "5", wantRest: ""}, + {name: "multi digit", input: "123", wantRest: ""}, + {name: "negative", input: "-42", wantRest: ""}, + {name: "negative zero", input: "-0", wantRest: ""}, + + // Decimals + {name: "decimal", input: "3.14", wantRest: ""}, + {name: "decimal no int", input: "0.5", wantRest: ""}, + {name: "negative decimal", input: "-3.14", wantRest: ""}, + {name: "long decimal", input: "123.456789", wantRest: ""}, + + // Exponents + {name: "exponent lowercase", input: "1e10", wantRest: ""}, + {name: "exponent uppercase", input: "1E10", wantRest: ""}, + {name: "exponent positive", input: "1e+10", wantRest: ""}, + {name: "exponent negative", input: "1e-10", wantRest: ""}, + {name: "decimal with exponent", input: "3.14e10", wantRest: ""}, + {name: "negative with exponent", input: "-2.5E-3", wantRest: ""}, + + // With trailing content + {name: "trailing comma", input: "123,", wantRest: ","}, + {name: "trailing brace", input: "456}", wantRest: "}"}, + {name: "trailing bracket", input: "789]", wantRest: "]"}, + {name: "trailing whitespace", input: "123 ", wantRest: " "}, + + // Error cases + {name: "empty", input: "", wantError: true}, + {name: "just minus", input: "-", wantError: true}, + {name: "leading zero", input: "01", wantRest: "1"}, // valid: stops at 0, leaves "1" + {name: "decimal no digits", input: "1.", wantError: true}, + {name: "exponent no digits", input: "1e", wantError: true}, + {name: "exponent sign no digits", input: "1e+", wantError: true}, + {name: "invalid char", input: "abc", wantError: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rest, err := skipJSONNumber([]byte(tt.input)) + if tt.wantError { + if err == nil { + t.Errorf("expected error, got nil") + } + return + } + if err != nil { + t.Errorf("unexpected error: %v", err) + return + } + if string(rest) != tt.wantRest { + t.Errorf("got rest=%q, want=%q", string(rest), tt.wantRest) + } + }) + } +} + +func TestSkipJSONArray(t *testing.T) { + tests := []struct { + name string + input string + wantRest string + wantError bool + }{ + // Basic arrays + {name: "empty array", input: "[]", wantRest: ""}, + {name: "single number", input: "[1]", wantRest: ""}, + {name: "multiple numbers", input: "[1,2,3]", wantRest: ""}, + {name: "with spaces", input: "[ 1 , 2 , 3 ]", wantRest: ""}, + + // Mixed types + {name: "mixed types", input: `[1,"hello",true,null]`, wantRest: ""}, + {name: "nested array", input: "[[1,2],[3,4]]", wantRest: ""}, + {name: "nested object", input: `[{"a":1},{"b":2}]`, wantRest: ""}, + {name: "deeply nested", input: `[[[1]]]`, wantRest: ""}, + + // With trailing content + {name: "trailing comma", input: "[1,2],next", wantRest: ",next"}, + {name: "trailing brace", input: "[1]}", wantRest: "}"}, + + // Error cases + {name: "missing bracket", input: "[1,2", wantError: true}, + {name: "missing open", input: "1,2]", wantError: true}, + {name: "empty input", input: "", wantError: true}, + {name: "unclosed nested", input: "[[1,2]", wantError: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rest, err := skipJSONArray([]byte(tt.input)) + if tt.wantError { + if err == nil { + t.Errorf("expected error, got nil") + } + return + } + if err != nil { + t.Errorf("unexpected error: %v", err) + return + } + if string(rest) != tt.wantRest { + t.Errorf("got rest=%q, want=%q", string(rest), tt.wantRest) + } + }) + } +} + +func TestSkipJSONObject(t *testing.T) { + tests := []struct { + name string + input string + wantRest string + wantError bool + }{ + // Basic objects + {name: "empty object", input: "{}", wantRest: ""}, + {name: "single field", input: `{"a":1}`, wantRest: ""}, + {name: "multiple fields", input: `{"a":1,"b":2}`, wantRest: ""}, + {name: "with spaces", input: `{ "a" : 1 , "b" : 2 }`, wantRest: ""}, + + // Various value types + {name: "string value", input: `{"name":"John"}`, wantRest: ""}, + {name: "boolean value", input: `{"flag":true}`, wantRest: ""}, + {name: "null value", input: `{"empty":null}`, wantRest: ""}, + {name: "array value", input: `{"items":[1,2,3]}`, wantRest: ""}, + {name: "nested object", input: `{"outer":{"inner":1}}`, wantRest: ""}, + {name: "deeply nested", input: `{"a":{"b":{"c":{"d":1}}}}`, wantRest: ""}, + + // Complex cases + {name: "mixed types", input: `{"s":"str","n":123,"b":true,"nil":null,"a":[1],"o":{}}`, wantRest: ""}, + {name: "escaped key", input: `{"key\"with\"quotes":1}`, wantRest: ""}, + + // With trailing content + {name: "trailing comma", input: `{"a":1},next`, wantRest: ",next"}, + {name: "trailing bracket", input: `{"a":1}]`, wantRest: "]"}, + + // Error cases + {name: "missing brace", input: `{"a":1`, wantError: true}, + {name: "missing colon", input: `{"a"1}`, wantError: true}, + {name: "missing value", input: `{"a":}`, wantError: true}, + {name: "empty input", input: "", wantError: true}, + {name: "unclosed nested", input: `{"a":{"b":1}`, wantError: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rest, err := skipJSONObject([]byte(tt.input)) + if tt.wantError { + if err == nil { + t.Errorf("expected error, got nil") + } + return + } + if err != nil { + t.Errorf("unexpected error: %v", err) + return + } + if string(rest) != tt.wantRest { + t.Errorf("got rest=%q, want=%q", string(rest), tt.wantRest) + } + }) + } +} + +func TestSkipJSONValue(t *testing.T) { + tests := []struct { + name string + input string + wantRest string + wantError bool + }{ + // Primitives + {name: "true", input: "true", wantRest: ""}, + {name: "false", input: "false", wantRest: ""}, + {name: "null", input: "null", wantRest: ""}, + {name: "string", input: `"hello"`, wantRest: ""}, + {name: "number", input: "123", wantRest: ""}, + {name: "negative number", input: "-456", wantRest: ""}, + {name: "decimal", input: "3.14", wantRest: ""}, + + // Compound types + {name: "array", input: "[1,2,3]", wantRest: ""}, + {name: "object", input: `{"a":1}`, wantRest: ""}, + {name: "empty array", input: "[]", wantRest: ""}, + {name: "empty object", input: "{}", wantRest: ""}, + + // With whitespace + {name: "leading space", input: " true", wantRest: ""}, + {name: "leading newline", input: "\ntrue", wantRest: ""}, + {name: "leading tab", input: "\ttrue", wantRest: ""}, + + // With trailing content + {name: "true trailing", input: "true,next", wantRest: ",next"}, + {name: "false trailing", input: "false}", wantRest: "}"}, + {name: "null trailing", input: "null]", wantRest: "]"}, + + // Error cases + {name: "empty", input: "", wantError: true}, + {name: "whitespace only", input: " ", wantError: true}, + {name: "invalid true", input: "tru", wantError: true}, + {name: "invalid false", input: "fals", wantError: true}, + {name: "invalid null", input: "nul", wantError: true}, + {name: "invalid char", input: "xyz", wantError: true}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + rest, err := skipJSONValue([]byte(tt.input)) + if tt.wantError { + if err == nil { + t.Errorf("expected error, got nil") + } + return + } + if err != nil { + t.Errorf("unexpected error: %v", err) + return + } + if string(rest) != tt.wantRest { + t.Errorf("got rest=%q, want=%q", string(rest), tt.wantRest) + } + }) + } +} + +func TestSkipJSONValueShortBuffer(t *testing.T) { + // Test that io.ErrShortBuffer is returned for truncated inputs + tests := []struct { + name string + input string + }{ + {name: "truncated string", input: `"hello`}, + {name: "truncated array", input: `[1,2`}, + {name: "truncated object", input: `{"a":1`}, + {name: "truncated nested", input: `{"a":[1,2`}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + _, err := skipJSONValue([]byte(tt.input)) + if err == nil { + t.Errorf("expected error, got nil") + return + } + if err != io.ErrShortBuffer { + // Some errors might be more specific, that's fine + t.Logf("got error: %v (not io.ErrShortBuffer, but still an error)", err) + } + }) + } +} diff --git a/map.go b/map.go index 8bb885d..43ec103 100644 --- a/map.go +++ b/map.go @@ -141,7 +141,7 @@ func makeMapCodec(st map[string]*Codec, namespace string, schemaMap map[string]i return longBinaryFromNative(buf, 0) // append tailing 0 block count to signal end of Map }, nativeFromTextual: func(buf []byte) (interface{}, []byte, error) { - return genericMapTextDecoder(buf, valueCodec, nil) // codecFromKey == nil + return genericMapTextDecoder(buf, valueCodec, nil, false) // codecFromKey == nil, ignoreExtraFields == false }, textualFromNative: func(buf []byte, datum interface{}) ([]byte, error) { return genericMapTextEncoder(buf, datum, valueCodec, nil) @@ -152,10 +152,11 @@ func makeMapCodec(st map[string]*Codec, namespace string, schemaMap map[string]i // genericMapTextDecoder decodes a JSON text blob to a native Go map, using the // codecs from codecFromKey, and if a key is not found in that map, from // defaultCodec if provided. If defaultCodec is nil, this function returns an -// error if it encounters a map key that is not present in codecFromKey. If -// codecFromKey is nil, every map value will be decoded using defaultCodec, if +// error if it encounters a map key that is not present in codecFromKey, unless +// ignoreExtraFields is true, in which case the unknown field is skipped. +// If codecFromKey is nil, every map value will be decoded using defaultCodec, if // possible. -func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[string]*Codec) (map[string]interface{}, []byte, error) { +func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[string]*Codec, ignoreExtraFields bool) (map[string]interface{}, []byte, error) { var value interface{} var err error var b byte @@ -192,6 +193,31 @@ func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[str fieldCodec = defaultCodec } if fieldCodec == nil { + if ignoreExtraFields { + // Skip the colon and value for this unknown field + if buf, err = advanceAndConsume(buf, ':'); err != nil { + return nil, nil, err + } + if buf, err = skipJSONValue(buf); err != nil { + return nil, nil, fmt.Errorf("cannot skip unknown field %q: %s", key, err) + } + // Check for comma or closing brace + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, nil, io.ErrShortBuffer + } + switch b = buf[0]; b { + case '}': + return mapValues, buf[1:], nil + case ',': + buf = buf[1:] + if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { + return nil, nil, io.ErrShortBuffer + } + default: + return nil, nil, fmt.Errorf("cannot decode textual map: expected ',' or '}'; received: %q", b) + } + continue + } return nil, nil, fmt.Errorf("cannot decode textual map: cannot determine codec: %q", key) } // decode colon diff --git a/record.go b/record.go index 305dad6..28da676 100644 --- a/record.go +++ b/record.go @@ -194,13 +194,16 @@ func makeRecordCodec(st map[string]*Codec, enclosingNamespace string, schemaMap return recordMap, buf, nil } + // Capture the ignoreExtraFields option for use in the closure + ignoreExtraFields := cb.option != nil && cb.option.IgnoreExtraFieldsFromTextual + c.nativeFromTextual = func(buf []byte) (interface{}, []byte, error) { var mapValues map[string]interface{} var err error // NOTE: Setting `defaultCodec == nil` instructs genericMapTextDecoder // to return an error when a field name is not found in the - // codecFromFieldName map. - mapValues, buf, err = genericMapTextDecoder(buf, nil, codecFromFieldName) + // codecFromFieldName map, unless ignoreExtraFields is true. + mapValues, buf, err = genericMapTextDecoder(buf, nil, codecFromFieldName, ignoreExtraFields) if err != nil { return nil, nil, fmt.Errorf("cannot decode textual record %q: %s", c.typeName, err) } diff --git a/record_test.go b/record_test.go index 8bff185..ec102de 100644 --- a/record_test.go +++ b/record_test.go @@ -385,6 +385,142 @@ func TestRecordTextCodecPass(t *testing.T) { testTextDecodePass(t, `{"name":"r1","type":"record","fields":[{"name":"string","type":"string"},{"name":"bytes","type":"bytes"}]}`, map[string]interface{}{"string": silly, "bytes": []byte(silly)}, []byte(` { "string" : "\u0001\u2318 " , "bytes" : "\u0001\u00E2\u008C\u0098 " }`)) } +func TestRecordIgnoreExtraFieldsFromTextual(t *testing.T) { + schema := `{"name":"r1","type":"record","fields":[{"name":"name","type":"string"},{"name":"age","type":"int"}]}` + + // Test that extra fields cause error by default + t.Run("default behavior rejects extra fields", func(t *testing.T) { + codec, err := NewCodec(schema) + if err != nil { + t.Fatal(err) + } + jsonWithExtraField := []byte(`{"name":"Alice","age":30,"extraField":"ignored"}`) + _, _, err = codec.NativeFromTextual(jsonWithExtraField) + if err == nil { + t.Fatal("expected error for extra field, got nil") + } + if !bytes.Contains([]byte(err.Error()), []byte("cannot determine codec")) { + t.Fatalf("expected 'cannot determine codec' error, got: %s", err) + } + }) + + // Test that extra fields are ignored when IgnoreExtraFieldsFromTextual is true + t.Run("ignores extra fields when option enabled", func(t *testing.T) { + opt := &CodecOption{IgnoreExtraFieldsFromTextual: true} + codec, err := NewCodecWithOptions(schema, opt) + if err != nil { + t.Fatal(err) + } + + jsonWithExtraField := []byte(`{"name":"Alice","age":30,"extraField":"ignored"}`) + native, remaining, err := codec.NativeFromTextual(jsonWithExtraField) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + if len(remaining) != 0 { + t.Fatalf("expected empty remaining buffer, got: %v", remaining) + } + + m, ok := native.(map[string]interface{}) + if !ok { + t.Fatalf("expected map[string]interface{}, got: %T", native) + } + if m["name"] != "Alice" { + t.Errorf("expected name='Alice', got: %v", m["name"]) + } + if m["age"] != int32(30) { + t.Errorf("expected age=30, got: %v (type %T)", m["age"], m["age"]) + } + // extraField should NOT be in the result + if _, exists := m["extraField"]; exists { + t.Error("extraField should not be present in decoded result") + } + }) + + // Test with multiple extra fields of various types + t.Run("ignores multiple extra fields of various types", func(t *testing.T) { + opt := &CodecOption{IgnoreExtraFieldsFromTextual: true} + codec, err := NewCodecWithOptions(schema, opt) + if err != nil { + t.Fatal(err) + } + + // Extra fields: string, number, boolean, null, array, object + jsonWithManyExtras := []byte(`{ + "name": "Bob", + "extraString": "hello", + "age": 25, + "extraNumber": 123.45, + "extraBool": true, + "extraNull": null, + "extraArray": [1, 2, 3], + "extraObject": {"nested": "value"} + }`) + native, remaining, err := codec.NativeFromTextual(jsonWithManyExtras) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + if len(remaining) != 0 { + t.Fatalf("expected empty remaining buffer, got: %v", remaining) + } + + m, ok := native.(map[string]interface{}) + if !ok { + t.Fatalf("expected map[string]interface{}, got: %T", native) + } + if m["name"] != "Bob" { + t.Errorf("expected name='Bob', got: %v", m["name"]) + } + if m["age"] != int32(25) { + t.Errorf("expected age=25, got: %v", m["age"]) + } + // Only schema fields should be present + if len(m) != 2 { + t.Errorf("expected 2 fields, got %d: %v", len(m), m) + } + }) + + // Test with extra field at the end (after last schema field) + t.Run("ignores extra field at end", func(t *testing.T) { + opt := &CodecOption{IgnoreExtraFieldsFromTextual: true} + codec, err := NewCodecWithOptions(schema, opt) + if err != nil { + t.Fatal(err) + } + + json := []byte(`{"name":"Charlie","age":35,"trailing":"field"}`) + native, _, err := codec.NativeFromTextual(json) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + + m := native.(map[string]interface{}) + if m["name"] != "Charlie" || m["age"] != int32(35) { + t.Errorf("unexpected values: %v", m) + } + }) + + // Test with extra field at the beginning (before first schema field) + t.Run("ignores extra field at beginning", func(t *testing.T) { + opt := &CodecOption{IgnoreExtraFieldsFromTextual: true} + codec, err := NewCodecWithOptions(schema, opt) + if err != nil { + t.Fatal(err) + } + + json := []byte(`{"leading":"field","name":"Diana","age":40}`) + native, _, err := codec.NativeFromTextual(json) + if err != nil { + t.Fatalf("unexpected error: %s", err) + } + + m := native.(map[string]interface{}) + if m["name"] != "Diana" || m["age"] != int32(40) { + t.Errorf("unexpected values: %v", m) + } + }) +} + func TestRecordFieldDefaultValue(t *testing.T) { testSchemaValid(t, `{"type":"record","name":"r1","fields":[{"name":"f1","type":"int","default":13}]}`) testSchemaValid(t, `{"type":"record","name":"r1","fields":[{"name":"f1","type":"string","default":"foo"}]}`) diff --git a/union.go b/union.go index ac5a18f..da07701 100644 --- a/union.go +++ b/union.go @@ -151,7 +151,8 @@ func unionNativeFromTextual(cr *codecInfo) func(buf []byte) (interface{}, []byte var datum interface{} var err error - datum, buf, err = genericMapTextDecoder(buf, nil, cr.codecFromName) + // For unions, we never ignore extra fields - the map keys represent type names + datum, buf, err = genericMapTextDecoder(buf, nil, cr.codecFromName, false) if err != nil { return nil, nil, fmt.Errorf("cannot decode textual union: %s", err) }