diff --git a/codec.go b/codec.go index 192bcf6..a141548 100644 --- a/codec.go +++ b/codec.go @@ -49,6 +49,11 @@ type CodecOption struct { // When true, the string literal "null" in textual Avro data will be coerced to Go's nil. // Primarily used to handle edge cases where some Avro implementations allow string representations of null. EnableStringNull bool + + // SkipAdditionalFields controls handling of additional fields during decoding. + // When true, any additional input fields not defined in the schema will be skipped during decoding. + // When false (default), any additional input fields not defined in the schema will result in an error. + SkipAdditionalFields bool } // Codec supports decoding binary and text Avro data to Go native data types, diff --git a/map.go b/map.go index 8bb885d..6f90f7f 100644 --- a/map.go +++ b/map.go @@ -141,7 +141,7 @@ func makeMapCodec(st map[string]*Codec, namespace string, schemaMap map[string]i return longBinaryFromNative(buf, 0) // append tailing 0 block count to signal end of Map }, nativeFromTextual: func(buf []byte) (interface{}, []byte, error) { - return genericMapTextDecoder(buf, valueCodec, nil) // codecFromKey == nil + return genericMapTextDecoder(buf, valueCodec, nil, cb.option.SkipAdditionalFields) // codecFromKey == nil }, textualFromNative: func(buf []byte, datum interface{}) ([]byte, error) { return genericMapTextEncoder(buf, datum, valueCodec, nil) @@ -155,7 +155,12 @@ func makeMapCodec(st map[string]*Codec, namespace string, schemaMap map[string]i // error if it encounters a map key that is not present in codecFromKey. If // codecFromKey is nil, every map value will be decoded using defaultCodec, if // possible. -func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[string]*Codec) (map[string]interface{}, []byte, error) { +func genericMapTextDecoder( + buf []byte, + defaultCodec *Codec, + codecFromKey map[string]*Codec, + skipAdditionalFields bool, +) (map[string]interface{}, []byte, error) { var value interface{} var err error var b byte @@ -191,7 +196,7 @@ func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[str if fieldCodec == nil { fieldCodec = defaultCodec } - if fieldCodec == nil { + if fieldCodec == nil && !skipAdditionalFields { return nil, nil, fmt.Errorf("cannot decode textual map: cannot determine codec: %q", key) } // decode colon @@ -202,12 +207,19 @@ func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[str if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { return nil, nil, io.ErrShortBuffer } - value, buf, err = fieldCodec.nativeFromTextual(buf) - if err != nil { - return nil, nil, fmt.Errorf("%s for key: %q", err, key) + if fieldCodec != nil { + value, buf, err = fieldCodec.nativeFromTextual(buf) + if err != nil { + return nil, nil, fmt.Errorf("%s for key: %q", err, key) + } + // set map value for key + mapValues[key] = value + } else { + // skipAdditionalFields is true and we have no codec + if buf, _ = advanceToNextValue(buf); len(buf) == 0 { + return nil, nil, io.ErrShortBuffer + } } - // set map value for key - mapValues[key] = value // either comma or closing curly brace if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 { return nil, nil, io.ErrShortBuffer diff --git a/map_test.go b/map_test.go index d1b02d2..84ee309 100644 --- a/map_test.go +++ b/map_test.go @@ -12,6 +12,7 @@ package goavro import ( "fmt" "log" + "reflect" "testing" ) @@ -149,6 +150,67 @@ func TestMapTextualReceiveSliceInt(t *testing.T) { testTextEncodeFail(t, `{"type":"map","values":"int"}`, map[int]int{42: 13}, "cannot create map[string]interface{}") } +// TestMapSkipAdditionalFields tests that when a map is used within a record, +// additional fields at the record level can be skipped when SkipAdditionalFields is enabled. +func TestMapSkipAdditionalFields(t *testing.T) { + // Note: For a plain map type, all string keys are valid, so we test + // the skip functionality in the context of a record containing a map + schema := `{ + "type": "record", + "name": "RecordWithMap", + "fields": [ + { + "name": "validMap", + "type": {"type": "map", "values": "int"} + } + ] + }` + + testSkipAdditionalFieldsFail(t, schema, false, []byte(`{"validMap": {}, "additionalField": 1}`)) + testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": 1}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}}) + testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": 1, "additionalField2": 2}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}}) + testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": true}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}}) + testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": "1"}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}}) + testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": {"nested": 1"}}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}}) + testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": [1, 2, 3]}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}}) +} + +func testSkipAdditionalFieldsFail(t *testing.T, schema string, skipAdditionalFields bool, input []byte) { + t.Helper() + option := &CodecOption{ + SkipAdditionalFields: skipAdditionalFields, + } + codec, err := NewCodecWithOptions(schema, option) + if err != nil { + t.Fatal(err) + } + _, _, err = codec.NativeFromTextual(input) + if err == nil { + t.Error("Expected error when decoding record with additional field, but got nil") + } +} + +func testSkipAdditionalFieldsPass(t *testing.T, schema string, skipAdditionalFields bool, input []byte, expected interface{}) { + t.Helper() + option := &CodecOption{ + SkipAdditionalFields: skipAdditionalFields, + } + codec, err := NewCodecWithOptions(schema, option) + if err != nil { + t.Fatal(err) + } + datum, remaining, err := codec.NativeFromTextual(input) + if err != nil { + t.Fatalf("Unexpected error: %v", err) + } + if len(remaining) != 0 { + t.Errorf("Expected no remaining bytes, got %d", len(remaining)) + } + if !reflect.DeepEqual(datum, expected) { + t.Errorf("Expected %v, got %v", expected, datum) + } +} + func ExampleMap() { codec, err := NewCodec(`{ "name": "r1", diff --git a/record.go b/record.go index 305dad6..763d58a 100644 --- a/record.go +++ b/record.go @@ -200,7 +200,7 @@ func makeRecordCodec(st map[string]*Codec, enclosingNamespace string, schemaMap // NOTE: Setting `defaultCodec == nil` instructs genericMapTextDecoder // to return an error when a field name is not found in the // codecFromFieldName map. - mapValues, buf, err = genericMapTextDecoder(buf, nil, codecFromFieldName) + mapValues, buf, err = genericMapTextDecoder(buf, nil, codecFromFieldName, cb.option.SkipAdditionalFields) if err != nil { return nil, nil, fmt.Errorf("cannot decode textual record %q: %s", c.typeName, err) } diff --git a/text.go b/text.go index 10e5308..00225ed 100644 --- a/text.go +++ b/text.go @@ -39,3 +39,34 @@ func advanceToNonWhitespace(buf []byte) ([]byte, error) { } return nil, io.ErrShortBuffer } + +var ( + valueBoundaries = map[byte]byte{ + '"': '"', + '{': '}', + '[': ']', + } +) + +func advanceToNextValue(buf []byte) ([]byte, error) { + for openingRune, closingRune := range valueBoundaries { + if buf[0] == openingRune { + for i := 1; i < len(buf); i++ { + if buf[i] == closingRune { + return buf[i+1:], nil + } + } + return nil, io.ErrShortBuffer + } + } + + // If we get here, then we are not in a string, map, or array. most likely a number or boolean + // We can just scan ahead to the next comma, closing brace, or closing bracket + for i := 1; i < len(buf); i++ { + if buf[i] == ',' || buf[i] == '}' || buf[i] == ']' { + return buf[i:], nil + } + } + + return nil, fmt.Errorf("expected: token; actual: %q", buf[0]) +} diff --git a/union.go b/union.go index ac5a18f..9558271 100644 --- a/union.go +++ b/union.go @@ -24,6 +24,7 @@ type codecInfo struct { codecFromIndex []*Codec codecFromName map[string]*Codec indexFromName map[string]int + option *CodecOption } // Union wraps a datum value in a map for encoding as a Union, as required by @@ -83,6 +84,7 @@ func makeCodecInfo(st map[string]*Codec, enclosingNamespace string, schemaArray codecFromIndex: codecFromIndex, codecFromName: codecFromName, indexFromName: indexFromName, + option: cb.option, }, nil } @@ -151,7 +153,7 @@ func unionNativeFromTextual(cr *codecInfo) func(buf []byte) (interface{}, []byte var datum interface{} var err error - datum, buf, err = genericMapTextDecoder(buf, nil, cr.codecFromName) + datum, buf, err = genericMapTextDecoder(buf, nil, cr.codecFromName, cr.option.SkipAdditionalFields) if err != nil { return nil, nil, fmt.Errorf("cannot decode textual union: %s", err) }