Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions codec.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,11 @@ type CodecOption struct {
// When true, the string literal "null" in textual Avro data will be coerced to Go's nil.
// Primarily used to handle edge cases where some Avro implementations allow string representations of null.
EnableStringNull bool

// SkipAdditionalFields controls handling of additional fields during decoding.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This needs to mention it is for the textual format only.

// When true, any additional input fields not defined in the schema will be skipped during decoding.
// When false (default), any additional input fields not defined in the schema will result in an error.
SkipAdditionalFields bool
}

// Codec supports decoding binary and text Avro data to Go native data types,
Expand Down
28 changes: 20 additions & 8 deletions map.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ func makeMapCodec(st map[string]*Codec, namespace string, schemaMap map[string]i
return longBinaryFromNative(buf, 0) // append tailing 0 block count to signal end of Map
},
nativeFromTextual: func(buf []byte) (interface{}, []byte, error) {
return genericMapTextDecoder(buf, valueCodec, nil) // codecFromKey == nil
return genericMapTextDecoder(buf, valueCodec, nil, cb.option.SkipAdditionalFields) // codecFromKey == nil
},
textualFromNative: func(buf []byte, datum interface{}) ([]byte, error) {
return genericMapTextEncoder(buf, datum, valueCodec, nil)
Expand All @@ -155,7 +155,12 @@ func makeMapCodec(st map[string]*Codec, namespace string, schemaMap map[string]i
// error if it encounters a map key that is not present in codecFromKey. If
// codecFromKey is nil, every map value will be decoded using defaultCodec, if
// possible.
func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[string]*Codec) (map[string]interface{}, []byte, error) {
func genericMapTextDecoder(
buf []byte,
defaultCodec *Codec,
codecFromKey map[string]*Codec,
skipAdditionalFields bool,
) (map[string]interface{}, []byte, error) {
var value interface{}
var err error
var b byte
Expand Down Expand Up @@ -191,7 +196,7 @@ func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[str
if fieldCodec == nil {
fieldCodec = defaultCodec
}
if fieldCodec == nil {
if fieldCodec == nil && !skipAdditionalFields {
return nil, nil, fmt.Errorf("cannot decode textual map: cannot determine codec: %q", key)
}
// decode colon
Expand All @@ -202,12 +207,19 @@ func genericMapTextDecoder(buf []byte, defaultCodec *Codec, codecFromKey map[str
if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 {
return nil, nil, io.ErrShortBuffer
}
value, buf, err = fieldCodec.nativeFromTextual(buf)
if err != nil {
return nil, nil, fmt.Errorf("%s for key: %q", err, key)
if fieldCodec != nil {
value, buf, err = fieldCodec.nativeFromTextual(buf)
if err != nil {
return nil, nil, fmt.Errorf("%s for key: %q", err, key)
}
// set map value for key
mapValues[key] = value
} else {
// skipAdditionalFields is true and we have no codec
if buf, _ = advanceToNextValue(buf); len(buf) == 0 {
return nil, nil, io.ErrShortBuffer
}
}
// set map value for key
mapValues[key] = value
// either comma or closing curly brace
if buf, _ = advanceToNonWhitespace(buf); len(buf) == 0 {
return nil, nil, io.ErrShortBuffer
Expand Down
62 changes: 62 additions & 0 deletions map_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ package goavro
import (
"fmt"
"log"
"reflect"
"testing"
)

Expand Down Expand Up @@ -149,6 +150,67 @@ func TestMapTextualReceiveSliceInt(t *testing.T) {
testTextEncodeFail(t, `{"type":"map","values":"int"}`, map[int]int{42: 13}, "cannot create map[string]interface{}")
}

// TestMapSkipAdditionalFields tests that when a map is used within a record,
// additional fields at the record level can be skipped when SkipAdditionalFields is enabled.
func TestMapSkipAdditionalFields(t *testing.T) {
// Note: For a plain map type, all string keys are valid, so we test
// the skip functionality in the context of a record containing a map
schema := `{
"type": "record",
"name": "RecordWithMap",
"fields": [
{
"name": "validMap",
"type": {"type": "map", "values": "int"}
}
]
}`

testSkipAdditionalFieldsFail(t, schema, false, []byte(`{"validMap": {}, "additionalField": 1}`))
testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": 1}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}})
testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": 1, "additionalField2": 2}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}})
testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": true}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}})
testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": "1"}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}})
testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": {"nested": 1"}}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}})
testSkipAdditionalFieldsPass(t, schema, true, []byte(`{"validMap": {"k1": 1}, "additionalField": [1, 2, 3]}`), map[string]interface{}{"validMap": map[string]interface{}{"k1": int32(1)}})
}

func testSkipAdditionalFieldsFail(t *testing.T, schema string, skipAdditionalFields bool, input []byte) {
t.Helper()
option := &CodecOption{
SkipAdditionalFields: skipAdditionalFields,
}
codec, err := NewCodecWithOptions(schema, option)
if err != nil {
t.Fatal(err)
}
_, _, err = codec.NativeFromTextual(input)
if err == nil {
t.Error("Expected error when decoding record with additional field, but got nil")
}
}

func testSkipAdditionalFieldsPass(t *testing.T, schema string, skipAdditionalFields bool, input []byte, expected interface{}) {
t.Helper()
option := &CodecOption{
SkipAdditionalFields: skipAdditionalFields,
}
codec, err := NewCodecWithOptions(schema, option)
if err != nil {
t.Fatal(err)
}
datum, remaining, err := codec.NativeFromTextual(input)
if err != nil {
t.Fatalf("Unexpected error: %v", err)
}
if len(remaining) != 0 {
t.Errorf("Expected no remaining bytes, got %d", len(remaining))
}
if !reflect.DeepEqual(datum, expected) {
t.Errorf("Expected %v, got %v", expected, datum)
}
}

func ExampleMap() {
codec, err := NewCodec(`{
"name": "r1",
Expand Down
2 changes: 1 addition & 1 deletion record.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ func makeRecordCodec(st map[string]*Codec, enclosingNamespace string, schemaMap
// NOTE: Setting `defaultCodec == nil` instructs genericMapTextDecoder
// to return an error when a field name is not found in the
// codecFromFieldName map.
mapValues, buf, err = genericMapTextDecoder(buf, nil, codecFromFieldName)
mapValues, buf, err = genericMapTextDecoder(buf, nil, codecFromFieldName, cb.option.SkipAdditionalFields)
if err != nil {
return nil, nil, fmt.Errorf("cannot decode textual record %q: %s", c.typeName, err)
}
Expand Down
31 changes: 31 additions & 0 deletions text.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,34 @@ func advanceToNonWhitespace(buf []byte) ([]byte, error) {
}
return nil, io.ErrShortBuffer
}

var (
valueBoundaries = map[byte]byte{
'"': '"',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this doesn’t account for escaped quotes.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and maybe not for nested object/arrays? It just stops at the first thing it sees

'{': '}',
'[': ']',
}
)

func advanceToNextValue(buf []byte) ([]byte, error) {
for openingRune, closingRune := range valueBoundaries {
if buf[0] == openingRune {
for i := 1; i < len(buf); i++ {
if buf[i] == closingRune {
return buf[i+1:], nil
}
}
return nil, io.ErrShortBuffer
}
}

// If we get here, then we are not in a string, map, or array. most likely a number or boolean
// We can just scan ahead to the next comma, closing brace, or closing bracket
for i := 1; i < len(buf); i++ {
if buf[i] == ',' || buf[i] == '}' || buf[i] == ']' {
return buf[i:], nil
}
}

return nil, fmt.Errorf("expected: token; actual: %q", buf[0])
}
4 changes: 3 additions & 1 deletion union.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ type codecInfo struct {
codecFromIndex []*Codec
codecFromName map[string]*Codec
indexFromName map[string]int
option *CodecOption
}

// Union wraps a datum value in a map for encoding as a Union, as required by
Expand Down Expand Up @@ -83,6 +84,7 @@ func makeCodecInfo(st map[string]*Codec, enclosingNamespace string, schemaArray
codecFromIndex: codecFromIndex,
codecFromName: codecFromName,
indexFromName: indexFromName,
option: cb.option,
}, nil

}
Expand Down Expand Up @@ -151,7 +153,7 @@ func unionNativeFromTextual(cr *codecInfo) func(buf []byte) (interface{}, []byte

var datum interface{}
var err error
datum, buf, err = genericMapTextDecoder(buf, nil, cr.codecFromName)
datum, buf, err = genericMapTextDecoder(buf, nil, cr.codecFromName, cr.option.SkipAdditionalFields)
if err != nil {
return nil, nil, fmt.Errorf("cannot decode textual union: %s", err)
}
Expand Down