diff --git a/README.md b/README.md index f3c73635..fb335ad7 100644 --- a/README.md +++ b/README.md @@ -301,6 +301,19 @@ exist, err := root.UnsetByIndex(1) // exist == true println(root.Get("key4").Check()) // "value not exist" ``` +#### SearchOptions +```go +opts := ast.SearchOptions{ CopyReturn: true ... } +val, err := sonic.GetWithOptions(JSON, opts, "key"...) +``` +`Searcher` provides some options for users to meet different needs: +- CopyReturn +Instructs the searcher to copy the result JSON string instead of referring to the input. This can help reduce memory usage if you cache the results. +- ConcurrentRead +Since `ast.Node` uses a `Lazy-Load` design, it doesn't support concurrent reads by default. If you want to read it concurrently, please enable this option. +- ValidateJSON +Instructs the searcher to validate the entire JSON. This option is enabled by default. + #### Serialize To encode `ast.Node` as json, use `MarshalJson()` or `json.Marshal()` (MUST pass the node's pointer) @@ -466,6 +479,7 @@ For better performance, in previous case the `ast.Visitor` will be the better ch But `ast.Visitor` is not a very handy API. You might need to write a lot of code to implement your visitor and carefully maintain the tree hierarchy during decoding. Please read the comments in [ast/visitor.go](https://github.com/bytedance/sonic/blob/main/ast/visitor.go) carefully if you decide to use this API. + ## Community Sonic is a subproject of [CloudWeGo](https://www.cloudwego.io/). We are committed to building a cloud native ecosystem. diff --git a/api.go b/api.go index 09332912..93dac58e 100644 --- a/api.go +++ b/api.go @@ -189,6 +189,12 @@ func Get(src []byte, path ...interface{}) (ast.Node, error) { return GetCopyFromString(rt.Mem2Str(src), path...) } +func GetWithOptions(src []byte, opts ast.SearchOptions, path ...interface{}) (ast.Node, error) { + s := ast.NewSearcher(rt.Mem2Str(src)) + s.SearchOptions = opts + return s.GetByPath(path...) 
+} + // GetFromString is same with Get except src is string. // // WARNING: The returned JSON is **Referenced** from the input. diff --git a/ast/encode.go b/ast/encode.go index 956809c2..dc1c1807 100644 --- a/ast/encode.go +++ b/ast/encode.go @@ -17,12 +17,10 @@ package ast import ( - `sync` - `unicode/utf8` -) + "sync" + "unicode/utf8" -const ( - _MaxBuffer = 1024 // 1KB buffer size + "github.com/bytedance/sonic/option" ) func quoteString(e *[]byte, s string) { @@ -109,7 +107,7 @@ func newBuffer() *[]byte { if ret := bytesPool.Get(); ret != nil { return ret.(*[]byte) } else { - buf := make([]byte, 0, _MaxBuffer) + buf := make([]byte, 0, option.DefaultAstEncoderBufferSize) return &buf } } @@ -120,10 +118,10 @@ func freeBuffer(buf *[]byte) { } func (self *Node) encode(buf *[]byte) error { - if self.IsRaw() { + if self.isRaw() { return self.encodeRaw(buf) } - switch self.Type() { + switch int(self.itype()) { case V_NONE : return ErrNotExist case V_ERROR : return self.Check() case V_NULL : return self.encodeNull(buf) @@ -139,9 +137,14 @@ func (self *Node) encode(buf *[]byte) error { } func (self *Node) encodeRaw(buf *[]byte) error { - raw, err := self.Raw() - if err != nil { - return err + lock := self.rlock() + if !self.isRaw() { + self.runlock() + return self.encode(buf) + } + raw := self.toString() + if lock { + self.runlock() } *buf = append(*buf, raw...) 
return nil diff --git a/ast/encode_test.go b/ast/encode_test.go index 250f82b3..9b07e81b 100644 --- a/ast/encode_test.go +++ b/ast/encode_test.go @@ -141,7 +141,7 @@ func TestEncodeNode(t *testing.T) { if string(ret) != data { t.Fatal(string(ret)) } - root.loadAllKey() + root.Load() ret, err = root.MarshalJSON() if err != nil { t.Fatal(err) @@ -228,7 +228,7 @@ func BenchmarkEncodeLoad_Sonic(b *testing.B) { if e != 0 { b.Fatal(root) } - root.loadAllKey() + root.Load() _, err := root.MarshalJSON() if err != nil { b.Fatal(err) diff --git a/ast/iterator.go b/ast/iterator.go index 64e1e5a9..34e249a9 100644 --- a/ast/iterator.go +++ b/ast/iterator.go @@ -29,7 +29,7 @@ type Pair struct { // Values returns iterator for array's children traversal func (self *Node) Values() (ListIterator, error) { - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return ListIterator{}, err } return self.values(), nil @@ -41,7 +41,7 @@ func (self *Node) values() ListIterator { // Properties returns iterator for object's children traversal func (self *Node) Properties() (ObjectIterator, error) { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return ObjectIterator{}, err } return self.properties(), nil diff --git a/ast/node.go b/ast/node.go index ac6d2280..84ba0d9a 100644 --- a/ast/node.go +++ b/ast/node.go @@ -17,13 +17,15 @@ package ast import ( - `encoding/json` - `fmt` - `strconv` - `unsafe` - - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` + "encoding/json" + "fmt" + "strconv" + "sync" + "sync/atomic" + "unsafe" + + "github.com/bytedance/sonic/internal/native/types" + "github.com/bytedance/sonic/internal/rt" ) const ( @@ -36,7 +38,7 @@ const ( _V_ARRAY_LAZY = _V_LAZY | types.V_ARRAY _V_OBJECT_LAZY = _V_LAZY | types.V_OBJECT _MASK_LAZY = _V_LAZY - 1 - _MASK_RAW = _V_RAW - 1 + _MASK_RAW = _V_RAW - 1 ) 
const ( @@ -56,6 +58,7 @@ type Node struct { t types.ValueType l uint p unsafe.Pointer + m *sync.RWMutex } // UnmarshalJSON is just an adapter to json.Unmarshaler. @@ -83,13 +86,17 @@ func (self Node) Type() int { return int(self.t & _MASK_LAZY & _MASK_RAW) } -func (self Node) itype() types.ValueType { +func (self *Node) itype() types.ValueType { return self.t & _MASK_LAZY & _MASK_RAW } // Exists returns false only if the self is nil or empty node V_NONE func (self *Node) Exists() bool { - return self.Valid() && self.t != _V_NONE + if self == nil { + return false + } + t := self.loadt() + return t != V_ERROR && t != _V_NONE } // Valid reports if self is NOT V_ERROR or nil @@ -97,7 +104,7 @@ func (self *Node) Valid() bool { if self == nil { return false } - return self.t != V_ERROR + return self.loadt() != V_ERROR } // Check checks if the node itself is valid, and return: @@ -106,20 +113,26 @@ func (self *Node) Valid() bool { func (self *Node) Check() error { if self == nil { return ErrNotExist - } else if self.t != V_ERROR { + } else if self.loadt() != V_ERROR { return nil } else { return self } } -// IsRaw returns true if node's underlying value is raw json +// IsRaw reports whether the node's underlying value is raw JSON. +// Deprecated: not concurrency-safe func (self Node) IsRaw() bool { - return self.t&_V_RAW != 0 + return self.t & _V_RAW != 0 +} + +// isRaw returns true if node's underlying value is raw json +func (self *Node) isRaw() bool { + return self.loadt() & _V_RAW != 0 } func (self *Node) isLazy() bool { - return self != nil && self.t&_V_LAZY != 0 + return self != nil && self.t & _V_LAZY != 0 } func (self *Node) isAny() bool { @@ -133,18 +146,26 @@ func (self *Node) Raw() (string, error) { if self == nil { return "", ErrNotExist } - if !self.IsRaw() { + lock := self.rlock() + if !self.isRaw() { + if lock { + self.runlock() + } buf, err := self.MarshalJSON() return rt.Mem2Str(buf), err } - return self.toString(), nil + ret := self.toString() + if lock { + 
self.runlock() + } + return ret, nil } func (self *Node) checkRaw() error { if err := self.Check(); err != nil { return err } - if self.IsRaw() { + if self.isRaw() { self.parseRaw(false) } return self.Check() @@ -504,7 +525,7 @@ func (self *Node) Len() (int, error) { } } -func (self Node) len() int { +func (self *Node) len() int { return int(self.l) } @@ -527,7 +548,7 @@ func (self *Node) Cap() (int, error) { // // If self is V_NONE or V_NULL, it becomes V_OBJECT and sets the node at the key. func (self *Node) Set(key string, node Node) (bool, error) { - if err := self.Check(); err != nil { + if err := self.checkRaw(); err != nil { return false, err } if err := node.Check(); err != nil { @@ -568,7 +589,7 @@ func (self *Node) SetAny(key string, val interface{}) (bool, error) { // Unset REMOVE (soft) the node of given key under object parent, and reports if the key has existed. func (self *Node) Unset(key string) (bool, error) { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return false, err } // NOTICE: must get acurate length before deduct @@ -589,7 +610,7 @@ func (self *Node) Unset(key string) (bool, error) { // // The index must be within self's children. func (self *Node) SetByIndex(index int, node Node) (bool, error) { - if err := self.Check(); err != nil { + if err := self.checkRaw(); err != nil { return false, err } if err := node.Check(); err != nil { @@ -669,7 +690,7 @@ func (self *Node) UnsetByIndex(index int) (bool, error) { // // If self is V_NONE or V_NULL, it becomes V_ARRAY and sets the node at index 0. 
func (self *Node) Add(node Node) error { - if err := self.Check(); err != nil { + if err := self.checkRaw(); err != nil { return err } @@ -677,7 +698,7 @@ func (self *Node) Add(node Node) error { *self = NewArray([]Node{node}) return nil } - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return err } @@ -740,7 +761,7 @@ func (self *Node) Pop() error { // // WARN: this will change address of elements, which is a dangerous action. func (self *Node) Move(dst, src int) error { - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return err } @@ -812,7 +833,7 @@ func (self *Node) GetByPath(path ...interface{}) *Node { // Get loads given key of an object node on demands func (self *Node) Get(key string) *Node { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return unwrapError(err) } n, _ := self.skipKey(key) @@ -845,14 +866,14 @@ func (self *Node) Index(idx int) *Node { // IndexPair indexies pair at given idx, // node type MUST be either V_OBJECT func (self *Node) IndexPair(idx int) *Pair { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return nil } return self.skipIndexPair(idx) } func (self *Node) indexOrGet(idx int, key string) (*Node, int) { - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return unwrapError(err), idx } @@ -889,10 +910,10 @@ func (self *Node) Map() (map[string]interface{}, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return nil, err } - if err := self.loadAllKey(); err != nil { + if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObject() @@ -908,10 
+929,10 @@ func (self *Node) MapUseNumber() (map[string]interface{}, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return nil, err } - if err := self.loadAllKey(); err != nil { + if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObjectUseNumber() @@ -928,7 +949,7 @@ func (self *Node) MapUseNode() (map[string]Node, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_OBJECT, "an object"); err != nil { + if err := self.should(types.V_OBJECT); err != nil { return nil, err } if err := self.skipAllKey(); err != nil { @@ -1034,10 +1055,10 @@ func (self *Node) Array() ([]interface{}, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return nil, err } - if err := self.loadAllIndex(); err != nil { + if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArray() @@ -1053,10 +1074,10 @@ func (self *Node) ArrayUseNumber() ([]interface{}, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return nil, err } - if err := self.loadAllIndex(); err != nil { + if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArrayUseNumber() @@ -1073,7 +1094,7 @@ func (self *Node) ArrayUseNode() ([]Node, error) { return nil, ErrUnsupportType } } - if err := self.should(types.V_ARRAY, "an array"); err != nil { + if err := self.should(types.V_ARRAY); err != nil { return nil, err } if err := self.skipAllIndex(); err != nil { @@ -1129,12 +1150,12 @@ func (self *Node) Interface() (interface{}, error) { } return v, nil case _V_ARRAY_LAZY : - if err := self.loadAllIndex(); err != nil { + if err := self.loadAllIndex(false); err != nil { return nil, err 
} return self.toGenericArray() case _V_OBJECT_LAZY : - if err := self.loadAllKey(); err != nil { + if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObject() @@ -1168,12 +1189,12 @@ func (self *Node) InterfaceUseNumber() (interface{}, error) { case types.V_STRING : return self.toString(), nil case _V_NUMBER : return self.toNumber(), nil case _V_ARRAY_LAZY : - if err := self.loadAllIndex(); err != nil { + if err := self.loadAllIndex(false); err != nil { return nil, err } return self.toGenericArrayUseNumber() case _V_OBJECT_LAZY : - if err := self.loadAllKey(); err != nil { + if err := self.loadAllKey(false); err != nil { return nil, err } return self.toGenericObjectUseNumber() @@ -1205,70 +1226,30 @@ func (self *Node) InterfaceUseNode() (interface{}, error) { } } -// LoadAll loads all the node's children and children's children as parsed. -// After calling it, the node can be safely used on concurrency +// LoadAll loads the node's children +// and ensure all its children can be READ concurrently (include its children's children) func (self *Node) LoadAll() error { - if self.IsRaw() { - self.parseRaw(true) - return self.Check() - } - - switch self.itype() { - case types.V_ARRAY: - e := self.len() - if err := self.loadAllIndex(); err != nil { - return err - } - for i := 0; i < e; i++ { - n := self.nodeAt(i) - if n.IsRaw() { - n.parseRaw(true) - } - if err := n.Check(); err != nil { - return err - } - } - return nil - case types.V_OBJECT: - e := self.len() - if err := self.loadAllKey(); err != nil { - return err - } - for i := 0; i < e; i++ { - n := self.pairAt(i) - if n.Value.IsRaw() { - n.Value.parseRaw(true) - } - if err := n.Value.Check(); err != nil { - return err - } - } - return nil - default: - return self.Check() - } + return self.Load() } // Load loads the node's children as parsed. 
-// After calling it, only the node itself can be used on concurrency (not include its children) +// and ensure all its children can be READ concurrently (include its children's children) func (self *Node) Load() error { - if err := self.checkRaw(); err != nil { - return err - } - switch self.t { - case _V_ARRAY_LAZY: - return self.skipAllIndex() - case _V_OBJECT_LAZY: - return self.skipAllKey() - default: - return self.Check() + case _V_ARRAY_LAZY: self.loadAllIndex(true) + case _V_OBJECT_LAZY: self.loadAllKey(true) + case V_ERROR: return self + case V_NONE: return nil + } + if self.m == nil { + self.m = new(sync.RWMutex) } + return self.checkRaw() } /**---------------------------------- Internal Helper Methods ----------------------------------**/ -func (self *Node) should(t types.ValueType, s string) error { +func (self *Node) should(t types.ValueType) error { if err := self.checkRaw(); err != nil { return err } @@ -1439,28 +1420,38 @@ func (self *Node) skipIndexPair(index int) *Pair { return nil } -func (self *Node) loadAllIndex() error { +func (self *Node) loadAllIndex(loadOnce bool) error { if !self.isLazy() { return nil } var err types.ParsingError parser, stack := self.getParserAndArrayStack() - parser.noLazy = true - *self, err = parser.decodeArray(&stack.v) + if !loadOnce { + parser.noLazy = true + *self, err = parser.decodeArray(&stack.v) + } else { + parser.loadOnce = true + *self, err = parser.decodeArray(&stack.v) + } if err != 0 { return parser.ExportError(err) } return nil } -func (self *Node) loadAllKey() error { +func (self *Node) loadAllKey(loadOnce bool) error { if !self.isLazy() { return nil } var err types.ParsingError parser, stack := self.getParserAndObjectStack() - parser.noLazy = true - *self, err = parser.decodeObject(&stack.v) + if !loadOnce { + parser.noLazy = true + *self, err = parser.decodeObject(&stack.v) + } else { + parser.loadOnce = true + *self, err = parser.decodeObject(&stack.v) + } if err != 0 { return parser.ExportError(err) 
} @@ -1629,7 +1620,23 @@ func NewRaw(json string) Node { if it == _V_NONE { return Node{} } - return newRawNode(parser.s[start:parser.p], it) + return newRawNode(parser.s[start:parser.p], it, false) +} + +// NewRawConcurrentRead creates a node of raw json, which can be READ +// (GetByPath/Get/Index/GetOrIndex/Int64/Bool/Float64/String/Number/Interface/Array/Map/Raw/MarshalJSON) concurrently. +// If the input json is invalid, NewRaw returns a error Node. +func NewRawConcurrentRead(json string) Node { + parser := NewParserObj(json) + start, err := parser.skip() + if err != 0 { + return *newError(err, err.Message()) + } + it := switchRawType(parser.s[start]) + if it == _V_NONE { + return Node{} + } + return newRawNode(parser.s[start:parser.p], it, true) } // NewAny creates a node of type V_ANY if any's type isn't Node or *Node, @@ -1689,15 +1696,15 @@ func NewNumber(v string) Node { } } -func (node Node) toNumber() json.Number { +func (node *Node) toNumber() json.Number { return json.Number(rt.StrFrom(node.p, int64(node.l))) } -func (self Node) toString() string { +func (self *Node) toString() string { return rt.StrFrom(self.p, int64(self.l)) } -func (node Node) toFloat64() (float64, error) { +func (node *Node) toFloat64() (float64, error) { ret, err := node.toNumber().Float64() if err != nil { return 0, err @@ -1705,7 +1712,7 @@ func (node Node) toFloat64() (float64, error) { return ret, nil } -func (node Node) toInt64() (int64, error) { +func (node *Node) toInt64() (int64, error) { ret,err := node.toNumber().Int64() if err != nil { return 0, err @@ -1777,48 +1784,34 @@ func (self *Node) setObject(v *linkedPairs) { self.p = unsafe.Pointer(v) } -func newRawNode(str string, typ types.ValueType) Node { - return Node{ - t: _V_RAW | typ, - p: rt.StrPtr(str), - l: uint(len(str)), - } -} - func (self *Node) parseRaw(full bool) { + lock := self.lock() + defer self.unlock() + if !self.isRaw() { + return + } raw := self.toString() parser := NewParserObj(raw) + var e 
types.ParsingError if full { parser.noLazy = true - parser.skipValue = false + *self, e = parser.Parse() + } else if lock { + var n Node + parser.noLazy = true + parser.loadOnce = true + n, e = parser.Parse() + self.assign(n) + } else { + *self, e = parser.Parse() } - var e types.ParsingError - *self, e = parser.Parse() if e != 0 { *self = *newSyntaxError(parser.syntaxError(e)) } } -var typeJumpTable = [256]types.ValueType{ - '"' : types.V_STRING, - '-' : _V_NUMBER, - '0' : _V_NUMBER, - '1' : _V_NUMBER, - '2' : _V_NUMBER, - '3' : _V_NUMBER, - '4' : _V_NUMBER, - '5' : _V_NUMBER, - '6' : _V_NUMBER, - '7' : _V_NUMBER, - '8' : _V_NUMBER, - '9' : _V_NUMBER, - '[' : types.V_ARRAY, - 'f' : types.V_FALSE, - 'n' : types.V_NULL, - 't' : types.V_TRUE, - '{' : types.V_OBJECT, -} - -func switchRawType(c byte) types.ValueType { - return typeJumpTable[c] +func (self *Node) assign(n Node) { + self.l = n.l + self.p = n.p + atomic.StoreInt64(&self.t, n.t) } diff --git a/ast/node_test.go b/ast/node_test.go index ae40c18c..60a2aade 100644 --- a/ast/node_test.go +++ b/ast/node_test.go @@ -17,18 +17,18 @@ package ast import ( - `bytes` - `encoding/json` - `errors` - `fmt` - `reflect` - `strconv` - `testing` - - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` - `github.com/stretchr/testify/assert` - `github.com/stretchr/testify/require` + "bytes" + "encoding/json" + "errors" + "fmt" + "reflect" + "strconv" + "testing" + + "github.com/bytedance/sonic/internal/native/types" + "github.com/bytedance/sonic/internal/rt" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestNodeSortKeys(t *testing.T) { @@ -151,20 +151,21 @@ func TestLoadAll(t *testing.T) { if err = root.Load(); err != nil { t.Fatal(err) } - if root.len() != 3 { - t.Fatal(root.len()) + + if l, _ := root.Len(); l != 3 { + t.Fatal(root.Len()) } c := root.Get("c") - if !c.IsRaw() { + if !c.isRaw() { t.Fatal(err) } err = c.LoadAll() if err != nil { 
t.Fatal(err) } - if c.len() != 2 { - t.Fatal(c.len()) + if l, _ := c.Len(); l != 2 { + t.Fatal(c.Len()) } c1 := c.nodeAt(0) if n, err := c1.Int64(); err != nil || n != 1 { @@ -174,47 +175,47 @@ func TestLoadAll(t *testing.T) { a := root.pairAt(0) if a.Key != "a" { t.Fatal(a.Key) - } else if !a.Value.IsRaw() { + } else if !a.Value.isRaw() { t.Fatal(a.Value.itype()) - } else if n, err := a.Value.Len(); n != 0 || err != nil { + } else if n, err := a.Value.Len(); n != 2 || err != nil { t.Fatal(n, err) } if err := a.Value.Load(); err != nil { t.Fatal(err) } - if a.Value.len() != 2 { - t.Fatal(a.Value.len()) + if l, _ := a.Value.Len(); l != 2 { + t.Fatal(a.Value.Len()) } a1 := a.Value.Get("1") - if !a1.IsRaw() { + if !a1.isRaw() { t.Fatal(a1) } a.Value.LoadAll() - if a1.t != types.V_ARRAY || a1.len() != 1 { - t.Fatal(a1.t, a1.len()) + if l, _ := a1.Len(); a1.t != types.V_ARRAY || l != 1 { + t.Fatal(a1.t) } b := root.pairAt(1) if b.Key != "b" { t.Fatal(b.Key) - } else if !b.Value.IsRaw() { + } else if !b.Value.isRaw() { t.Fatal(b.Value.itype()) - } else if n, err := b.Value.Len(); n != 0 || err != nil { + } else if n, err := b.Value.Len(); n != 2 || err != nil { t.Fatal(n, err) } if err := b.Value.Load(); err != nil { t.Fatal(err) } - if b.Value.len() != 2 { - t.Fatal(b.Value.len()) + if l, _ := b.Value.Len(); l != 2 { + t.Fatal(b.Value.Len()) } b1 := b.Value.Index(0) - if !b1.IsRaw() { + if !b1.isRaw() { t.Fatal(b1) } b.Value.LoadAll() - if b1.t != types.V_OBJECT || b1.len() != 1 { - t.Fatal(a1.t, a1.len()) + if l, _ := b1.Len(); b1.t != types.V_OBJECT || l != 1 { + t.Fatal(a1.Len()) } } @@ -699,12 +700,12 @@ func TestCheckError_Empty(t *testing.T) { t.Fatal() } - n := newRawNode("[hello]", types.V_ARRAY) + n := newRawNode("[hello]", types.V_ARRAY, false) n.parseRaw(false) if n.Check() != nil { t.Fatal(n.Check()) } - n = newRawNode("[hello]", types.V_ARRAY) + n = newRawNode("[hello]", types.V_ARRAY, false) n.parseRaw(true) p := NewParser("[hello]") p.noLazy = true @@ 
-735,7 +736,7 @@ func TestCheckError_Empty(t *testing.T) { if e != nil { t.Fatal(e) } - exist, e := a.Set("d", newRawNode("x", types.V_OBJECT)) + exist, e := a.Set("d", newRawNode("x", types.V_OBJECT, false)) if exist || e != nil { t.Fatal(err) } @@ -746,7 +747,7 @@ func TestCheckError_Empty(t *testing.T) { if d.Check() == nil { t.Fatal(d) } - exist, e = a.Set("e", newRawNode("[}", types.V_ARRAY)) + exist, e = a.Set("e", newRawNode("[}", types.V_ARRAY, false)) if e != nil { t.Fatal(e) } @@ -839,7 +840,7 @@ func TestUnset(t *testing.T) { *entities = NewRaw(string(out)) hashtags := entities.Get("hashtags").Index(0) - hashtags.Set("text2", newRawNode(`{}`, types.V_OBJECT)) + hashtags.Set("text2", NewRaw(`{}`)) exist, err = hashtags.Unset("indices") // NOTICE: Unset() won't change node.Len() here if !exist || err != nil || hashtags.len() != 2 { t.Fatal(hashtags.len()) diff --git a/ast/parser.go b/ast/parser.go index 506f9d86..6f95ed2e 100644 --- a/ast/parser.go +++ b/ast/parser.go @@ -17,10 +17,12 @@ package ast import ( - `fmt` + "fmt" + "sync" + "sync/atomic" - `github.com/bytedance/sonic/internal/native/types` - `github.com/bytedance/sonic/internal/rt` + "github.com/bytedance/sonic/internal/native/types" + "github.com/bytedance/sonic/internal/rt" ) const ( @@ -45,10 +47,13 @@ type Parser struct { p int s string noLazy bool + loadOnce bool skipValue bool dbuf *byte } +// var noLazy = option.AstSafeConcurrentRead + /** Parser Private Methods **/ func (self *Parser) delim() types.ParsingError { @@ -152,7 +157,7 @@ func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) { if t == _V_NONE { return Node{}, types.ERR_INVALID_CHAR } - val = newRawNode(self.s[start:self.p], t) + val = newRawNode(self.s[start:self.p], t, false) }else{ /* decode the value */ if val, err = self.Parse(); err != 0 { @@ -238,7 +243,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) { if t == _V_NONE { return Node{}, types.ERR_INVALID_CHAR } - 
val = newRawNode(self.s[start:self.p], t) + val = newRawNode(self.s[start:self.p], t, false) } else { /* decode the value */ if val, err = self.Parse(); err != 0 { @@ -295,6 +300,10 @@ func (self *Parser) Pos() int { return self.p } + +// Parse returns an ast.Node representing the parser's JSON. +// NOTICE: how lazily the JSON is parsed depends on the parser's options. +// By default it only parses the first layer and the first child of an Object or Array. func (self *Parser) Parse() (Node, types.ParsingError) { switch val := self.decodeValue(); val.Vt { case types.V_EOF : return Node{}, types.ERR_EOF @@ -303,22 +312,48 @@ func (self *Parser) Parse() (Node, types.ParsingError) { case types.V_FALSE : return falseNode, 0 case types.V_STRING : return self.decodeString(val.Iv, val.Ep) case types.V_ARRAY: + s := self.p - 1; if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' { self.p = p + 1 return Node{t: types.V_ARRAY}, 0 } if self.noLazy { + if self.loadOnce { + self.noLazy = false + } return self.decodeArray(new(linkedNodes)) } + // NOTICE: loadOnce always keeps the raw JSON for an object or array + if self.loadOnce { + self.p = s + s, e := self.skipFast() + if e != 0 { + return Node{}, e + } + return newRawNode(self.s[s:self.p], types.V_ARRAY, true), 0 + } return newLazyArray(self), 0 case types.V_OBJECT: + s := self.p - 1; if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' { self.p = p + 1 return Node{t: types.V_OBJECT}, 0 } + // NOTICE: loadOnce always keeps the raw JSON for an object or array if self.noLazy { + if self.loadOnce { + self.noLazy = false + } return self.decodeObject(new(linkedPairs)) } + if self.loadOnce { + self.p = s + s, e := self.skipFast() + if e != 0 { + return Node{}, e + } + return newRawNode(self.s[s:self.p], types.V_OBJECT, true), 0 + } return newLazyObject(self), 0 case types.V_DOUBLE : return NewNumber(self.s[val.Ep:self.p]), 0 case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0 @@ -475,7 +510,7 @@ func (self *Node) skipNextNode() 
*Node { if t == _V_NONE { return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR)) } - val = newRawNode(parser.s[start:parser.p], t) + val = newRawNode(parser.s[start:parser.p], t, false) } /* add the value to result */ @@ -558,7 +593,7 @@ func (self *Node) skipNextPair() (*Pair) { if t == _V_NONE { return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))} } - val = newRawNode(parser.s[start:parser.p], t) + val = newRawNode(parser.s[start:parser.p], t, false) } /* add the value to result */ @@ -662,3 +697,72 @@ func backward(src string, i int) int { for ; i>=0 && isSpace(src[i]); i-- {} return i } + + +func newRawNode(str string, typ types.ValueType, lock bool) Node { + ret := Node{ + t: typ | _V_RAW, + p: rt.StrPtr(str), + l: uint(len(str)), + } + if lock { + ret.m = new(sync.RWMutex) + } + return ret +} + +var typeJumpTable = [256]types.ValueType{ + '"' : types.V_STRING, + '-' : _V_NUMBER, + '0' : _V_NUMBER, + '1' : _V_NUMBER, + '2' : _V_NUMBER, + '3' : _V_NUMBER, + '4' : _V_NUMBER, + '5' : _V_NUMBER, + '6' : _V_NUMBER, + '7' : _V_NUMBER, + '8' : _V_NUMBER, + '9' : _V_NUMBER, + '[' : types.V_ARRAY, + 'f' : types.V_FALSE, + 'n' : types.V_NULL, + 't' : types.V_TRUE, + '{' : types.V_OBJECT, +} + +func switchRawType(c byte) types.ValueType { + return typeJumpTable[c] +} + +func (self *Node) loadt() types.ValueType { + return (types.ValueType)(atomic.LoadInt64(&self.t)) +} + +func (self *Node) lock() bool { + if m := self.m; m != nil { + m.Lock() + return true + } + return false +} + +func (self *Node) unlock() { + if m := self.m; m != nil { + m.Unlock() + } +} + +func (self *Node) rlock() bool { + if m := self.m; m != nil { + m.RLock() + return true + } + return false +} + +func (self *Node) runlock() { + if m := self.m; m != nil { + m.RUnlock() + } +} diff --git a/ast/parser_test.go b/ast/parser_test.go index 2469bc41..594f3437 100644 --- a/ast/parser_test.go +++ b/ast/parser_test.go @@ -17,16 +17,16 @@ package ast import ( - 
`encoding/json` - `os` - `runtime` - `runtime/debug` - `sync` - `testing` - `time` + "encoding/json" + "os" + "runtime" + "runtime/debug" + "sync" + "testing" + "time" - `github.com/stretchr/testify/assert` - `github.com/stretchr/testify/require` + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) var ( @@ -316,6 +316,46 @@ func BenchmarkParseOne_Parallel_Sonic(b *testing.B) { }) } +func BenchmarkParseNoLazy_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + ast := NewRawConcurrentRead(_TwitterJson) + for i := 0; i < b.N; i++ { + node := ast.GetByPath("statuses", 3) + if node.Check() != nil { + b.Fail() + } + } +} + +func BenchmarkParseNoLazy_Parallel_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + ast := NewRawConcurrentRead(_TwitterJson) + b.RunParallel(func(p *testing.PB) { + for p.Next() { + node := ast.GetByPath("statuses", 3) + if node.Check() != nil { + b.Fail() + } + } + }) +} + +func BenchmarkNodeRaw_Parallel_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + b.ResetTimer() + ast := NewRawConcurrentRead(_TwitterJson) + b.RunParallel(func(p *testing.PB) { + for p.Next() { + node := ast.GetByPath("statuses", 3) + if _, e := node.Raw(); e != nil { + b.Fatal(e) + } + } + }) +} + func BenchmarkParseSeven_Sonic(b *testing.B) { b.SetBytes(int64(len(_TwitterJson))) b.ResetTimer() diff --git a/ast/search.go b/ast/search.go index a8d1e76f..9a5fb942 100644 --- a/ast/search.go +++ b/ast/search.go @@ -21,8 +21,23 @@ import ( `github.com/bytedance/sonic/internal/native/types` ) +// SearchOptions controls Searcher's behavior +type SearchOptions struct { + // ValidateJSON indicates the searcher to validate the entire JSON + ValidateJSON bool + + // CopyReturn indicates the searcher to copy the result JSON instead of refer from the input + // This can help to reduce memory usage if you cache the results + CopyReturn bool + + // ConcurrentRead indicates the searcher to 
return a concurrently-READ-safe node, + // including: GetByPath/Get/Index/GetOrIndex/Int64/Bool/Float64/String/Number/Interface/Array/Map/Raw/MarshalJSON + ConcurrentRead bool +} + type Searcher struct { parser Parser + SearchOptions } func NewSearcher(str string) *Searcher { @@ -31,12 +46,16 @@ func NewSearcher(str string) *Searcher { s: str, noLazy: false, }, + SearchOptions: SearchOptions{ + ValidateJSON: true, + }, } } // GetByPathCopy search in depth from top json and returns a **Copied** json node at the path location func (self *Searcher) GetByPathCopy(path ...interface{}) (Node, error) { - return self.getByPath(true, true, path...) + self.CopyReturn = true + return self.getByPath(path...) } // GetByPathNoCopy search in depth from top json and returns a **Referenced** json node at the path location @@ -44,15 +63,15 @@ func (self *Searcher) GetByPathCopy(path ...interface{}) (Node, error) { // WARN: this search directly refer partial json from top json, which has faster speed, // may consumes more memory. func (self *Searcher) GetByPath(path ...interface{}) (Node, error) { - return self.getByPath(false, true, path...) + return self.getByPath(path...) } -func (self *Searcher) getByPath(copystring bool, validate bool, path ...interface{}) (Node, error) { +func (self *Searcher) getByPath(path ...interface{}) (Node, error) { var err types.ParsingError var start int self.parser.p = 0 - start, err = self.parser.getByPath(validate, path...) + start, err = self.parser.getByPath(self.ValidateJSON, path...) 
if err != 0 { // for compatibility with old version if err == types.ERR_NOT_FOUND { @@ -71,12 +90,12 @@ func (self *Searcher) getByPath(copystring bool, validate bool, path ...interfac // copy string to reducing memory usage var raw string - if copystring { + if self.CopyReturn { raw = rt.Mem2Str([]byte(self.parser.s[start:self.parser.p])) } else { raw = self.parser.s[start:self.parser.p] } - return newRawNode(raw, t), nil + return newRawNode(raw, t, self.ConcurrentRead), nil } // GetByPath searches a path and returns relaction and types of target diff --git a/ast/search_test.go b/ast/search_test.go index 6cbacc50..2796e3af 100644 --- a/ast/search_test.go +++ b/ast/search_test.go @@ -25,6 +25,7 @@ import ( "testing" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) func TestGC_Search(t *testing.T) { @@ -54,6 +55,72 @@ func TestGC_Search(t *testing.T) { wg.Wait() } + +func TestNodeRace(t *testing.T) { + + src := `{"1":1,"2": [ 1 , 1 , { "3" : 1 , "4" : [] } ] }` + s := NewSearcher(src) + s.ConcurrentRead = true + node, _ := s.GetByPath() + + cases := []struct{ + path []interface{} + exp []string + scalar bool + lv int + }{ + {[]interface{}{"1"}, []string{`1`}, true, 0}, + {[]interface{}{"2"}, []string{`[ 1 , 1 , { "3" : 1 , "4" : [] } ]`, `[1,1,{ "3" : 1 , "4" : [] }]`, `[1,1,{"3":1,"4":[]}]`}, false, 3}, + {[]interface{}{"2", 1}, []string{`1`}, true, 1}, + {[]interface{}{"2", 2}, []string{`{ "3" : 1 , "4" : [] }`, `{"3":1,"4":[]}`}, false, 2}, + {[]interface{}{"2", 2, "3"}, []string{`1`}, true, 0}, + {[]interface{}{"2", 2, "4"}, []string{`[]`}, false, 0}, + } + + wg := sync.WaitGroup{} + start := sync.RWMutex{} + start.Lock() + + P := 1000 + for i := range cases { + // println(i) + c := cases[i] + for j := 0; j < P; j++ { + wg.Add(1) + go func () { + defer wg.Done() + start.RLock() + n := node.GetByPath(c.path...) 
+ _ = n.Type() + v, err := n.Raw() + iv, _ := n.Int64() + lv, _ := n.Len() + _, e := n.Interface() + e2 := n.SortKeys(false) + require.NoError(t, err) + require.NoError(t, e) + require.NoError(t, e2) + if c.scalar { + require.Equal(t, int64(1), iv) + } else { + require.Equal(t, c.lv, lv) + } + eq := false + for _, exp := range c.exp { + if exp == v { + eq = true + break + } + } + require.True(t, eq) + }() + } + } + + start.Unlock() + wg.Wait() +} + func TestExportErrorInvalidChar(t *testing.T) { data := `{"a":]` p := NewSearcher(data) @@ -325,6 +392,22 @@ func BenchmarkGetOne_Sonic(b *testing.B) { } } +func BenchmarkGetOneSafe_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + ast := NewSearcher(_TwitterJson) + ast.ConcurrentRead = true + for i := 0; i < b.N; i++ { + node, err := ast.GetByPath("statuses", 3, "id") + if err != nil { + b.Fatal(err) + } + x, _ := node.Int64() + if x != 249279667666817024 { + b.Fatal(node.Interface()) + } + } +} + func BenchmarkGetFull_Sonic(b *testing.B) { ast := NewSearcher(_TwitterJson) b.SetBytes(int64(len(_TwitterJson))) @@ -370,6 +453,24 @@ func BenchmarkGetOne_Parallel_Sonic(b *testing.B) { }) } +func BenchmarkGetOneSafe_Parallel_Sonic(b *testing.B) { + b.SetBytes(int64(len(_TwitterJson))) + b.RunParallel(func(pb *testing.PB) { + ast := NewSearcher(_TwitterJson) + ast.ConcurrentRead = true + for pb.Next() { + node, err := ast.GetByPath("statuses", 3, "id") + if err != nil { + b.Fatal(err) + } + x, _ := node.Int64() + if x != 249279667666817024 { + b.Fatal(node.Interface()) + } + } + }) +} + func BenchmarkSetOne_Sonic(b *testing.B) { node, err := NewSearcher(_TwitterJson).GetByPath("statuses", 3) if err != nil { diff --git a/ast/visitor_test.go b/ast/visitor_test.go index 2618c546..221996df 100644 --- a/ast/visitor_test.go +++ b/ast/visitor_test.go @@ -225,9 +225,7 @@ func (self *visitorNodeDiffTest) OnObjectEnd() error { require.NotNil(self.t, object) node := self.stk[self.sp-1].Node - ps, err := node.unsafeMap() - 
var pairs = make([]Pair, ps.Len()) - ps.ToSlice(pairs) + pairs, err := node.MapUseNode() require.NoError(self.t, err) keysGot := make([]string, 0, len(object)) @@ -235,16 +233,16 @@ func (self *visitorNodeDiffTest) OnObjectEnd() error { keysGot = append(keysGot, key) } keysWant := make([]string, 0, len(pairs)) - for _, pair := range pairs { - keysWant = append(keysWant, pair.Key) + for key := range pairs { + keysWant = append(keysWant, key) } sort.Strings(keysGot) sort.Strings(keysWant) require.EqualValues(self.t, keysWant, keysGot) - for _, pair := range pairs { - typeGot := object[pair.Key].Type() - typeWant := pair.Value.Type() + for key, pair := range pairs { + typeGot := object[key].Type() + typeWant := pair.Type() require.EqualValues(self.t, typeWant, typeGot) } @@ -278,10 +276,8 @@ func (self *visitorNodeDiffTest) OnArrayEnd() error { require.NotNil(self.t, array) node := self.stk[self.sp-1].Node - vs, err := node.unsafeArray() + values, err := node.ArrayUseNode() require.NoError(self.t, err) - var values = make([]Node, vs.Len()) - vs.ToSlice(values) require.EqualValues(self.t, len(values), len(array)) @@ -470,13 +466,13 @@ func (self *visitorUserNodeASTDecoder) decodeValue(root *Node) (visitorUserNode, value, ierr, ferr) case V_ARRAY: - nodes, err := root.unsafeArray() + nodes, err := root.ArrayUseNode() if err != nil { return nil, err } - values := make([]visitorUserNode, nodes.Len()) - for i := 0; i