diff --git a/api.go b/api.go
index 09332912..93dac58e 100644
--- a/api.go
+++ b/api.go
@@ -189,6 +189,13 @@ func Get(src []byte, path ...interface{}) (ast.Node, error) {
     return GetCopyFromString(rt.Mem2Str(src), path...)
 }
 
+// GetWithOptions is same with Get except it allows configuring the searching behavior with SearchOptions.
+func GetWithOptions(src []byte, opts ast.SearchOptions, path ...interface{}) (ast.Node, error) {
+    s := ast.NewSearcher(rt.Mem2Str(src))
+    s.SearchOptions = opts
+    return s.GetByPath(path...)
+}
+
 // GetFromString is same with Get except src is string.
 //
 // WARNING: The returned JSON is **Referenced** from the input.
diff --git a/ast/encode.go b/ast/encode.go
index dc519e33..b0671d61 100644
--- a/ast/encode.go
+++ b/ast/encode.go
@@ -90,12 +90,8 @@ func quoteString(e *[]byte, s string) {
 var bytesPool = sync.Pool{}
 
 func (self *Node) MarshalJSON() ([]byte, error) {
-    return self.marshalJSON(noLazy)
-}
-
-func (self *Node) marshalJSON(locked bool) ([]byte, error) {
     buf := newBuffer()
-    err := self.encode(buf, locked)
+    err := self.encode(buf)
     if err != nil {
         freeBuffer(buf)
         return nil, err
@@ -111,7 +107,7 @@ func newBuffer() *[]byte {
     if ret := bytesPool.Get(); ret != nil {
         return ret.(*[]byte)
     } else {
-        buf := make([]byte, 0, option.DefaultEncoderBufferSize)
+        buf := make([]byte, 0, option.DefaultAstEncoderBufferSize)
         return &buf
     }
 }
@@ -121,9 +117,9 @@ func freeBuffer(buf *[]byte) {
     bytesPool.Put(buf)
 }
 
-func (self *Node) encode(buf *[]byte, locked bool) error {
+func (self *Node) encode(buf *[]byte) error {
     if self.IsRaw() {
-        return self.encodeRaw(buf, locked)
+        return self.encodeRaw(buf)
     }
     switch int(self.itype()) {
         case V_NONE  : return ErrNotExist
@@ -140,16 +136,14 @@ func (self *Node) encode(buf *[]byte, locked bool) error {
     }
 }
 
-func (self *Node) encodeRaw(buf *[]byte, locked bool) error {
-    if locked {
-        self.rlock()
-        if !self.IsRaw() {
-            self.runlock()
-            return self.encode(buf, false)
-        }
+func (self *Node) encodeRaw(buf *[]byte) error {
+    lock := self.rlock()
+    if !self.IsRaw() {
+        self.runlock()
+        return self.encode(buf)
     }
     raw := self.toString()
-    if locked {
+    if lock {
         self.runlock()
     }
     *buf = append(*buf, raw...)
@@ -212,7 +206,7 @@ func (self *Node) encodeArray(buf *[]byte) error {
             *buf = append(*buf, ',')
         }
         started = true
-        if err := n.encode(buf, true); err != nil {
+        if err := n.encode(buf); err != nil {
             return err
         }
     }
@@ -221,16 +215,16 @@ func (self *Node) encodeArray(buf *[]byte) error {
     return nil
 }
 
-func (self *Pair) encode(buf *[]byte, locked bool) error {
+func (self *Pair) encode(buf *[]byte) error {
     if len(*buf) == 0 {
         *buf = append(*buf, '"', '"', ':')
-        return self.Value.encode(buf, locked)
+        return self.Value.encode(buf)
     }
 
     quote(buf, self.Key)
     *buf = append(*buf, ':')
 
-    return self.Value.encode(buf, locked)
+    return self.Value.encode(buf)
 }
 
 func (self *Node) encodeObject(buf *[]byte) error {
@@ -258,7 +252,7 @@ func (self *Node) encodeObject(buf *[]byte) error {
             *buf = append(*buf, ',')
         }
         started = true
-        if err := n.encode(buf, true); err != nil {
+        if err := n.encode(buf); err != nil {
             return err
         }
     }
diff --git a/ast/node.go b/ast/node.go
index 879269a1..f51ec28a 100644
--- a/ast/node.go
+++ b/ast/node.go
@@ -134,18 +134,16 @@ func (self *Node) Raw() (string, error) {
     if self == nil {
         return "", ErrNotExist
     }
-    if noLazy {
-        self.rlock()
-    }
+    lock := self.rlock()
     if !self.IsRaw() {
-        if noLazy {
+        if lock {
             self.runlock()
         }
-        buf, err := self.marshalJSON(false)
+        buf, err := self.MarshalJSON()
         return rt.Mem2Str(buf), err
     }
     ret := self.toString()
-    if noLazy {
+    if lock {
         self.runlock()
     }
     return ret, nil
@@ -1640,7 +1638,23 @@ func NewRaw(json string) Node {
     if it == _V_NONE {
         return Node{}
     }
-    return newRawNode(parser.s[start:parser.p], it)
+    return newRawNode(parser.s[start:parser.p], it, false)
+}
+
+// NewRawConcurrentRead creates a node of raw json, which can be READ
+// (GetByPath/Get/Index/GetOrIndex/Int64/Bool/Float64/String/Number/Interface/Array/Map/Raw/MarshalJSON) concurrently.
+// If the input json is invalid, NewRawConcurrentRead returns an error Node.
+func NewRawConcurrentRead(json string) Node {
+    parser := NewParserObj(json)
+    start, err := parser.skip()
+    if err != 0 {
+        return *newError(err, err.Message())
+    }
+    it := switchRawType(parser.s[start])
+    if it == _V_NONE {
+        return Node{}
+    }
+    return newRawNode(parser.s[start:parser.p], it, true)
 }
 
 // NewAny creates a node of type V_ANY if any's type isn't Node or *Node,
diff --git a/ast/node_test.go b/ast/node_test.go
index ae40c18c..4b9c7cc5 100644
--- a/ast/node_test.go
+++ b/ast/node_test.go
@@ -699,12 +699,12 @@ func TestCheckError_Empty(t *testing.T) {
         t.Fatal()
     }
 
-    n := newRawNode("[hello]", types.V_ARRAY)
+    n := newRawNode("[hello]", types.V_ARRAY, false)
     n.parseRaw(false)
     if n.Check() != nil {
         t.Fatal(n.Check())
     }
-    n = newRawNode("[hello]", types.V_ARRAY)
+    n = newRawNode("[hello]", types.V_ARRAY, false)
     n.parseRaw(true)
     p := NewParser("[hello]")
     p.noLazy = true
@@ -735,7 +735,7 @@ func TestCheckError_Empty(t *testing.T) {
     if e != nil {
         t.Fatal(e)
     }
-    exist, e := a.Set("d", newRawNode("x", types.V_OBJECT))
+    exist, e := a.Set("d", newRawNode("x", types.V_OBJECT, false))
     if exist || e != nil {
         t.Fatal(err)
     }
@@ -746,7 +746,7 @@ func TestCheckError_Empty(t *testing.T) {
     if d.Check() == nil {
         t.Fatal(d)
     }
-    exist, e = a.Set("e", newRawNode("[}", types.V_ARRAY))
+    exist, e = a.Set("e", newRawNode("[}", types.V_ARRAY, false))
     if e != nil {
         t.Fatal(e)
     }
@@ -839,7 +839,7 @@ func TestUnset(t *testing.T) {
     *entities = NewRaw(string(out))
     hashtags := entities.Get("hashtags").Index(0)
-    hashtags.Set("text2", newRawNode(`{}`, types.V_OBJECT))
+    hashtags.Set("text2", NewRaw(`{}`))
     exist, err = hashtags.Unset("indices") // NOTICE: Unset() won't change node.Len() here
     if !exist || err != nil || hashtags.len() != 2 {
         t.Fatal(hashtags.len())
     }
diff --git a/ast/parser.go b/ast/parser.go
index 7b84fa81..811ff95e 100644
--- a/ast/parser.go
+++ b/ast/parser.go
@@ -24,7 +24,6 @@ import (
 
     "github.com/bytedance/sonic/internal/native/types"
     "github.com/bytedance/sonic/internal/rt"
-    "github.com/bytedance/sonic/option"
 )
 
 const (
@@ -49,11 +48,12 @@ type Parser struct {
     p         int
     s         string
     noLazy    bool
+    loadOnce  bool
     skipValue bool
     dbuf      *byte
 }
 
-var noLazy = option.AstSafeConcurrentRead
+// var noLazy = option.AstSafeConcurrentRead
 
 /** Parser Private Methods **/
 
@@ -158,7 +158,7 @@ func (self *Parser) decodeArray(ret *linkedNodes) (Node, types.ParsingError) {
             if t == _V_NONE {
                 return Node{}, types.ERR_INVALID_CHAR
             }
-            val = newRawNode(self.s[start:self.p], t)
+            val = newRawNode(self.s[start:self.p], t, false)
         }else{
             /* decode the value */
             if val, err = self.Parse(); err != 0 {
@@ -244,7 +244,7 @@ func (self *Parser) decodeObject(ret *linkedPairs) (Node, types.ParsingError) {
             if t == _V_NONE {
                 return Node{}, types.ERR_INVALID_CHAR
             }
-            val = newRawNode(self.s[start:self.p], t)
+            val = newRawNode(self.s[start:self.p], t, false)
         } else {
             /* decode the value */
             if val, err = self.Parse(); err != 0 {
@@ -332,7 +332,7 @@ func (self *Node) skipNextNode() *Node {
         if t == _V_NONE {
             return newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))
         }
-        val = newRawNode(parser.s[start:parser.p], t)
+        val = newRawNode(parser.s[start:parser.p], t, false)
     }
 
     /* add the value to result */
@@ -415,7 +415,7 @@ func (self *Node) skipNextPair() (*Pair) {
         if t == _V_NONE {
             return &Pair{key, *newSyntaxError(parser.syntaxError(types.ERR_INVALID_CHAR))}
         }
-        val = newRawNode(parser.s[start:parser.p], t)
+        val = newRawNode(parser.s[start:parser.p], t, false)
     }
 
     /* add the value to result */
@@ -538,15 +538,19 @@ func (self *Parser) Parse() (Node, types.ParsingError) {
                 return Node{t: types.V_ARRAY}, 0
             }
             if self.noLazy {
+                if self.loadOnce {
+                    self.noLazy = false
+                }
                 return self.decodeArray(new(linkedNodes))
             }
-            if noLazy {
+            // NOTICE: loadOnce always keeps the raw json for object or array
+            if self.loadOnce {
                 self.p = s
                 s, e := self.skipFast()
                 if e != 0 {
                     return Node{}, e
                 }
-                return newRawNode(self.s[s:self.p], types.V_ARRAY), 0
+                return newRawNode(self.s[s:self.p], types.V_ARRAY, true), 0
             }
             return newLazyArray(self), 0
         case types.V_OBJECT:
@@ -555,16 +559,20 @@ func (self *Parser) Parse() (Node, types.ParsingError) {
                 self.p = p + 1
                 return Node{t: types.V_OBJECT}, 0
             }
+            // NOTICE: loadOnce always keeps the raw json for object or array
            if self.noLazy {
+                if self.loadOnce {
+                    self.noLazy = false
+                }
                 return self.decodeObject(new(linkedPairs))
             }
-            if noLazy {
+            if self.loadOnce {
                 self.p = s
                 s, e := self.skipFast()
                 if e != 0 {
                     return Node{}, e
                 }
-                return newRawNode(self.s[s:self.p], types.V_OBJECT), 0
+                return newRawNode(self.s[s:self.p], types.V_OBJECT, true), 0
             }
             return newLazyObject(self), 0
         case types.V_DOUBLE  : return NewNumber(self.s[val.Ep:self.p]), 0
@@ -573,53 +581,23 @@ func (self *Parser) Parse() (Node, types.ParsingError) {
     }
 }
 
-// Parse returns a ast.Node representing the parser's JSON.
-// NOTICE: the specific parsing lazy dependens parser's option
-// It only parse first layer and all chidren for Object or Array by default
-func (self *Parser) ParseNoLazy() (Node, types.ParsingError) {
-    switch val := self.decodeValue(); val.Vt {
-        case types.V_EOF     : return Node{}, types.ERR_EOF
-        case types.V_NULL    : return nullNode, 0
-        case types.V_TRUE    : return trueNode, 0
-        case types.V_FALSE   : return falseNode, 0
-        case types.V_STRING  : return self.decodeString(val.Iv, val.Ep)
-        case types.V_ARRAY:
-            if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == ']' {
-                self.p = p + 1
-                return Node{t: types.V_ARRAY}, 0
-            }
-            return self.decodeArray(new(linkedNodes))
-        case types.V_OBJECT:
-            if p := skipBlank(self.s, self.p); p >= self.p && self.s[p] == '}' {
-                self.p = p + 1
-                return Node{t: types.V_OBJECT}, 0
-            }
-            return self.decodeObject(new(linkedPairs))
-        case types.V_DOUBLE  : return NewNumber(self.s[val.Ep:self.p]), 0
-        case types.V_INTEGER : return NewNumber(self.s[val.Ep:self.p]), 0
-        default              : return Node{}, types.ParsingError(-val.Vt)
-    }
-}
-
-func newRawNode(str string, typ types.ValueType) Node {
+func newRawNode(str string, typ types.ValueType, lock bool) Node {
     ret := Node{
         t: _V_RAW | typ,
         p: rt.StrPtr(str),
         l: uint(len(str)),
     }
-    if noLazy {
+    if lock {
         ret.m = unsafe.Pointer(new(sync.RWMutex))
     }
     return ret
 }
 
 func (self *Node) parseRaw(full bool) {
-    if noLazy {
-        self.lock()
-        defer self.unlock()
-        if !self.IsRaw() {
-            return
-        }
+    lock := self.lock()
+    defer self.unlock()
+    if !self.IsRaw() {
+        return
     }
     raw := self.toString()
     parser := NewParserObj(raw)
@@ -627,9 +605,11 @@ func (self *Node) parseRaw(full bool) {
     if full {
         parser.noLazy = true
         *self, e = parser.Parse()
-    } else if noLazy {
+    } else if lock {
         var n Node
-        n, e = parser.ParseNoLazy()
+        parser.noLazy = true
+        parser.loadOnce = true
+        n, e = parser.Parse()
         self.assign(n)
     } else {
         *self, e = parser.Parse()
@@ -673,10 +653,12 @@ func (self *Node) loadm() *sync.RWMutex {
     return (*sync.RWMutex)(atomic.LoadPointer(&self.m))
 }
 
-func (self *Node) lock() {
+func (self *Node) lock() bool {
     if m := self.loadm(); m != nil {
         m.Lock()
+        return true
     }
+    return false
 }
 
 func (self *Node) unlock() {
@@ -685,10 +667,12 @@ func (self *Node) unlock() {
     }
 }
 
-func (self *Node) rlock() {
+func (self *Node) rlock() bool {
     if m := self.loadm(); m != nil {
         m.RLock()
+        return true
     }
+    return false
 }
 
 func (self *Node) runlock() {
diff --git a/ast/parser_test.go b/ast/parser_test.go
index e511070a..594f3437 100644
--- a/ast/parser_test.go
+++ b/ast/parser_test.go
@@ -317,24 +317,21 @@ func BenchmarkParseOne_Parallel_Sonic(b *testing.B) {
 }
 
 func BenchmarkParseNoLazy_Sonic(b *testing.B) {
-    noLazy = true
     b.SetBytes(int64(len(_TwitterJson)))
     b.ResetTimer()
-    ast := NewRaw(_TwitterJson)
+    ast := NewRawConcurrentRead(_TwitterJson)
     for i := 0; i < b.N; i++ {
         node := ast.GetByPath("statuses", 3)
         if node.Check() != nil {
             b.Fail()
         }
     }
-    noLazy = false
 }
 
 func BenchmarkParseNoLazy_Parallel_Sonic(b *testing.B) {
-    noLazy = true
     b.SetBytes(int64(len(_TwitterJson)))
     b.ResetTimer()
-    ast := NewRaw(_TwitterJson)
+    ast := NewRawConcurrentRead(_TwitterJson)
     b.RunParallel(func(p *testing.PB) {
         for p.Next() {
             node := ast.GetByPath("statuses", 3)
@@ -343,14 +340,12 @@ func BenchmarkParseNoLazy_Parallel_Sonic(b *testing.B) {
             }
         }
     })
-    noLazy = false
 }
 
 func BenchmarkNodeRaw_Parallel_Sonic(b *testing.B) {
-    noLazy = true
     b.SetBytes(int64(len(_TwitterJson)))
     b.ResetTimer()
-    ast := NewRaw(_TwitterJson)
+    ast := NewRawConcurrentRead(_TwitterJson)
     b.RunParallel(func(p *testing.PB) {
         for p.Next() {
             node := ast.GetByPath("statuses", 3)
@@ -359,7 +354,6 @@ func BenchmarkNodeRaw_Parallel_Sonic(b *testing.B) {
             }
         }
     })
-    noLazy = false
 }
 
 func BenchmarkParseSeven_Sonic(b *testing.B) {
diff --git a/ast/search.go b/ast/search.go
index a8d1e76f..53291064 100644
--- a/ast/search.go
+++ b/ast/search.go
@@ -21,8 +21,23 @@ import (
     `github.com/bytedance/sonic/internal/native/types`
 )
 
+// SearchOptions controls the Searcher's behavior
+type SearchOptions struct {
+    // ValidateJSON tells the searcher to validate the entire JSON
+    ValidateJSON bool
+
+    // CopyReturn tells the searcher to copy the result JSON instead of referencing the input.
+    // This can help reduce memory usage if you cache the results
+    CopyReturn bool
+
+    // ConcurrentRead tells the searcher to return a concurrently-READ-safe node,
+    // including: GetByPath/Get/Index/GetOrIndex/Int64/Bool/Float64/String/Number/Interface/Array/Map/Raw/MarshalJSON
+    ConcurrentRead bool
+}
+
 type Searcher struct {
     parser Parser
+    SearchOptions
 }
 
 func NewSearcher(str string) *Searcher {
@@ -36,7 +51,8 @@ func NewSearcher(str string) *Searcher {
 
 // GetByPathCopy search in depth from top json and returns a **Copied** json node at the path location
 func (self *Searcher) GetByPathCopy(path ...interface{}) (Node, error) {
-    return self.getByPath(true, true, path...)
+    self.CopyReturn = true
+    return self.getByPath(path...)
 }
 
 // GetByPathNoCopy search in depth from top json and returns a **Referenced** json node at the path location
@@ -44,15 +60,15 @@ func (self *Searcher) GetByPathCopy(path ...interface{}) (Node, error) {
 // WARN: this search directly refer partial json from top json, which has faster speed,
 // may consumes more memory.
 func (self *Searcher) GetByPath(path ...interface{}) (Node, error) {
-    return self.getByPath(false, true, path...)
+    return self.getByPath(path...)
 }
 
-func (self *Searcher) getByPath(copystring bool, validate bool, path ...interface{}) (Node, error) {
+func (self *Searcher) getByPath(path ...interface{}) (Node, error) {
     var err types.ParsingError
     var start int
 
     self.parser.p = 0
-    start, err = self.parser.getByPath(validate, path...)
+    start, err = self.parser.getByPath(self.ValidateJSON, path...)
     if err != 0 {
         // for compatibility with old version
         if err == types.ERR_NOT_FOUND {
@@ -71,12 +87,12 @@ func (self *Searcher) getByPath(copystring bool, validate bool, path ...interfac
 
     // copy string to reducing memory usage
     var raw string
-    if copystring {
+    if self.CopyReturn {
         raw = rt.Mem2Str([]byte(self.parser.s[start:self.parser.p]))
     } else {
         raw = self.parser.s[start:self.parser.p]
     }
-    return newRawNode(raw, t), nil
+    return newRawNode(raw, t, self.ConcurrentRead), nil
 }
 
 // GetByPath searches a path and returns relaction and types of target
diff --git a/ast/parser_norace_test.go b/ast/search_norace_test.go
similarity index 96%
rename from ast/parser_norace_test.go
rename to ast/search_norace_test.go
index 26eb5298..d41bd78a 100644
--- a/ast/parser_norace_test.go
+++ b/ast/search_norace_test.go
@@ -25,10 +25,11 @@ import (
 
 
 func TestNodeRace(t *testing.T) {
-    noLazy = true
     src := `{"1":1,"2": [ 1 , 1 , { "3" : 1 , "4" : [] } ] }`
-    node := NewRaw(src)
+    s := NewSearcher(src)
+    s.ConcurrentRead = true
+    node, _ := s.GetByPath()
 
     cases := []struct{
         path []interface{}
@@ -83,6 +84,4 @@ func TestNodeRace(t *testing.T) {
 
     start.Unlock()
     wg.Wait()
-
-    noLazy = false
 }
diff --git a/option/option.go b/option/option.go
index c0a31356..c915b449 100644
--- a/option/option.go
+++ b/option/option.go
@@ -16,14 +16,15 @@
 
 package option
 
-import `os`
-
 var (
     // DefaultDecoderBufferSize is the initial buffer size of StreamDecoder
     DefaultDecoderBufferSize uint = 128 * 1024
 
     // DefaultEncoderBufferSize is the initial buffer size of Encoder
     DefaultEncoderBufferSize uint = 128 * 1024
+
+    // DefaultAstEncoderBufferSize is the initial buffer size of ast.Node.MarshalJSON()
+    DefaultAstEncoderBufferSize uint = 4 * 1024
 )
 
 // CompileOptions includes all options for encoder or decoder compiler.
@@ -85,10 +86,3 @@ func WithCompileMaxInlineDepth(depth int) CompileOption {
         o.MaxInlineDepth = depth
     }
 }
-
-var (
-    // AstSafeConcurrentRead indicate that `sonic/ast.Node`` can be concurrently 'Read'
-    // (for `GetByPath/Get/Index/Int64/String/Bool/Float64/Number/Interface/Map/Array/ForEach/Values/Properties`)
-    // NOTICE: Use it may cause `ast.Node`` performace decline
-    AstSafeConcurrentRead = os.Getenv("SONIC_AST_SAFE_CONCURRENT_READ") != ""
-)
\ No newline at end of file
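
For reference, a minimal usage sketch of the new GetWithOptions entry point and the SearchOptions it forwards to the embedded Searcher options; the input JSON, path, and printed value are illustrative only:

```go
package main

import (
    "fmt"

    "github.com/bytedance/sonic"
    "github.com/bytedance/sonic/ast"
)

func main() {
    src := []byte(`{"statuses": [{"id": 1}, {"id": 2}]}`)

    // Validate the whole document, copy the result out of src, and make the
    // returned node safe for concurrent reads.
    node, err := sonic.GetWithOptions(src, ast.SearchOptions{
        ValidateJSON:   true,
        CopyReturn:     true,
        ConcurrentRead: true,
    }, "statuses", 1, "id")
    if err != nil {
        panic(err)
    }

    id, _ := node.Int64()
    fmt.Println(id) // 2
}
```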
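Likewise, a sketch of what NewRawConcurrentRead permits: the raw node now owns a sync.RWMutex, so the read APIs listed in its doc comment may be called from multiple goroutines while lazy parsing is serialized internally (writes remain unsynchronized). The goroutine count and path below are arbitrary:

```go
package main

import (
    "sync"

    "github.com/bytedance/sonic/ast"
)

func main() {
    // Read-safe raw node; plain NewRaw keeps the old lock-free behavior.
    root := ast.NewRawConcurrentRead(`{"a": [1, 2, 3], "b": {"c": "d"}}`)

    var wg sync.WaitGroup
    for i := 0; i < 8; i++ {
        wg.Add(1)
        go func() {
            defer wg.Done()
            node := root.GetByPath("a", 2) // concurrent reads are safe here
            if node.Check() != nil {
                panic(node.Check())
            }
        }()
    }
    wg.Wait()
}
```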
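Finally, the new buffer-size knob in option/option.go can be tuned like the existing ones; the 64 KB value below is purely illustrative:

```go
package main

import "github.com/bytedance/sonic/option"

func init() {
    // ast.Node.MarshalJSON now sizes its pooled scratch buffers from this
    // dedicated option (default 4 KB) instead of DefaultEncoderBufferSize;
    // raise it if your nodes typically encode to larger outputs.
    option.DefaultAstEncoderBufferSize = 64 * 1024
}

func main() {}
```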