-
Notifications
You must be signed in to change notification settings - Fork 91
/
Copy pathstage1_find_marks_amd64.go
148 lines (125 loc) · 4.83 KB
/
stage1_find_marks_amd64.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
//go:build !noasm && !appengine && gc
// +build !noasm,!appengine,gc
/*
* MinIO Cloud Storage, (C) 2020 MinIO, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package simdjson
import (
"sync/atomic"
"github.com/klauspost/cpuid/v2"
)
// jsonMarkupTable is a 256-entry lookup table indexed by byte value. An entry
// is true only for the six JSON markup bytes: '{', '}', '[', ']', ',' and ':'.
var jsonMarkupTable = func() [256]bool {
	var table [256]bool
	for _, c := range []byte("{}[],:") {
		table[c] = true
	}
	return table
}()

// jsonMarkup reports whether b is one of the JSON markup bytes flagged in
// jsonMarkupTable.
func jsonMarkup(b byte) bool {
	return jsonMarkupTable[b]
}
// findStructuralIndices walks pj.Message and hands every chunk to the
// structural-bit finder (the AVX-512 variant when the CPU reports AVX512F,
// the generic variant otherwise). Each pass fills one index buffer taken from
// pj.buffers and publishes it on pj.indexChans; after the loop a terminating
// indexChan{index: -1} is always sent, including on the error paths.
//
// It returns true only when no error was flagged and at least one index was
// published.
func (pj *internalParsedJson) findStructuralIndices() bool {
	// All bits set: used both as the "error" value for the error mask and as
	// the "nothing stored" marker for position and the stripped index.
	const allOnes = ^uint64(0)

	useAVX512 := cpuid.CPU.Has(cpuid.AVX512F)

	// State that persists across loop iterations (and across finder calls).
	var (
		// Does the last iteration end with an odd-length sequence of
		// backslashes? Either 0 or 1, but kept as a 64-bit value.
		endsOddBackslash uint64
		// Does the previous iteration end inside a double-quote pair?
		// Either all zeros or all ones.
		insideQuote uint64
		// Does the previous iteration end on a predecessor of a
		// pseudo-structural character (whitespace or a structural character)?
		// The very first char is treated as following "whitespace", hence 1.
		endsPseudoPred uint64 = 1
		// For unescaped characters within strings (ASCII code points < 0x20).
		errorMask uint64
		// Empty bits carried over to the next finder call.
		carried uint64
		// Absolute position into the message buffer.
		position = allOnes
		// Index stripped from the end of the previous batch, if any.
		stripped = allOnes
		// Number of indexes published so far.
		total int
	)

	// scan runs the CPU-appropriate finder over chunk, appending to out's
	// index buffer and updating the shared state above. It returns the
	// finder's result, which the loop below treats as a count of processed
	// bytes.
	scan := func(chunk []byte, out *indexChan) uint64 {
		if useAVX512 {
			return find_structural_bits_in_slice_avx512(chunk, &endsOddBackslash,
				&insideQuote, &errorMask,
				&endsPseudoPred,
				out.indexes, &out.length, &carried, &position, pj.ndjson)
		}
		return find_structural_bits_in_slice(chunk, &endsOddBackslash,
			&insideQuote, &errorMask,
			&endsPseudoPred,
			out.indexes, &out.length, &carried, &position, pj.ndjson)
	}

	rest := pj.Message
	for len(rest) > 0 {
		// Reserve the next index buffer slot.
		slot := atomic.AddUint64(&pj.buffersOffset, 1) % indexSlots
		batch := indexChan{indexes: &pj.buffers[slot]}

		// If the last index of the previous round was stripped back,
		// re-insert it at the front of this batch.
		if stripped != allOnes {
			position += stripped
			batch.indexes[0] = uint32(stripped)
			batch.length = 1
			stripped = allOnes
		}

		size := uint64(len(rest))

		// First hand over only whole 64-byte blocks.
		done := scan(rest[:len(rest)&^63], &batch)

		// At most a single 64-byte iteration left: tag it on to this round,
		// processing it from a larger zero-padded copy to safeguard against
		// reading beyond the end of the buffer.
		if tail := size - done; tail <= 64 {
			var padded [128]byte
			copy(padded[:], rest[done:])
			done += scan(padded[:tail], &batch)
		}

		// No structural chars found, so error out.
		if batch.length == 0 {
			errorMask = allOnes
			break
		}

		if size == done {
			// Message fully processed. It is only acceptable when we are not
			// inside an unmatched quote and the final structural char is a
			// '}' (normal json) or a ']' (array style).
			endsOnCloser := position < size &&
				(rest[position] == '}' || rest[position] == ']')
			if insideQuote != 0 || !endsOnCloser {
				errorMask = allOnes
				break
			}
		} else if !jsonMarkup(rest[position]) {
			// There may be a dangling quote at the end of the index buffer:
			// strip it from this batch and save it for the next round.
			stripped = uint64(batch.indexes[batch.length-1])
			position -= stripped
			batch.length--
		}

		pj.indexChans <- batch
		total += batch.length

		// Advance past the consumed bytes and rebase position accordingly.
		rest = rest[done:]
		position -= done
	}

	// Signal that no further batches will follow.
	pj.indexChans <- indexChan{index: -1}

	// A valid JSON file cannot have zero structural indexes - we should have
	// found something.
	return errorMask == 0 && total > 0
}