Skip to content

Commit 3d565bd

Browse files
committed
fix #994 Allow UTF8StreamJsonParser to be used without canonicalization
Previously, when canonicalization was disabled, the ReaderBasedJsonParser was used instead, which is less performant when reading from an InputStream (since it handles charset decoding in addition to JSON parsing). This commit updates the JsonFactory factory methods to respect the canonicalization configuration, where previously a canonicalizing implementation was always used. I have added guards around both `_symbols.addName` and `_symbols.findName` based on the existing implementation from `SmileParser`. For correctness, only the guards around `addName` are required, but we avoid unnecessary hashing by guarding both.
1 parent f98e22a commit 3d565bd

6 files changed

+116
-55
lines changed

src/main/java/com/fasterxml/jackson/core/JsonFactory.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -1299,7 +1299,7 @@ public JsonParser createNonBlockingByteArrayParser() throws IOException
12991299
// for non-JSON input:
13001300
_requireJSONFactory("Non-blocking source not (yet?) supported for this format (%s)");
13011301
IOContext ctxt = _createNonBlockingContext(null);
1302-
ByteQuadsCanonicalizer can = _byteSymbolCanonicalizer.makeChild(_factoryFeatures);
1302+
ByteQuadsCanonicalizer can = _byteSymbolCanonicalizer.makeChildOrPlaceholder(_factoryFeatures);
13031303
return new NonBlockingJsonParser(ctxt, _parserFeatures, can);
13041304
}
13051305

@@ -1326,7 +1326,7 @@ public JsonParser createNonBlockingByteBufferParser() throws IOException
13261326
// for non-JSON input:
13271327
_requireJSONFactory("Non-blocking source not (yet?) supported for this format (%s)");
13281328
IOContext ctxt = _createNonBlockingContext(null);
1329-
ByteQuadsCanonicalizer can = _byteSymbolCanonicalizer.makeChild(_factoryFeatures);
1329+
ByteQuadsCanonicalizer can = _byteSymbolCanonicalizer.makeChildOrPlaceholder(_factoryFeatures);
13301330
return new NonBlockingByteBufferJsonParser(ctxt, _parserFeatures, can);
13311331
}
13321332

@@ -1849,7 +1849,7 @@ protected JsonParser _createParser(DataInput input, IOContext ctxt) throws IOExc
18491849
// Also: while we can't do full bootstrapping (due to read-ahead limitations), should
18501850
// at least handle possible UTF-8 BOM
18511851
int firstByte = ByteSourceJsonBootstrapper.skipUTF8BOM(input);
1852-
ByteQuadsCanonicalizer can = _byteSymbolCanonicalizer.makeChild(_factoryFeatures);
1852+
ByteQuadsCanonicalizer can = _byteSymbolCanonicalizer.makeChildOrPlaceholder(_factoryFeatures);
18531853
return new UTF8DataInputJsonParser(ctxt, _parserFeatures, input,
18541854
_objectCodec, can, firstByte);
18551855
}

src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java

+3-8
Original file line numberDiff line numberDiff line change
@@ -257,14 +257,9 @@ public JsonParser constructParser(int parserFeatures, ObjectCodec codec,
257257
int bytesProcessed = _inputPtr - prevInputPtr;
258258

259259
if (enc == JsonEncoding.UTF8) {
260-
/* and without canonicalization, byte-based approach is not performant; just use std UTF-8 reader
261-
* (which is ok for larger input; not so hot for smaller; but this is not a common case)
262-
*/
263-
if (JsonFactory.Feature.CANONICALIZE_FIELD_NAMES.enabledIn(factoryFeatures)) {
264-
ByteQuadsCanonicalizer can = rootByteSymbols.makeChild(factoryFeatures);
265-
return new UTF8StreamJsonParser(_context, parserFeatures, _in, codec, can,
266-
_inputBuffer, _inputPtr, _inputEnd, bytesProcessed, _bufferRecyclable);
267-
}
260+
ByteQuadsCanonicalizer can = rootByteSymbols.makeChildOrPlaceholder(factoryFeatures);
261+
return new UTF8StreamJsonParser(_context, parserFeatures, _in, codec, can,
262+
_inputBuffer, _inputPtr, _inputEnd, bytesProcessed, _bufferRecyclable);
268263
}
269264
return new ReaderBasedJsonParser(_context, parserFeatures, constructReader(), codec,
270265
rootCharSymbols.makeChild(factoryFeatures));

src/main/java/com/fasterxml/jackson/core/json/UTF8DataInputJsonParser.java

+37-15
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,18 @@ public class UTF8DataInputJsonParser
7777
*/
7878
final protected ByteQuadsCanonicalizer _symbols;
7979

80+
/**
81+
* Marker flag to indicate that standard symbol handling is used
82+
* (one with symbol table assisted canonicalization). May be disabled
83+
* in which case alternate streamlined, non-canonicalizing handling
84+
* is used: usually because the set of symbols
85+
* (Object property names) is unbounded and will not benefit from
86+
* canonicalization attempts.
87+
*
88+
* @since 2.16
89+
*/
90+
final protected boolean _symbolsCanonical;
91+
8092
/*
8193
/**********************************************************
8294
/* Parsing state
@@ -127,6 +139,7 @@ public UTF8DataInputJsonParser(IOContext ctxt, int features, DataInput inputData
127139
super(ctxt, features);
128140
_objectCodec = codec;
129141
_symbols = sym;
142+
_symbolsCanonical = sym.isCanonicalizing();
130143
_inputData = inputData;
131144
_nextByte = firstByte;
132145
}
@@ -1580,7 +1593,7 @@ protected final String parseEscapedName(int[] quads, int qlen, int currQuad, int
15801593
}
15811594
quads[qlen++] = pad(currQuad, currQuadBytes);
15821595
}
1583-
String name = _symbols.findName(quads, qlen);
1596+
String name = _symbolsCanonical ? _symbols.findName(quads, qlen) : null;
15841597
if (name == null) {
15851598
name = addName(quads, qlen, currQuadBytes);
15861599
}
@@ -1654,7 +1667,7 @@ protected String _handleOddName(int ch) throws IOException
16541667
}
16551668
quads[qlen++] = currQuad;
16561669
}
1657-
String name = _symbols.findName(quads, qlen);
1670+
String name = _symbolsCanonical ? _symbols.findName(quads, qlen) : null;
16581671
if (name == null) {
16591672
name = addName(quads, qlen, currQuadBytes);
16601673
}
@@ -1754,7 +1767,7 @@ protected String _parseAposName() throws IOException
17541767
}
17551768
quads[qlen++] = pad(currQuad, currQuadBytes);
17561769
}
1757-
String name = _symbols.findName(quads, qlen);
1770+
String name = _symbolsCanonical ? _symbols.findName(quads, qlen) : null;
17581771
if (name == null) {
17591772
name = addName(quads, qlen, currQuadBytes);
17601773
}
@@ -1771,10 +1784,12 @@ private final String findName(int q1, int lastQuadBytes)
17711784
throws JsonParseException, StreamConstraintsException
17721785
{
17731786
q1 = pad(q1, lastQuadBytes);
1774-
// Usually we'll find it from the canonical symbol table already
1775-
String name = _symbols.findName(q1);
1776-
if (name != null) {
1777-
return name;
1787+
if (_symbolsCanonical) {
1788+
// Usually we'll find it from the canonical symbol table already
1789+
String name = _symbols.findName(q1);
1790+
if (name != null) {
1791+
return name;
1792+
}
17781793
}
17791794
// If not, more work. We'll need add stuff to buffer
17801795
_quadBuffer[0] = q1;
@@ -1785,10 +1800,12 @@ private final String findName(int q1, int q2, int lastQuadBytes)
17851800
throws JsonParseException, StreamConstraintsException
17861801
{
17871802
q2 = pad(q2, lastQuadBytes);
1788-
// Usually we'll find it from the canonical symbol table already
1789-
String name = _symbols.findName(q1, q2);
1790-
if (name != null) {
1791-
return name;
1803+
if (_symbolsCanonical) {
1804+
// Usually we'll find it from the canonical symbol table already
1805+
String name = _symbols.findName(q1, q2);
1806+
if (name != null) {
1807+
return name;
1808+
}
17921809
}
17931810
// If not, more work. We'll need add stuff to buffer
17941811
_quadBuffer[0] = q1;
@@ -1800,9 +1817,11 @@ private final String findName(int q1, int q2, int q3, int lastQuadBytes)
18001817
throws JsonParseException, StreamConstraintsException
18011818
{
18021819
q3 = pad(q3, lastQuadBytes);
1803-
String name = _symbols.findName(q1, q2, q3);
1804-
if (name != null) {
1805-
return name;
1820+
if (_symbolsCanonical) {
1821+
String name = _symbols.findName(q1, q2, q3);
1822+
if (name != null) {
1823+
return name;
1824+
}
18061825
}
18071826
int[] quads = _quadBuffer;
18081827
quads[0] = q1;
@@ -1818,7 +1837,7 @@ private final String findName(int[] quads, int qlen, int lastQuad, int lastQuadB
18181837
_quadBuffer = quads = _growArrayBy(quads, quads.length);
18191838
}
18201839
quads[qlen++] = pad(lastQuad, lastQuadBytes);
1821-
String name = _symbols.findName(quads, qlen);
1840+
String name = _symbolsCanonical ? _symbols.findName(quads, qlen) : null;
18221841
if (name == null) {
18231842
return addName(quads, qlen, lastQuadBytes);
18241843
}
@@ -1933,6 +1952,9 @@ private final String addName(int[] quads, int qlen, int lastQuadBytes)
19331952

19341953
// Ok. Now we have the character array, and can construct the String
19351954
String baseName = new String(cbuf, 0, cix);
1955+
if (!_symbolsCanonical) {
1956+
return baseName;
1957+
}
19361958
// And finally, un-align if necessary
19371959
if (lastQuadBytes < 4) {
19381960
quads[qlen-1] = lastQuad;

src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java

+37-15
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,18 @@ public class UTF8StreamJsonParser
5959
*/
6060
final protected ByteQuadsCanonicalizer _symbols;
6161

62+
/**
63+
* Marker flag to indicate that standard symbol handling is used
64+
* (one with symbol table assisted canonicalization). May be disabled
65+
* in which case alternate streamlined, non-canonicalizing handling
66+
* is used: usually because the set of symbols
67+
* (Object property names) is unbounded and will not benefit from
68+
* canonicalization attempts.
69+
*
70+
* @since 2.16
71+
*/
72+
final protected boolean _symbolsCanonical;
73+
6274
/*
6375
/**********************************************************
6476
/* Parsing state
@@ -192,6 +204,7 @@ public UTF8StreamJsonParser(IOContext ctxt, int features, InputStream in,
192204
_inputStream = in;
193205
_objectCodec = codec;
194206
_symbols = sym;
207+
_symbolsCanonical = sym.isCanonicalizing();
195208
_inputBuffer = inputBuffer;
196209
_inputPtr = start;
197210
_inputEnd = end;
@@ -2112,7 +2125,7 @@ protected final String parseEscapedName(int[] quads, int qlen, int currQuad, int
21122125
}
21132126
quads[qlen++] = _padLastQuad(currQuad, currQuadBytes);
21142127
}
2115-
String name = _symbols.findName(quads, qlen);
2128+
String name = _symbolsCanonical ? _symbols.findName(quads, qlen) : null;
21162129
if (name == null) {
21172130
name = addName(quads, qlen, currQuadBytes);
21182131
}
@@ -2192,7 +2205,7 @@ protected String _handleOddName(int ch) throws IOException
21922205
}
21932206
quads[qlen++] = currQuad;
21942207
}
2195-
String name = _symbols.findName(quads, qlen);
2208+
String name = _symbolsCanonical ? _symbols.findName(quads, qlen) : null;
21962209
if (name == null) {
21972210
name = addName(quads, qlen, currQuadBytes);
21982211
}
@@ -2297,7 +2310,7 @@ protected String _parseAposName() throws IOException
22972310
}
22982311
quads[qlen++] = _padLastQuad(currQuad, currQuadBytes);
22992312
}
2300-
String name = _symbols.findName(quads, qlen);
2313+
String name = _symbolsCanonical ? _symbols.findName(quads, qlen) : null;
23012314
if (name == null) {
23022315
name = addName(quads, qlen, currQuadBytes);
23032316
}
@@ -2314,10 +2327,12 @@ private final String findName(int q1, int lastQuadBytes)
23142327
throws JsonParseException, StreamConstraintsException
23152328
{
23162329
q1 = _padLastQuad(q1, lastQuadBytes);
2317-
// Usually we'll find it from the canonical symbol table already
2318-
String name = _symbols.findName(q1);
2319-
if (name != null) {
2320-
return name;
2330+
if (_symbolsCanonical) {
2331+
// Usually we'll find it from the canonical symbol table already
2332+
String name = _symbols.findName(q1);
2333+
if (name != null) {
2334+
return name;
2335+
}
23212336
}
23222337
// If not, more work. We'll need add stuff to buffer
23232338
_quadBuffer[0] = q1;
@@ -2328,10 +2343,12 @@ private final String findName(int q1, int q2, int lastQuadBytes)
23282343
throws JsonParseException, StreamConstraintsException
23292344
{
23302345
q2 = _padLastQuad(q2, lastQuadBytes);
2331-
// Usually we'll find it from the canonical symbol table already
2332-
String name = _symbols.findName(q1, q2);
2333-
if (name != null) {
2334-
return name;
2346+
if (_symbolsCanonical) {
2347+
// Usually we'll find it from the canonical symbol table already
2348+
String name = _symbols.findName(q1, q2);
2349+
if (name != null) {
2350+
return name;
2351+
}
23352352
}
23362353
// If not, more work. We'll need add stuff to buffer
23372354
_quadBuffer[0] = q1;
@@ -2343,9 +2360,11 @@ private final String findName(int q1, int q2, int q3, int lastQuadBytes)
23432360
throws JsonParseException, StreamConstraintsException
23442361
{
23452362
q3 = _padLastQuad(q3, lastQuadBytes);
2346-
String name = _symbols.findName(q1, q2, q3);
2347-
if (name != null) {
2348-
return name;
2363+
if (_symbolsCanonical) {
2364+
String name = _symbols.findName(q1, q2, q3);
2365+
if (name != null) {
2366+
return name;
2367+
}
23492368
}
23502369
int[] quads = _quadBuffer;
23512370
quads[0] = q1;
@@ -2361,7 +2380,7 @@ private final String findName(int[] quads, int qlen, int lastQuad, int lastQuadB
23612380
_quadBuffer = quads = growArrayBy(quads, quads.length);
23622381
}
23632382
quads[qlen++] = _padLastQuad(lastQuad, lastQuadBytes);
2364-
String name = _symbols.findName(quads, qlen);
2383+
String name = _symbolsCanonical ? _symbols.findName(quads, qlen) : null;
23652384
if (name == null) {
23662385
return addName(quads, qlen, lastQuadBytes);
23672386
}
@@ -2475,6 +2494,9 @@ private final String addName(int[] quads, int qlen, int lastQuadBytes)
24752494

24762495
// Ok. Now we have the character array, and can construct the String
24772496
String baseName = new String(cbuf, 0, cix);
2497+
if (!_symbolsCanonical) {
2498+
return baseName;
2499+
}
24782500
// And finally, un-align if necessary
24792501
if (lastQuadBytes < 4) {
24802502
quads[qlen-1] = lastQuad;

src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingJsonParserBase.java

+33-11
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,18 @@ public abstract class NonBlockingJsonParserBase
146146
*/
147147
final protected ByteQuadsCanonicalizer _symbols;
148148

149+
/**
150+
* Marker flag to indicate that standard symbol handling is used
151+
* (one with symbol table assisted canonicalization). May be disabled
152+
* in which case alternate streamlined, non-canonicalizing handling
153+
* is used: usually because the set of symbols
154+
* (Object property names) is unbounded and will not benefit from
155+
* canonicalization attempts.
156+
*
157+
* @since 2.16
158+
*/
159+
final protected boolean _symbolsCanonical;
160+
149161
/**
150162
* Temporary buffer used for name parsing.
151163
*/
@@ -257,6 +269,7 @@ public NonBlockingJsonParserBase(IOContext ctxt, int parserFeatures,
257269
{
258270
super(ctxt, parserFeatures);
259271
_symbols = sym;
272+
_symbolsCanonical = sym.isCanonicalizing();
260273
_currToken = null;
261274
_majorState = MAJOR_INITIAL;
262275
_majorStateAfterValue = MAJOR_ROOT;
@@ -644,10 +657,12 @@ protected final String _findName(int q1, int lastQuadBytes)
644657
throws JsonParseException, StreamConstraintsException
645658
{
646659
q1 = _padLastQuad(q1, lastQuadBytes);
647-
// Usually we'll find it from the canonical symbol table already
648-
String name = _symbols.findName(q1);
649-
if (name != null) {
650-
return name;
660+
if (_symbolsCanonical) {
661+
// Usually we'll find it from the canonical symbol table already
662+
String name = _symbols.findName(q1);
663+
if (name != null) {
664+
return name;
665+
}
651666
}
652667
// If not, more work. We'll need add stuff to buffer
653668
_quadBuffer[0] = q1;
@@ -658,10 +673,12 @@ protected final String _findName(int q1, int q2, int lastQuadBytes)
658673
throws JsonParseException, StreamConstraintsException
659674
{
660675
q2 = _padLastQuad(q2, lastQuadBytes);
661-
// Usually we'll find it from the canonical symbol table already
662-
String name = _symbols.findName(q1, q2);
663-
if (name != null) {
664-
return name;
676+
if (_symbolsCanonical) {
677+
// Usually we'll find it from the canonical symbol table already
678+
String name = _symbols.findName(q1, q2);
679+
if (name != null) {
680+
return name;
681+
}
665682
}
666683
// If not, more work. We'll need add stuff to buffer
667684
_quadBuffer[0] = q1;
@@ -673,9 +690,11 @@ protected final String _findName(int q1, int q2, int q3, int lastQuadBytes)
673690
throws JsonParseException, StreamConstraintsException
674691
{
675692
q3 = _padLastQuad(q3, lastQuadBytes);
676-
String name = _symbols.findName(q1, q2, q3);
677-
if (name != null) {
678-
return name;
693+
if (_symbolsCanonical) {
694+
String name = _symbols.findName(q1, q2, q3);
695+
if (name != null) {
696+
return name;
697+
}
679698
}
680699
int[] quads = _quadBuffer;
681700
quads[0] = q1;
@@ -790,6 +809,9 @@ protected final String _addName(int[] quads, int qlen, int lastQuadBytes)
790809

791810
// Ok. Now we have the character array, and can construct the String
792811
String baseName = new String(cbuf, 0, cix);
812+
if (!_symbolsCanonical) {
813+
return baseName;
814+
}
793815
// And finally, un-align if necessary
794816
if (lastQuadBytes < 4) {
795817
quads[qlen-1] = lastQuad;

0 commit comments

Comments
 (0)