Skip to content

Commit ef5d673

Browse files
committed
Try to optimize encoding of surrogate pairs further
1 parent f57c128 commit ef5d673

File tree

1 file changed

+25
-22
lines changed

1 file changed

+25
-22
lines changed

src/main/java/com/fasterxml/jackson/core/json/UTF8JsonGenerator.java

Lines changed: 25 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -660,10 +660,6 @@ public void writeUTF8String(byte[] text, int offset, int len) throws IOException
660660
_outputBuffer[_outputTail++] = _quoteChar;
661661
}
662662

663-
private boolean isSurrogatePair(char ch) {
664-
return (ch & 0xD800) == 0xD800;
665-
}
666-
667663
/*
668664
/**********************************************************
669665
/* Output method implementations, unprocessed ("raw")
@@ -1494,8 +1490,6 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
14941490
final byte[] outputBuffer = _outputBuffer;
14951491
final int[] escCodes = _outputEscapes;
14961492

1497-
boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
1498-
14991493
while (offset < end) {
15001494
int ch = cbuf[offset++];
15011495
if (ch <= 0x7F) {
@@ -1517,14 +1511,17 @@ private final void _writeStringSegment2(final char[] cbuf, int offset, final int
15171511
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
15181512
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
15191513
} else {
1520-
// multibyte character
1521-
if (combineSurrogates && isSurrogatePair((char) ch) && offset < end) {
1522-
char highSurrogate = (char) ch;
1523-
char lowSurrogate = cbuf[offset++];
1524-
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1525-
} else {
1526-
outputPtr = _outputMultiByteChar(ch, outputPtr);
1514+
// 3- or 4-byte character
1515+
if (_isSurrogateChar((char) ch)) {
1516+
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
1517+
if (combineSurrogates && offset < end) {
1518+
char highSurrogate = (char) ch;
1519+
char lowSurrogate = cbuf[offset++];
1520+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1521+
continue;
1522+
}
15271523
}
1524+
outputPtr = _outputMultiByteChar(ch, outputPtr);
15281525
}
15291526
}
15301527
_outputTail = outputPtr;
@@ -1541,8 +1538,6 @@ private final void _writeStringSegment2(final String text, int offset, final int
15411538
final byte[] outputBuffer = _outputBuffer;
15421539
final int[] escCodes = _outputEscapes;
15431540

1544-
boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
1545-
15461541
while (offset < end) {
15471542
int ch = text.charAt(offset++);
15481543
if (ch <= 0x7F) {
@@ -1564,14 +1559,17 @@ private final void _writeStringSegment2(final String text, int offset, final int
15641559
outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6));
15651560
outputBuffer[outputPtr++] = (byte) (0x80 | (ch & 0x3f));
15661561
} else {
1567-
// multibyte character
1568-
if (combineSurrogates && isSurrogatePair((char) ch) && offset < end) {
1569-
char highSurrogate = (char) ch;
1570-
char lowSurrogate = text.charAt(offset++);
1571-
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1572-
} else {
1573-
outputPtr = _outputMultiByteChar(ch, outputPtr);
1562+
// 3- or 4-byte character
1563+
if (_isSurrogateChar((char) ch)) {
1564+
final boolean combineSurrogates = Feature.COMBINE_UNICODE_SURROGATES_IN_UTF8.enabledIn(_features);
1565+
if (combineSurrogates && offset < end) {
1566+
char highSurrogate = (char) ch;
1567+
char lowSurrogate = text.charAt(offset++);
1568+
outputPtr = _outputSurrogatePair(highSurrogate, lowSurrogate, outputPtr);
1569+
continue;
1570+
}
15741571
}
1572+
outputPtr = _outputMultiByteChar(ch, outputPtr);
15751573
}
15761574
}
15771575
_outputTail = outputPtr;
@@ -2244,5 +2242,10 @@ protected final void _flushBuffer() throws IOException
22442242
private byte[] getHexBytes() {
22452243
return _cfgWriteHexUppercase ? HEX_BYTES_UPPER : HEX_BYTES_LOWER;
22462244
}
2245+
2246+
// @since 2.18
2247+
private boolean _isSurrogateChar(char ch) {
2248+
return (ch & 0xD800) == 0xD800;
2249+
}
22472250
}
22482251

0 commit comments

Comments
 (0)