Skip to content

Commit 0259677

Browse files
committed
Merge branch '2.19'
2 parents 5d21364 + 2e402c7 commit 0259677

File tree

5 files changed

+81
-57
lines changed

5 files changed

+81
-57
lines changed

cbor/src/main/java/tools/jackson/dataformat/cbor/CBORParser.java

Lines changed: 49 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -2350,18 +2350,14 @@ protected void _finishToken() throws JacksonException
23502350
}
23512351
return;
23522352
}
2353-
// 29-Jan-2021, tatu: as per [dataformats-binary#238] must keep in mind that
2354-
// the longest individual unit is 4 bytes (surrogate pair) so we
2355-
// actually need len+3 bytes to avoid bounds checks
23562353
// 18-Jan-2024, tatu: For malicious input / Fuzzers, need to worry about overflow
23572354
// like Integer.MAX_VALUE
2358-
final int needed = Math.max(len, len + 3);
23592355
final int available = _inputEnd - _inputPtr;
23602356

2361-
if ((available >= needed)
2357+
if ((available >= len)
23622358
// if not, could we read? NOTE: we do not require it, just attempt to read
2363-
|| ((_inputBuffer.length >= needed)
2364-
&& _tryToLoadToHaveAtLeast(needed))) {
2359+
|| ((_inputBuffer.length >= len)
2360+
&& _tryToLoadToHaveAtLeast(len))) {
23652361
_finishShortText(len);
23662362
return;
23672363
}
@@ -2392,22 +2388,18 @@ protected String _finishTextToken(int ch) throws JacksonException
23922388
_finishChunkedText();
23932389
return _textBuffer.contentsAsString();
23942390
}
2395-
// 29-Jan-2021, tatu: as per [dataformats-binary#238] must keep in mind that
2396-
// the longest individual unit is 4 bytes (surrogate pair) so we
2397-
// actually need len+3 bytes to avoid bounds checks
23982391

23992392
// 19-Mar-2021, tatu: [dataformats-binary#259] shows the case where length
24002393
// we get is Integer.MAX_VALUE, leading to overflow. Could change values
24012394
// to longs but simpler to truncate "needed" (will never pass following test
24022395
// due to inputBuffer never being even close to that big).
24032396

2404-
final int needed = Math.max(len + 3, len);
24052397
final int available = _inputEnd - _inputPtr;
24062398

2407-
if ((available >= needed)
2399+
if ((available >= len)
24082400
// if not, could we read? NOTE: we do not require it, just attempt to read
2409-
|| ((_inputBuffer.length >= needed)
2410-
&& _tryToLoadToHaveAtLeast(needed))) {
2401+
|| ((_inputBuffer.length >= len)
2402+
&& _tryToLoadToHaveAtLeast(len))) {
24112403
return _finishShortText(len);
24122404
}
24132405
// If not enough space, need handling similar to chunked
@@ -2435,7 +2427,7 @@ private final String _finishShortText(int len) throws JacksonException
24352427
final byte[] inputBuf = _inputBuffer;
24362428

24372429
// Let's actually do a tight loop for ASCII first:
2438-
final int end = inPtr + len;
2430+
final int end = _inputPtr;
24392431

24402432
int i;
24412433
while ((i = inputBuf[inPtr]) >= 0) {
@@ -2452,44 +2444,50 @@ private final String _finishShortText(int len) throws JacksonException
24522444
final int[] codes = UTF8_UNIT_CODES;
24532445
do {
24542446
i = inputBuf[inPtr++] & 0xFF;
2455-
switch (codes[i]) {
2456-
case 0:
2457-
break;
2458-
case 1:
2459-
{
2460-
final int c2 = inputBuf[inPtr++];
2461-
if ((c2 & 0xC0) != 0x080) {
2462-
_reportInvalidOther(c2 & 0xFF, inPtr);
2463-
}
2464-
i = ((i & 0x1F) << 6) | (c2 & 0x3F);
2447+
int code = codes[i];
2448+
if (code != 0) {
2449+
// 05-Jul-2021, tatu: As per [dataformats-binary#289] need to
2450+
// be careful wrt end-of-buffer truncated codepoints
2451+
if ((inPtr + code) > end) {
2452+
final int firstCharOffset = len - (end - inPtr) - 1;
2453+
_reportTruncatedUTF8InString(len, firstCharOffset, i, code);
24652454
}
2466-
break;
2467-
case 2:
2468-
{
2469-
final int c2 = inputBuf[inPtr++];
2470-
if ((c2 & 0xC0) != 0x080) {
2471-
_reportInvalidOther(c2 & 0xFF, inPtr);
2455+
2456+
switch (code) {
2457+
case 1: {
2458+
final int c2 = inputBuf[inPtr++];
2459+
if ((c2 & 0xC0) != 0x080) {
2460+
_reportInvalidOther(c2 & 0xFF, inPtr);
2461+
}
2462+
i = ((i & 0x1F) << 6) | (c2 & 0x3F);
24722463
}
2473-
final int c3 = inputBuf[inPtr++];
2474-
if ((c3 & 0xC0) != 0x080) {
2475-
_reportInvalidOther(c3 & 0xFF, inPtr);
2464+
break;
2465+
case 2: {
2466+
final int c2 = inputBuf[inPtr++];
2467+
if ((c2 & 0xC0) != 0x080) {
2468+
_reportInvalidOther(c2 & 0xFF, inPtr);
2469+
}
2470+
final int c3 = inputBuf[inPtr++];
2471+
if ((c3 & 0xC0) != 0x080) {
2472+
_reportInvalidOther(c3 & 0xFF, inPtr);
2473+
}
2474+
i = ((i & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
24762475
}
2477-
i = ((i & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F);
2476+
break;
2477+
case 3:
2478+
// 30-Jan-2021, tatu: TODO - validate these too?
2479+
i = ((i & 0x07) << 18)
2480+
| ((inputBuf[inPtr++] & 0x3F) << 12)
2481+
| ((inputBuf[inPtr++] & 0x3F) << 6)
2482+
| (inputBuf[inPtr++] & 0x3F);
2483+
// note: this is the codepoint value; need to split, too
2484+
i -= 0x10000;
2485+
outBuf[outPtr++] = (char) (0xD800 | (i >> 10));
2486+
i = 0xDC00 | (i & 0x3FF);
2487+
break;
2488+
default: // invalid
2489+
_reportInvalidInitial(i);
24782490
}
2479-
break;
2480-
case 3:
2481-
// 30-Jan-2021, tatu: TODO - validate these too?
2482-
i = ((i & 0x07) << 18)
2483-
| ((inputBuf[inPtr++] & 0x3F) << 12)
2484-
| ((inputBuf[inPtr++] & 0x3F) << 6)
2485-
| (inputBuf[inPtr++] & 0x3F);
2486-
// note: this is the codepoint value; need to split, too
2487-
i -= 0x10000;
2488-
outBuf[outPtr++] = (char) (0xD800 | (i >> 10));
2489-
i = 0xDC00 | (i & 0x3FF);
2490-
break;
2491-
default: // invalid
2492-
_reportInvalidInitial(i);
24932491
}
24942492
outBuf[outPtr++] = (char) i;
24952493
} while (inPtr < end);
@@ -3919,18 +3917,16 @@ protected void _reportIncompleteBinaryRead(int expLen, int actLen) throws Stream
39193917
expLen, actLen), _currToken);
39203918
}
39213919

3922-
// @since 2.13
3923-
/*
3920+
// @since 2.18.1
39243921
private String _reportTruncatedUTF8InString(int strLenBytes, int truncatedCharOffset,
39253922
int firstUTFByteValue, int bytesExpected)
39263923
throws JacksonException
39273924
{
39283925
throw _constructReadException(String.format(
3929-
"Truncated UTF-8 character in Chunked Unicode String value (%d bytes): "
3926+
"Truncated UTF-8 character in Unicode String value (%d bytes): "
39303927
+"byte 0x%02X at offset #%d indicated %d more bytes needed",
39313928
strLenBytes, firstUTFByteValue, truncatedCharOffset, bytesExpected));
39323929
}
3933-
*/
39343930

39353931
// @since 2.13
39363932
private String _reportTruncatedUTF8InName(int strLenBytes, int truncatedCharOffset,

cbor/src/test/java/tools/jackson/dataformat/cbor/fuzz/CBORFuzz_35979_StringValueTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ public void testInvalidTextValueWithBrokenUTF8() throws Exception
2626
p.getText();
2727
fail("Should not pass");
2828
} catch (StreamReadException e) {
29-
verifyException(e, "Malformed UTF-8 character at the end of a");
29+
verifyException(e, "Truncated UTF-8 character in Unicode String value (256 bytes)");
3030
}
3131

3232
}

cbor/src/test/java/tools/jackson/dataformat/cbor/parse/ParseInvalidUTF8String236Test.java

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
import tools.jackson.dataformat.cbor.CBORParser;
66
import tools.jackson.dataformat.cbor.CBORTestBase;
77

8+
import java.nio.charset.StandardCharsets;
9+
import java.util.Arrays;
10+
811
public class ParseInvalidUTF8String236Test extends CBORTestBase
912
{
1013
// [dataformats-binary#236]: Original version; broken UTF-8 all around.
@@ -24,8 +27,8 @@ public void testShortString236Original() throws Exception
2427
}
2528

2629
// Variant where the length would be valid, but the last byte is partial UTF-8
27-
// code point
28-
public void testShortString236EndsWithPartialUTF8() throws Exception
30+
// code point and no more bytes are available due to end-of-stream
31+
public void testShortString236EndsWithPartialUTF8AtEndOfStream() throws Exception
2932
{
3033
final byte[] input = {0x63, 0x41, 0x2d, (byte) 0xda};
3134
try (CBORParser p = cborParser(input)) {
@@ -34,7 +37,23 @@ public void testShortString236EndsWithPartialUTF8() throws Exception
3437
String str = p.getText();
3538
fail("Should have failed, did not, String = '"+str+"'");
3639
} catch (StreamReadException e) {
37-
verifyException(e, "Malformed UTF-8 character at the end of");
40+
verifyException(e, "Truncated UTF-8 character in Unicode String value (3 bytes)");
41+
}
42+
}
43+
}
44+
45+
// Variant where the length would be valid, but the last byte is partial UTF-8
46+
// code point and the subsequent byte would be a valid continuation byte, but belongs to next data item
47+
public void testShortString236EndsWithPartialUTF8() throws Exception
48+
{
49+
final byte[] input = {0x62, 0x33, (byte) 0xdb, (byte) 0xa0};
50+
try (CBORParser p = cborParser(input)) {
51+
assertToken(JsonToken.VALUE_STRING, p.nextToken());
52+
try {
53+
String str = p.getText();
54+
fail("Should have failed, did not, String = '"+str+"'");
55+
} catch (StreamReadException e) {
56+
verifyException(e, "Truncated UTF-8 character in Unicode String value (2 bytes)");
3857
}
3958
}
4059
}

release-notes/CREDITS-2.x

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -346,3 +346,7 @@ Joachim Lous (@jlous)
346346
Robert Noack (@mr-robert)
347347
* Reported #509: IonValueMapper.builder() not implemented, does not register modules
348348
(2.18.0)
349+
350+
Knut Wannheden (@knutwannheden)
351+
* Contributed #518: Should not read past end for CBOR string values
352+
(2.18.1)

release-notes/VERSION-2.x

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,11 @@ Active maintainers:
1414
=== Releases ===
1515
------------------------------------------------------------------------
1616

17+
2.18.1 (not yet released)
18+
19+
#518: Should not read past end for CBOR string values
20+
(contributed by Knut W)
21+
1722
2.18.0 (26-Sep-2024)
1823

1924
#167: (avro) Incompatibility with Avro >=1.9.0 (upgrade to Avro 1.11.3)

0 commit comments

Comments
 (0)