@@ -2350,18 +2350,14 @@ protected void _finishToken() throws JacksonException
2350
2350
}
2351
2351
return ;
2352
2352
}
2353
- // 29-Jan-2021, tatu: as per [dataformats-binary#238] must keep in mind that
2354
- // the longest individual unit is 4 bytes (surrogate pair) so we
2355
- // actually need len+3 bytes to avoid bounds checks
2356
2353
// 18-Jan-2024, tatu: For malicious input / Fuzzers, need to worry about overflow
2357
2354
// like Integer.MAX_VALUE
2358
- final int needed = Math .max (len , len + 3 );
2359
2355
final int available = _inputEnd - _inputPtr ;
2360
2356
2361
- if ((available >= needed )
2357
+ if ((available >= len )
2362
2358
// if not, could we read? NOTE: we do not require it, just attempt to read
2363
- || ((_inputBuffer .length >= needed )
2364
- && _tryToLoadToHaveAtLeast (needed ))) {
2359
+ || ((_inputBuffer .length >= len )
2360
+ && _tryToLoadToHaveAtLeast (len ))) {
2365
2361
_finishShortText (len );
2366
2362
return ;
2367
2363
}
@@ -2392,22 +2388,18 @@ protected String _finishTextToken(int ch) throws JacksonException
2392
2388
_finishChunkedText ();
2393
2389
return _textBuffer .contentsAsString ();
2394
2390
}
2395
- // 29-Jan-2021, tatu: as per [dataformats-binary#238] must keep in mind that
2396
- // the longest individual unit is 4 bytes (surrogate pair) so we
2397
- // actually need len+3 bytes to avoid bounds checks
2398
2391
2399
2392
// 19-Mar-2021, tatu: [dataformats-binary#259] shows the case where length
2400
2393
// we get is Integer.MAX_VALUE, leading to overflow. Could change values
2401
2394
// to longs but simpler to truncate "needed" (will never pass following test
2402
2395
// due to inputBuffer never being even close to that big).
2403
2396
2404
- final int needed = Math .max (len + 3 , len );
2405
2397
final int available = _inputEnd - _inputPtr ;
2406
2398
2407
- if ((available >= needed )
2399
+ if ((available >= len )
2408
2400
// if not, could we read? NOTE: we do not require it, just attempt to read
2409
- || ((_inputBuffer .length >= needed )
2410
- && _tryToLoadToHaveAtLeast (needed ))) {
2401
+ || ((_inputBuffer .length >= len )
2402
+ && _tryToLoadToHaveAtLeast (len ))) {
2411
2403
return _finishShortText (len );
2412
2404
}
2413
2405
// If not enough space, need handling similar to chunked
@@ -2435,7 +2427,7 @@ private final String _finishShortText(int len) throws JacksonException
2435
2427
final byte [] inputBuf = _inputBuffer ;
2436
2428
2437
2429
// Let's actually do a tight loop for ASCII first:
2438
- final int end = inPtr + len ;
2430
+ final int end = _inputPtr ;
2439
2431
2440
2432
int i ;
2441
2433
while ((i = inputBuf [inPtr ]) >= 0 ) {
@@ -2452,44 +2444,50 @@ private final String _finishShortText(int len) throws JacksonException
2452
2444
final int [] codes = UTF8_UNIT_CODES ;
2453
2445
do {
2454
2446
i = inputBuf [inPtr ++] & 0xFF ;
2455
- switch (codes [i ]) {
2456
- case 0 :
2457
- break ;
2458
- case 1 :
2459
- {
2460
- final int c2 = inputBuf [inPtr ++];
2461
- if ((c2 & 0xC0 ) != 0x080 ) {
2462
- _reportInvalidOther (c2 & 0xFF , inPtr );
2463
- }
2464
- i = ((i & 0x1F ) << 6 ) | (c2 & 0x3F );
2447
+ int code = codes [i ];
2448
+ if (code != 0 ) {
2449
+ // 05-Jul-2021, tatu: As per [dataformats-binary#289] need to
2450
+ // be careful wrt end-of-buffer truncated codepoints
2451
+ if ((inPtr + code ) > end ) {
2452
+ final int firstCharOffset = len - (end - inPtr ) - 1 ;
2453
+ _reportTruncatedUTF8InString (len , firstCharOffset , i , code );
2465
2454
}
2466
- break ;
2467
- case 2 :
2468
- {
2469
- final int c2 = inputBuf [inPtr ++];
2470
- if ((c2 & 0xC0 ) != 0x080 ) {
2471
- _reportInvalidOther (c2 & 0xFF , inPtr );
2455
+
2456
+ switch (code ) {
2457
+ case 1 : {
2458
+ final int c2 = inputBuf [inPtr ++];
2459
+ if ((c2 & 0xC0 ) != 0x080 ) {
2460
+ _reportInvalidOther (c2 & 0xFF , inPtr );
2461
+ }
2462
+ i = ((i & 0x1F ) << 6 ) | (c2 & 0x3F );
2472
2463
}
2473
- final int c3 = inputBuf [inPtr ++];
2474
- if ((c3 & 0xC0 ) != 0x080 ) {
2475
- _reportInvalidOther (c3 & 0xFF , inPtr );
2464
+ break ;
2465
+ case 2 : {
2466
+ final int c2 = inputBuf [inPtr ++];
2467
+ if ((c2 & 0xC0 ) != 0x080 ) {
2468
+ _reportInvalidOther (c2 & 0xFF , inPtr );
2469
+ }
2470
+ final int c3 = inputBuf [inPtr ++];
2471
+ if ((c3 & 0xC0 ) != 0x080 ) {
2472
+ _reportInvalidOther (c3 & 0xFF , inPtr );
2473
+ }
2474
+ i = ((i & 0x0F ) << 12 ) | ((c2 & 0x3F ) << 6 ) | (c3 & 0x3F );
2476
2475
}
2477
- i = ((i & 0x0F ) << 12 ) | ((c2 & 0x3F ) << 6 ) | (c3 & 0x3F );
2476
+ break ;
2477
+ case 3 :
2478
+ // 30-Jan-2021, tatu: TODO - validate these too?
2479
+ i = ((i & 0x07 ) << 18 )
2480
+ | ((inputBuf [inPtr ++] & 0x3F ) << 12 )
2481
+ | ((inputBuf [inPtr ++] & 0x3F ) << 6 )
2482
+ | (inputBuf [inPtr ++] & 0x3F );
2483
+ // note: this is the codepoint value; need to split, too
2484
+ i -= 0x10000 ;
2485
+ outBuf [outPtr ++] = (char ) (0xD800 | (i >> 10 ));
2486
+ i = 0xDC00 | (i & 0x3FF );
2487
+ break ;
2488
+ default : // invalid
2489
+ _reportInvalidInitial (i );
2478
2490
}
2479
- break ;
2480
- case 3 :
2481
- // 30-Jan-2021, tatu: TODO - validate these too?
2482
- i = ((i & 0x07 ) << 18 )
2483
- | ((inputBuf [inPtr ++] & 0x3F ) << 12 )
2484
- | ((inputBuf [inPtr ++] & 0x3F ) << 6 )
2485
- | (inputBuf [inPtr ++] & 0x3F );
2486
- // note: this is the codepoint value; need to split, too
2487
- i -= 0x10000 ;
2488
- outBuf [outPtr ++] = (char ) (0xD800 | (i >> 10 ));
2489
- i = 0xDC00 | (i & 0x3FF );
2490
- break ;
2491
- default : // invalid
2492
- _reportInvalidInitial (i );
2493
2491
}
2494
2492
outBuf [outPtr ++] = (char ) i ;
2495
2493
} while (inPtr < end );
@@ -3919,18 +3917,16 @@ protected void _reportIncompleteBinaryRead(int expLen, int actLen) throws Stream
3919
3917
expLen , actLen ), _currToken );
3920
3918
}
3921
3919
3922
- // @since 2.13
3923
- /*
3920
+ // @since 2.18.1
3924
3921
private String _reportTruncatedUTF8InString (int strLenBytes , int truncatedCharOffset ,
3925
3922
int firstUTFByteValue , int bytesExpected )
3926
3923
throws JacksonException
3927
3924
{
3928
3925
throw _constructReadException (String .format (
3929
- "Truncated UTF-8 character in Chunked Unicode String value (%d bytes): "
3926
+ "Truncated UTF-8 character in Unicode String value (%d bytes): "
3930
3927
+"byte 0x%02X at offset #%d indicated %d more bytes needed" ,
3931
3928
strLenBytes , firstUTFByteValue , truncatedCharOffset , bytesExpected ));
3932
3929
}
3933
- */
3934
3930
3935
3931
// @since 2.13
3936
3932
private String _reportTruncatedUTF8InName (int strLenBytes , int truncatedCharOffset ,
0 commit comments