diff --git a/src/lazy/text/matched.rs b/src/lazy/text/matched.rs index d3bfb130..d68dcedb 100644 --- a/src/lazy/text/matched.rs +++ b/src/lazy/text/matched.rs @@ -188,28 +188,44 @@ impl MatchedInt { // Note: This UTF-8 validation step should be unnecessary as the parser only recognizes // ASCII integer characters. If this shows up in profiling, we could consider skipping it. let text = sanitized.as_utf8(matched_input.offset())?; - let int: Int = match i128::from_str_radix(text, self.radix()) { - Ok(i) => i.into(), + // Parse as u128 to handle the full range including i128::MIN, whose magnitude + // exceeds i128::MAX by one. + let magnitude = match u128::from_str_radix(text, self.radix()) { + Ok(m) => m, Err(parse_int_error) => { debug_assert!( - // `from_str_radix` can fail for a variety of reasons, but our rules for matching an - // int rule out most of them (empty str, invalid digit, etc). The only ones that should - // happen are overflow and underflow. In those cases, we fall back to using `BigInt`. - parse_int_error.kind() == &IntErrorKind::NegOverflow - || parse_int_error.kind() == &IntErrorKind::PosOverflow + // `from_str_radix` can fail for a variety of reasons, but our rules for + // matching an int rule out most of them (empty str, invalid digit, etc). + // The only one that should happen for u128 is overflow. + parse_int_error.kind() == &IntErrorKind::PosOverflow ); return cold_path!(IonResult::decoding_error(format!( - "encountered an int whose value was exceeded the supported range: '{}'", + "encountered an int whose value exceeded the supported range: '{}'", std::str::from_utf8(matched_input.bytes()).unwrap_or("invalid UTF-8") ))); } }; - if self.is_negative { - Ok(-int) + const I128_MIN_MAGNITUDE: u128 = i128::MIN.unsigned_abs(); + let int: Int = if self.is_negative { + if magnitude > I128_MIN_MAGNITUDE { + return cold_path!(IonResult::decoding_error(format!( + "encountered an int whose value exceeded the supported range: '{}'", + std::str::from_utf8(matched_input.bytes()).unwrap_or("invalid UTF-8") + ))); + } + (magnitude.wrapping_neg() as i128).into() } else { - Ok(int) - } + if magnitude > i128::MAX as u128 { + return cold_path!(IonResult::decoding_error(format!( + "encountered an int whose value exceeded the supported range: '{}'", + std::str::from_utf8(matched_input.bytes()).unwrap_or("invalid UTF-8") + ))); + } + (magnitude as i128).into() + }; + + Ok(int) } } @@ -1248,6 +1264,21 @@ mod tests { "-1234567890_1234567890_1234567890", Int::from(-1234567890_1234567890_1234567890i128), ), + // i32 boundary values + ("2147483647", Int::from(i32::MAX)), + ("-2147483648", Int::from(i32::MIN)), + // i64 boundary values + ("9223372036854775807", Int::from(i64::MAX)), + ("-9223372036854775808", Int::from(i64::MIN)), + // i128 boundary values + ( + "170141183460469231731687303715884105727", + Int::from(i128::MAX), + ), + ( + "-170141183460469231731687303715884105728", + Int::from(i128::MIN), + ), ]; for (input, expected) in tests { @@ -1256,6 +1287,29 @@ mod tests { Ok(()) } + #[test] + fn read_ints_overflow() { + fn expect_overflow(data: &str) { + let encoding_context = EncodingContext::empty(); + let context = encoding_context.get_ref(); + let mut buffer = TextBuffer::new(context, data.as_bytes()); + let matched = peek(TextBuffer::match_int).parse_next(&mut buffer).unwrap(); + let result = matched.read(buffer); + assert!(result.is_err(), "Expected overflow error for '{data}'"); + } + + let tests = [ + // i128::MAX + 1 (positive overflow) + "170141183460469231731687303715884105728", + // i128::MIN - 1 (negative overflow) + "-170141183460469231731687303715884105729", + ]; + + for input in tests { + expect_overflow(input); + } + } + #[test] fn read_timestamps() -> IonResult<()> { fn expect_timestamp(data: &str, expected: Timestamp) {