From 3af5d5b9e30b344f81ed2cb1c4dc5272d4b38387 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Sat, 2 Mar 2024 07:32:34 +0100 Subject: [PATCH 1/6] Use `format::parse` in `DateTime::from_str` --- src/format/parse.rs | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/format/parse.rs b/src/format/parse.rs index ee26e66334..2a864378f3 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -525,10 +525,7 @@ impl str::FromStr for DateTime { fn from_str(s: &str) -> ParseResult> { let mut parsed = Parsed::new(); - let (s, _) = parse_rfc3339_relaxed(&mut parsed, s)?; - if !s.trim_start().is_empty() { - return Err(TOO_LONG); - } + parse(&mut parsed, s, [Item::Fixed(Fixed::RFC3339), Item::Space("")].iter())?; parsed.to_datetime() } } From 94fcec0fc7d768f286483e33f8c792f9f5bd3f48 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Sat, 2 Mar 2024 07:39:28 +0100 Subject: [PATCH 2/6] Move `FromStr` impl to `datetime` module --- src/datetime/mod.rs | 22 ++++++++++++++++++++++ src/format/parse.rs | 24 +----------------------- 2 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/datetime/mod.rs b/src/datetime/mod.rs index 366bbe6ad7..e7ef6f750c 100644 --- a/src/datetime/mod.rs +++ b/src/datetime/mod.rs @@ -1788,6 +1788,28 @@ where } } +/// Accepts a relaxed form of RFC3339. +/// A space or a 'T' are acepted as the separator between the date and time +/// parts. Additional spaces are allowed between each component. +/// +/// All of these examples are equivalent: +/// ``` +/// # use chrono::{DateTime, offset::FixedOffset}; +/// "2012-12-12T12:12:12Z".parse::>()?; +/// "2012-12-12 12:12:12Z".parse::>()?; +/// "2012- 12-12T12: 12:12Z".parse::>()?; +/// # Ok::<(), chrono::ParseError>(()) +/// ``` +impl str::FromStr for DateTime { + type Err = ParseError; + + fn from_str(s: &str) -> ParseResult> { + let mut parsed = Parsed::new(); + parse(&mut parsed, s, [Item::Fixed(Fixed::RFC3339), Item::Space("")].iter())?; + parsed.to_datetime() + } +} + /// Accepts a relaxed form of RFC3339. /// A space or a 'T' are accepted as the separator between the date and time /// parts. diff --git a/src/format/parse.rs b/src/format/parse.rs index 2a864378f3..953fd98c43 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -12,7 +12,7 @@ use super::scan; use super::{Fixed, InternalFixed, InternalInternal, Item, Numeric, Pad, Parsed}; use super::{ParseError, ParseResult}; use super::{BAD_FORMAT, INVALID, OUT_OF_RANGE, TOO_LONG, TOO_SHORT}; -use crate::{DateTime, FixedOffset, Weekday}; +use crate::Weekday; fn set_weekday_with_num_days_from_sunday(p: &mut Parsed, v: i64) -> ParseResult<()> { p.set_weekday(match v { @@ -508,28 +508,6 @@ where Ok(s) } -/// Accepts a relaxed form of RFC3339. -/// A space or a 'T' are acepted as the separator between the date and time -/// parts. Additional spaces are allowed between each component. -/// -/// All of these examples are equivalent: -/// ``` -/// # use chrono::{DateTime, offset::FixedOffset}; -/// "2012-12-12T12:12:12Z".parse::>()?; -/// "2012-12-12 12:12:12Z".parse::>()?; -/// "2012- 12-12T12: 12:12Z".parse::>()?; -/// # Ok::<(), chrono::ParseError>(()) -/// ``` -impl str::FromStr for DateTime { - type Err = ParseError; - - fn from_str(s: &str) -> ParseResult> { - let mut parsed = Parsed::new(); - parse(&mut parsed, s, [Item::Fixed(Fixed::RFC3339), Item::Space("")].iter())?; - parsed.to_datetime() - } -} - /// Accepts a relaxed form of RFC3339. /// /// Differences with RFC3339: From 1dc14e04eb967f91fdfd33b102c334165e19bf06 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Sat, 2 Mar 2024 07:45:43 +0100 Subject: [PATCH 3/6] Use `format::parse` in `FixedOffset::from_str` --- src/format/scan.rs | 4 ++-- src/offset/fixed.rs | 8 +++++--- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/format/scan.rs b/src/format/scan.rs index 1ab87b9dd5..825f37da68 100644 --- a/src/format/scan.rs +++ b/src/format/scan.rs @@ -181,7 +181,7 @@ pub(super) fn space(s: &str) -> ParseResult<&str> { } /// Consumes any number (including zero) of colon or spaces. -pub(crate) fn colon_or_space(s: &str) -> ParseResult<&str> { +pub(super) fn colon_or_space(s: &str) -> ParseResult<&str> { Ok(s.trim_start_matches(|c: char| c == ':' || c.is_whitespace())) } @@ -199,7 +199,7 @@ pub(crate) fn colon_or_space(s: &str) -> ParseResult<&str> { /// This is part of [RFC 3339 & ISO 8601]. /// /// [RFC 3339 & ISO 8601]: https://en.wikipedia.org/w/index.php?title=ISO_8601&oldid=1114309368#Time_offsets_from_UTC -pub(crate) fn timezone_offset( +pub(super) fn timezone_offset( mut s: &str, mut consume_colon: F, allow_zulu: bool, diff --git a/src/offset/fixed.rs b/src/offset/fixed.rs index e7382bed1d..57dca54fad 100644 --- a/src/offset/fixed.rs +++ b/src/offset/fixed.rs @@ -10,7 +10,7 @@ use core::str::FromStr; use rkyv::{Archive, Deserialize, Serialize}; use super::{MappedLocalTime, Offset, TimeZone}; -use crate::format::{scan, ParseError, OUT_OF_RANGE}; +use crate::format::{parse, Fixed, Item, ParseError, Parsed}; use crate::naive::{NaiveDate, NaiveDateTime}; /// The time zone with fixed offset, from UTC-23:59:59 to UTC+23:59:59. @@ -118,9 +118,11 @@ impl FixedOffset { /// Parsing a `str` into a `FixedOffset` uses the format [`%z`](crate::format::strftime). impl FromStr for FixedOffset { type Err = ParseError; + fn from_str(s: &str) -> Result { - let (_, offset) = scan::timezone_offset(s, scan::colon_or_space, false, false, true)?; - Self::east_opt(offset).ok_or(OUT_OF_RANGE) + let mut parsed = Parsed::new(); + parse(&mut parsed, s, [Item::Fixed(Fixed::TimezoneOffset)].iter())?; + parsed.to_fixed_offset() } } From c2d3345e2f844d8ce807e3f2fed86019c49224e4 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Sat, 2 Mar 2024 08:02:36 +0100 Subject: [PATCH 4/6] Simplify digit parsing in `timezone_offset` --- src/format/parse.rs | 6 +++--- src/format/scan.rs | 46 +++++++++++++++------------------------------ 2 files changed, 18 insertions(+), 34 deletions(-) diff --git a/src/format/parse.rs b/src/format/parse.rs index 953fd98c43..097e02d81a 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -1292,7 +1292,7 @@ mod tests { check("12345678", &[internal_fixed(TimezoneOffsetPermissive)], Err(INVALID)); check("+1", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_SHORT)); check("+12", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: 43_200)); - check("+123", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_SHORT)); + check("+123", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_LONG)); check("+1234", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: 45_240)); check("-1234", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: -45_240)); check("−1234", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: -45_240)); // MINUS SIGN (U+2212) @@ -1309,7 +1309,7 @@ mod tests { check("12:34:56", &[internal_fixed(TimezoneOffsetPermissive)], Err(INVALID)); check("+1:", &[internal_fixed(TimezoneOffsetPermissive)], Err(INVALID)); check("+12:", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: 43_200)); - check("+12:3", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_SHORT)); + check("+12:3", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_LONG)); check("+12:34", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: 45_240)); check("-12:34", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: -45_240)); check("−12:34", &[internal_fixed(TimezoneOffsetPermissive)], parsed!(offset: -45_240)); // MINUS SIGN (U+2212) @@ -1359,7 +1359,7 @@ mod tests { ); check("🤠+12:34", &[internal_fixed(TimezoneOffsetPermissive)], Err(INVALID)); check("+12:34🤠", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_LONG)); - check("+12:🤠34", &[internal_fixed(TimezoneOffsetPermissive)], Err(INVALID)); + check("+12:🤠34", &[internal_fixed(TimezoneOffsetPermissive)], Err(TOO_LONG)); check( "+12:34🤠", &[internal_fixed(TimezoneOffsetPermissive), Literal("🤠")], diff --git a/src/format/scan.rs b/src/format/scan.rs index 825f37da68..5a7c061ad0 100644 --- a/src/format/scan.rs +++ b/src/format/scan.rs @@ -215,34 +215,28 @@ where } } - const fn digits(s: &str) -> ParseResult<(u8, u8)> { + fn digits(s: &str) -> ParseResult { let b = s.as_bytes(); if b.len() < 2 { - Err(TOO_SHORT) - } else { - Ok((b[0], b[1])) + return Err(TOO_SHORT); + } + match (b[0], b[1]) { + (h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => Ok((h1 - b'0') * 10 + (h2 - b'0')), + _ => Err(INVALID), } } let negative = match s.chars().next() { Some('+') => { - // PLUS SIGN (U+2B) s = &s['+'.len_utf8()..]; - false } Some('-') => { - // HYPHEN-MINUS (U+2D) s = &s['-'.len_utf8()..]; - true } - Some('−') => { + Some('−') if allow_tz_minus_sign => { // MINUS SIGN (U+2212) - if !allow_tz_minus_sign { - return Err(INVALID); - } s = &s['−'.len_utf8()..]; - true } Some(_) => return Err(INVALID), @@ -250,10 +244,7 @@ where }; // hours (00--99) - let hours = match digits(s)? { - (h1 @ b'0'..=b'9', h2 @ b'0'..=b'9') => i32::from((h1 - b'0') * 10 + (h2 - b'0')), - _ => return Err(INVALID), - }; + let hours = digits(s)? as i32; s = &s[2..]; // colons (and possibly other separators) @@ -261,21 +252,14 @@ where // minutes (00--59) // if the next two items are digits then we have to add minutes - let minutes = if let Ok(ds) = digits(s) { - match ds { - (m1 @ b'0'..=b'5', m2 @ b'0'..=b'9') => i32::from((m1 - b'0') * 10 + (m2 - b'0')), - (b'6'..=b'9', b'0'..=b'9') => return Err(OUT_OF_RANGE), - _ => return Err(INVALID), + let minutes = match digits(s) { + Ok(m) if m >= 60 => return Err(OUT_OF_RANGE), + Ok(m) => { + s = &s[2..]; + m as i32 } - } else if allow_missing_minutes { - 0 - } else { - return Err(TOO_SHORT); - }; - s = match s.len() { - len if len >= 2 => &s[2..], - 0 => s, - _ => return Err(TOO_SHORT), + Err(_) if allow_missing_minutes => 0, + Err(e) => return Err(e), }; let seconds = hours * 3600 + minutes * 60; From a799ecf037cc802debba519feeb3e2a6b4550d45 Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Sat, 2 Mar 2024 08:28:52 +0100 Subject: [PATCH 5/6] Support parsing negative timestamps --- src/format/parse.rs | 6 +++--- src/format/scan.rs | 26 ++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/src/format/parse.rs b/src/format/parse.rs index 097e02d81a..7dd301b4d9 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -357,7 +357,7 @@ where Minute => (2, false, Parsed::set_minute), Second => (2, false, Parsed::set_second), Nanosecond => (9, false, Parsed::set_nanosecond), - Timestamp => (usize::MAX, false, Parsed::set_timestamp), + Timestamp => (usize::MAX, true, Parsed::set_timestamp), // for the future expansion Internal(ref int) => match int._dummy {}, @@ -366,8 +366,7 @@ where s = s.trim_start(); let v = if signed { if s.starts_with('-') { - let v = try_consume!(scan::number(&s[1..], 1, usize::MAX)); - 0i64.checked_sub(v).ok_or(OUT_OF_RANGE)? + try_consume!(scan::negative_number(&s[1..], 1, usize::MAX)) } else if s.starts_with('+') { try_consume!(scan::number(&s[1..], 1, usize::MAX)) } else { @@ -765,6 +764,7 @@ mod tests { check(" + 42", &[Space(" "), num(Year)], Err(INVALID)); check("-", &[num(Year)], Err(TOO_SHORT)); check("+", &[num(Year)], Err(TOO_SHORT)); + check("-9223372036854775808", &[num(Timestamp)], parsed!(timestamp: i64::MIN)); // unsigned numeric check("345", &[num(Ordinal)], parsed!(ordinal: 345)); diff --git a/src/format/scan.rs b/src/format/scan.rs index 5a7c061ad0..051b2ea225 100644 --- a/src/format/scan.rs +++ b/src/format/scan.rs @@ -15,6 +15,28 @@ use crate::Weekday; /// Any number that does not fit in `i64` is an error. #[inline] pub(super) fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> { + let (s, n) = unsigned_number(s, min, max)?; + Ok((s, n.try_into().map_err(|_| OUT_OF_RANGE)?)) +} + +/// Tries to parse the negative number from `min` to `max` digits. +/// +/// The absence of digits at all is an unconditional error. +/// More than `max` digits are consumed up to the first `max` digits. +/// Any number that does not fit in `i64` is an error. +#[inline] +pub(super) fn negative_number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64)> { + let (s, n) = unsigned_number(s, min, max)?; + let signed_neg = (n as i64).wrapping_neg(); + if !signed_neg.is_negative() { + return Err(OUT_OF_RANGE); + } + Ok((s, signed_neg)) +} + +/// Tries to parse a number from `min` to `max` digits as an unsigned integer. +#[inline] +pub(super) fn unsigned_number(s: &str, min: usize, max: usize) -> ParseResult<(&str, u64)> { assert!(min <= max); // We are only interested in ascii numbers, so we can work with the `str` as bytes. We stop on @@ -25,7 +47,7 @@ pub(super) fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64) return Err(TOO_SHORT); } - let mut n = 0i64; + let mut n = 0u64; for (i, c) in bytes.iter().take(max).cloned().enumerate() { // cloned() = copied() if !c.is_ascii_digit() { @@ -36,7 +58,7 @@ pub(super) fn number(s: &str, min: usize, max: usize) -> ParseResult<(&str, i64) } } - n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0') as i64)) { + n = match n.checked_mul(10).and_then(|n| n.checked_add((c - b'0') as u64)) { Some(n) => n, None => return Err(OUT_OF_RANGE), }; From 3b7fa22ab2397bdd7c6d3799f96ac92d3934b15c Mon Sep 17 00:00:00 2001 From: Paul Dicker Date: Tue, 5 Mar 2024 15:48:46 +0100 Subject: [PATCH 6/6] Remove duplicate checks --- src/format/parse.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/format/parse.rs b/src/format/parse.rs index 7dd301b4d9..0358bebae5 100644 --- a/src/format/parse.rs +++ b/src/format/parse.rs @@ -424,25 +424,16 @@ where } &Internal(InternalFixed { val: InternalInternal::Nanosecond3NoDot }) => { - if s.len() < 3 { - return Err(TOO_SHORT); - } let nano = try_consume!(scan::nanosecond_fixed(s, 3)); parsed.set_nanosecond(nano)?; } &Internal(InternalFixed { val: InternalInternal::Nanosecond6NoDot }) => { - if s.len() < 6 { - return Err(TOO_SHORT); - } let nano = try_consume!(scan::nanosecond_fixed(s, 6)); parsed.set_nanosecond(nano)?; } &Internal(InternalFixed { val: InternalInternal::Nanosecond9NoDot }) => { - if s.len() < 9 { - return Err(TOO_SHORT); - } let nano = try_consume!(scan::nanosecond_fixed(s, 9)); parsed.set_nanosecond(nano)?; }