Skip to content

Commit 66769be

Browse files
authored
Handle ambiguous time parsing with MonthDay and YearMonth in ixdtf (#6717)
<!-- Thank you for your pull request to ICU4X! Reminder: try to use [Conventional Comments](https://conventionalcomments.org/) to make comments clearer. Please see https://github.com/unicode-org/icu4x/blob/main/CONTRIBUTING.md for general information on contributing to ICU4X. --> This PR adds handling for ambiguous cases that may occur when parsing time values that may be mistaken as a MonthDay or YearMonth. There's also a little reorganization in `datetime.rs`. It basically moves `parse_month_day` and `parse_annotated_year_month` further down the file so that the structure is date_time/date, year_month, month_day.
1 parent ddda230 commit 66769be

File tree

5 files changed

+145
-68
lines changed

5 files changed

+145
-68
lines changed

utils/ixdtf/src/core.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,11 @@ impl<'a, T: EncodingType> Cursor<'a, T> {
106106
self.pos
107107
}
108108

109+
/// Get current position
110+
pub(crate) fn set_position(&mut self, pos: usize) {
111+
self.pos = pos;
112+
}
113+
109114
/// Peek the value at next position (current + 1).
110115
pub(crate) fn peek(&self) -> ParserResult<Option<u8>> {
111116
self.peek_n(1)

utils/ixdtf/src/error.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,13 @@ pub enum ParseError {
108108
TimeDurationPartOrder,
109109
#[displaydoc("Invalid time duration designator.")]
110110
TimeDurationDesignator,
111+
112+
#[displaydoc("Time is ambiguous with MonthDay")]
113+
AmbiguousTimeMonthDay,
114+
#[displaydoc("Time is ambiguous with YearMonth")]
115+
AmbiguousTimeYearMonth,
116+
#[displaydoc("MonthDay was not valid.")]
117+
InvalidMonthDay,
111118
}
112119

113120
impl core::error::Error for ParseError {}

utils/ixdtf/src/parsers/datetime.rs

Lines changed: 85 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -71,18 +71,71 @@ pub(crate) fn parse_annotated_date_time<'a, T: EncodingType>(
7171
})
7272
}
7373

74-
/// Parses an AnnotatedMonthDay.
75-
pub(crate) fn parse_annotated_month_day<'a, T: EncodingType>(
74+
/// Parses a `DateTime` record.
75+
fn parse_date_time<T: EncodingType>(cursor: &mut Cursor<T>) -> ParserResult<DateTimeRecord> {
76+
let date = parse_date(cursor)?;
77+
78+
// If there is no `DateTimeSeparator`, return date early.
79+
if !cursor.check_or(false, is_date_time_separator)? {
80+
return Ok(DateTimeRecord {
81+
date: Some(date),
82+
time: None,
83+
time_zone: None,
84+
});
85+
}
86+
87+
cursor.advance();
88+
89+
let time = parse_time_record(cursor)?;
90+
91+
let time_zone = if cursor.check_or(false, |ch| is_ascii_sign(ch) || is_utc_designator(ch))? {
92+
Some(timezone::parse_date_time_utc_offset(cursor)?)
93+
} else {
94+
None
95+
};
96+
97+
Ok(DateTimeRecord {
98+
date: Some(date),
99+
time: Some(time),
100+
time_zone,
101+
})
102+
}
103+
104+
/// Parses `Date` record.
105+
fn parse_date<T: EncodingType>(cursor: &mut Cursor<T>) -> ParserResult<DateRecord> {
106+
let year = parse_date_year(cursor)?;
107+
let hyphenated = cursor
108+
.check(is_hyphen)?
109+
.ok_or(ParseError::abrupt_end("Date"))?;
110+
111+
cursor.advance_if(hyphenated);
112+
113+
let month = parse_date_month(cursor)?;
114+
115+
let second_hyphen = cursor.check_or(false, is_hyphen)?;
116+
assert_syntax!(hyphenated == second_hyphen, DateSeparator);
117+
cursor.advance_if(second_hyphen);
118+
119+
let day = parse_date_day(cursor)?;
120+
121+
check_date_validity(year, month, day)?;
122+
123+
Ok(DateRecord { year, month, day })
124+
}
125+
126+
// ==== `YearMonth` parsing functions ====
127+
128+
/// Parse an annotated YearMonth
129+
pub(crate) fn parse_annotated_year_month<'a, T: EncodingType>(
76130
cursor: &mut Cursor<'a, T>,
77131
handler: impl FnMut(Annotation<'a, T>) -> Option<Annotation<'a, T>>,
78132
) -> ParserResult<IxdtfParseRecord<'a, T>> {
79-
let date = parse_month_day(cursor)?;
80-
133+
let year_month = parse_year_month(cursor)?;
81134
if !cursor.check_or(false, is_annotation_open)? {
82135
cursor.close()?;
83136

84137
return Ok(IxdtfParseRecord {
85-
date: Some(date),
138+
date: Some(year_month),
86139
time: None,
87140
offset: None,
88141
tz: None,
@@ -93,28 +146,36 @@ pub(crate) fn parse_annotated_month_day<'a, T: EncodingType>(
93146
let annotation_set = annotations::parse_annotation_set(cursor, handler)?;
94147

95148
Ok(IxdtfParseRecord {
96-
date: Some(date),
149+
date: Some(year_month),
97150
time: None,
98151
offset: None,
99152
tz: annotation_set.tz,
100153
calendar: annotation_set.calendar,
101154
})
102155
}
103156

104-
/// Parse an annotated YearMonth
105-
pub(crate) fn parse_annotated_year_month<'a, T: EncodingType>(
106-
cursor: &mut Cursor<'a, T>,
107-
handler: impl FnMut(Annotation<'a, T>) -> Option<Annotation<'a, T>>,
108-
) -> ParserResult<IxdtfParseRecord<'a, T>> {
157+
pub(crate) fn parse_year_month<T: EncodingType>(
158+
cursor: &mut Cursor<T>,
159+
) -> ParserResult<DateRecord> {
109160
let year = parse_date_year(cursor)?;
110161
cursor.advance_if(cursor.check_or(false, is_hyphen)?);
111162
let month = parse_date_month(cursor)?;
112163

113-
let date = DateRecord {
164+
Ok(DateRecord {
114165
year,
115166
month,
116167
day: 1,
117-
};
168+
})
169+
}
170+
171+
// ==== `MonthDay` parsing functions ====
172+
173+
/// Parses an AnnotatedMonthDay.
174+
pub(crate) fn parse_annotated_month_day<'a, T: EncodingType>(
175+
cursor: &mut Cursor<'a, T>,
176+
handler: impl FnMut(Annotation<'a, T>) -> Option<Annotation<'a, T>>,
177+
) -> ParserResult<IxdtfParseRecord<'a, T>> {
178+
let date = parse_month_day(cursor)?;
118179

119180
if !cursor.check_or(false, is_annotation_open)? {
120181
cursor.close()?;
@@ -139,60 +200,6 @@ pub(crate) fn parse_annotated_year_month<'a, T: EncodingType>(
139200
})
140201
}
141202

142-
/// Parses a `DateTime` record.
143-
fn parse_date_time<T: EncodingType>(cursor: &mut Cursor<T>) -> ParserResult<DateTimeRecord> {
144-
let date = parse_date(cursor)?;
145-
146-
// If there is no `DateTimeSeparator`, return date early.
147-
if !cursor.check_or(false, is_date_time_separator)? {
148-
return Ok(DateTimeRecord {
149-
date: Some(date),
150-
time: None,
151-
time_zone: None,
152-
});
153-
}
154-
155-
cursor.advance();
156-
157-
let time = parse_time_record(cursor)?;
158-
159-
let time_zone = if cursor.check_or(false, |ch| is_ascii_sign(ch) || is_utc_designator(ch))? {
160-
Some(timezone::parse_date_time_utc_offset(cursor)?)
161-
} else {
162-
None
163-
};
164-
165-
Ok(DateTimeRecord {
166-
date: Some(date),
167-
time: Some(time),
168-
time_zone,
169-
})
170-
}
171-
172-
/// Parses `Date` record.
173-
fn parse_date<T: EncodingType>(cursor: &mut Cursor<T>) -> ParserResult<DateRecord> {
174-
let year = parse_date_year(cursor)?;
175-
let hyphenated = cursor
176-
.check(is_hyphen)?
177-
.ok_or(ParseError::abrupt_end("Date"))?;
178-
179-
cursor.advance_if(hyphenated);
180-
181-
let month = parse_date_month(cursor)?;
182-
183-
let second_hyphen = cursor.check_or(false, is_hyphen)?;
184-
assert_syntax!(hyphenated == second_hyphen, DateSeparator);
185-
cursor.advance_if(second_hyphen);
186-
187-
let day = parse_date_day(cursor)?;
188-
189-
check_date_validity(year, month, day)?;
190-
191-
Ok(DateRecord { year, month, day })
192-
}
193-
194-
// ==== `MonthDay` parsing functions ====
195-
196203
/// Parses a `DateSpecMonthDay`
197204
pub(crate) fn parse_month_day<T: EncodingType>(cursor: &mut Cursor<T>) -> ParserResult<DateRecord> {
198205
let hyphenated = cursor
@@ -215,7 +222,9 @@ pub(crate) fn parse_month_day<T: EncodingType>(cursor: &mut Cursor<T>) -> Parser
215222

216223
let day = parse_date_day(cursor)?;
217224

218-
assert_syntax!(cursor.check_or(true, is_annotation_open)?, InvalidEnd);
225+
if !is_valid_month_day(day, month) {
226+
return Err(ParseError::InvalidMonthDay);
227+
}
219228

220229
Ok(DateRecord {
221230
year: 0,
@@ -224,6 +233,14 @@ pub(crate) fn parse_month_day<T: EncodingType>(cursor: &mut Cursor<T>) -> Parser
224233
})
225234
}
226235

236+
fn is_valid_month_day(month: u8, day: u8) -> bool {
237+
match month {
238+
2 | 4 | 6 | 9 | 11 if day >= 31 => false,
239+
2 if day == 30 => false,
240+
_ => day <= 31,
241+
}
242+
}
243+
227244
// ==== Unit Parsers ====
228245

229246
#[inline]

utils/ixdtf/src/parsers/tests.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -525,6 +525,33 @@ fn invalid_time() {
525525
);
526526
}
527527

528+
#[test]
529+
fn invalid_ambiguous_time() {
530+
let bad_value = "1208-10";
531+
let err = IxdtfParser::from_str(bad_value).parse_time();
532+
assert_eq!(
533+
err,
534+
Err(ParseError::AmbiguousTimeMonthDay),
535+
"Invalid time parsing: \"{bad_value}\" is ambiguous."
536+
);
537+
538+
let bad_value = "12-14";
539+
let err = IxdtfParser::from_str(bad_value).parse_time();
540+
assert_eq!(
541+
err,
542+
Err(ParseError::AmbiguousTimeMonthDay),
543+
"Invalid time parsing: \"{bad_value}\" is ambiguous."
544+
);
545+
546+
let bad_value = "202112";
547+
let err = IxdtfParser::from_str(bad_value).parse_time();
548+
assert_eq!(
549+
err,
550+
Err(ParseError::AmbiguousTimeYearMonth),
551+
"Invalid time parsing: \"{bad_value}\" is ambiguous."
552+
);
553+
}
554+
528555
#[test]
529556
fn temporal_valid_instant_strings() {
530557
let instants = [

utils/ixdtf/src/parsers/time.rs

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use crate::{
1010
assert_syntax,
1111
core::EncodingType,
1212
parsers::{
13+
datetime::{parse_month_day, parse_year_month},
1314
grammar::{
1415
is_annotation_open, is_decimal_separator, is_time_designator, is_time_separator,
1516
is_utc_designator,
@@ -28,6 +29,7 @@ pub(crate) fn parse_annotated_time_record<'a, T: EncodingType>(
2829
cursor: &mut Cursor<'a, T>,
2930
handler: impl FnMut(Annotation<'a, T>) -> Option<Annotation<'a, T>>,
3031
) -> ParserResult<IxdtfParseRecord<'a, T>> {
32+
let start = cursor.pos();
3133
let designator = cursor.check_or(false, is_time_designator)?;
3234
cursor.advance_if(designator);
3335

@@ -45,6 +47,7 @@ pub(crate) fn parse_annotated_time_record<'a, T: EncodingType>(
4547
// Check if annotations exist.
4648
if !cursor.check_or(false, is_annotation_open)? {
4749
cursor.close()?;
50+
check_time_ambiguity(cursor, start)?;
4851

4952
return Ok(IxdtfParseRecord {
5053
date: None,
@@ -55,6 +58,7 @@ pub(crate) fn parse_annotated_time_record<'a, T: EncodingType>(
5558
});
5659
}
5760

61+
check_time_ambiguity(cursor, start)?;
5862
let annotations = annotations::parse_annotation_set(cursor, handler)?;
5963

6064
cursor.close()?;
@@ -68,6 +72,23 @@ pub(crate) fn parse_annotated_time_record<'a, T: EncodingType>(
6872
})
6973
}
7074

75+
#[inline]
76+
fn check_time_ambiguity<T: EncodingType>(cursor: &mut Cursor<T>, start: usize) -> ParserResult<()> {
77+
let current_loc = cursor.pos();
78+
// It is a Syntax Error if ParseText(Time DateTimeUTCOffset[~Z], DateSpecMonthDay) is a Parse Node.
79+
cursor.set_position(start);
80+
if parse_month_day(cursor).is_ok() {
81+
return Err(ParseError::AmbiguousTimeMonthDay);
82+
}
83+
// It is a Syntax Error if ParseText(Time DateTimeUTCOffset[~Z], DateSpecYearMonth) is a Parse Node.
84+
cursor.set_position(start);
85+
if parse_year_month(cursor).is_ok() {
86+
return Err(ParseError::AmbiguousTimeYearMonth);
87+
}
88+
cursor.set_position(current_loc);
89+
Ok(())
90+
}
91+
7192
/// Parse `TimeRecord`
7293
pub(crate) fn parse_time_record<T: EncodingType>(
7394
cursor: &mut Cursor<T>,

0 commit comments

Comments
 (0)