diff --git a/Changelog.md b/Changelog.md index 79f06f26..329cc2c0 100644 --- a/Changelog.md +++ b/Changelog.md @@ -18,8 +18,15 @@ ### Bug Fixes +- [#939]: Fix parsing error of the tag from buffered reader, when the first byte `<` + is the last in the `BufRead` internal buffer. This is the regression from [#936]. + + ### Misc Changes +[#936]: https://github.com/tafia/quick-xml/pull/936 +[#939]: https://github.com/tafia/quick-xml/issues/939 + ## 0.39.1 -- 2026-02-15 diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs index 76e6138c..2a6f22de 100644 --- a/src/reader/async_tokio.rs +++ b/src/reader/async_tokio.rs @@ -425,6 +425,7 @@ mod test { read_event_into_async, read_until_close_async, TokioAdapter, + 1, &mut Vec::new(), async, await diff --git a/src/reader/buffered_reader.rs b/src/reader/buffered_reader.rs index 1074002d..32aa313e 100644 --- a/src/reader/buffered_reader.rs +++ b/src/reader/buffered_reader.rs @@ -195,8 +195,10 @@ macro_rules! impl_buffered_source { buf: &'b mut Vec, position: &mut u64, ) -> Result<&'b [u8]> { - let mut read = 0; + let mut read = 1; let start = buf.len(); + // '<' was consumed in peek_one(), but not placed in buf + buf.push(b'<'); loop { let available = match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) if n.is_empty() => break, @@ -240,9 +242,10 @@ macro_rules! impl_buffered_source { // Peeked ' io::Result> { + // That method is called only when available buffer starts from '<' + // We need to consume it + self $(.$reader)? .consume(1); let available = loop { break match self $(.$reader)? .fill_buf() $(.$await)? { Ok(n) => n, @@ -317,12 +323,7 @@ macro_rules! impl_buffered_source { Err(e) => return Err(e), }; }; - debug_assert!( - available.starts_with(b"<"), - "markup must start from '<':\n{:?}", - crate::utils::Bytes(available) - ); - Ok(available.get(1).cloned()) + Ok(available.first().cloned()) } }; } @@ -512,6 +513,7 @@ mod test { read_event_impl, read_until_close, identity, + 1, &mut Vec::new() ); } diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 2cc828bd..79e94ebd 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -1237,6 +1237,7 @@ mod test { $read_until_close:ident, // constructor of the XML source on which internal functions will be called $source:path, + $skip:literal, // constructor of the buffer to which read data will stored $buf:expr $(, $async:ident, $await:ident)? @@ -1258,9 +1259,9 @@ mod test { #[ignore = "start CDATA sequence fully checked outside of `read_bang_element`"] $($async)? fn not_properly_start() { let buf = $buf; - let mut position = 1; - let mut input = b"other content".as_ref(); - // ^= 1 + let mut position = 0; + let mut input = &b"other content"[$skip..]; + // ^= 0 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedCData), @@ -1278,8 +1279,8 @@ mod test { $($async)? fn not_closed() { let buf = $buf; let mut position = 0; - let mut input = b" assert_eq!(cause, SyntaxError::UnclosedCData), @@ -1296,7 +1297,7 @@ mod test { $($async)? fn empty() { let buf = $buf; let mut position = 0; - let mut input = b"other content".as_ref(); + let mut input = &b"other content"[$skip..]; // ^= 0 ^= 12 let (ty, bytes) = $source(&mut input) @@ -1317,8 +1318,8 @@ mod test { $($async)? fn with_content() { let buf = $buf; let mut position = 0; - let mut input = b"content]]>other content]]>".as_ref(); - // ^= 0 ^= 29 + let mut input = &b"content]]>other content]]>"[$skip..]; + // ^= 0 ^= 29 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) @@ -1356,9 +1357,9 @@ mod test { #[ignore = "start comment sequence fully checked outside of `read_bang_element`"] $($async)? fn not_properly_start() { let buf = $buf; - let mut position = 1; - let mut input = b"other content".as_ref(); - // ^= 1 + let mut position = 0; + let mut input = &b"other content"[$skip..]; + // ^= 1 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), @@ -1374,8 +1375,8 @@ mod test { $($async)? fn not_properly_end() { let buf = $buf; let mut position = 0; - let mut input = b"other content".as_ref(); - // ^= 0 ^= 17 + let mut input = &b"other content"[$skip..]; + // ^= 0 ^= 17 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), @@ -1391,8 +1392,8 @@ mod test { $($async)? fn not_closed1() { let buf = $buf; let mut position = 0; - let mut input = b"other content".as_ref(); - // ^= 0 ^= 18 + let mut input = &b"other content"[$skip..]; + // ^= 0 ^= 18 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), @@ -1425,8 +1426,8 @@ mod test { $($async)? fn not_closed3() { let buf = $buf; let mut position = 0; - let mut input = b"other content".as_ref(); - // ^= 0 ^= 19 + let mut input = &b"other content"[$skip..]; + // ^= 0 ^= 19 match $source(&mut input).read_bang_element(buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedComment), @@ -1442,8 +1443,8 @@ mod test { $($async)? fn empty() { let buf = $buf; let mut position = 0; - let mut input = b"other content".as_ref(); - // ^= 0 ^= 7 + let mut input = &b"other content"[$skip..]; + // ^= 0 ^= 7 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) @@ -1460,8 +1461,8 @@ mod test { $($async)? fn with_content() { let buf = $buf; let mut position = 0; - let mut input = b"comment<--->other content".as_ref(); - // ^= 0 ^= 18 + let mut input = &b"comment<--->other content"[$skip..]; + // ^= 0 ^= 18 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) @@ -1487,8 +1488,8 @@ mod test { $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; - let mut input = b" assert_eq!(cause, SyntaxError::UnclosedDoctype), @@ -1504,8 +1505,8 @@ mod test { $($async)? fn without_space() { let buf = $buf; let mut position = 0; - let mut input = b" assert_eq!(cause, SyntaxError::UnclosedDoctype), @@ -1521,8 +1522,8 @@ mod test { $($async)? fn empty() { let buf = $buf; let mut position = 0; - let mut input = b"other content".as_ref(); - // ^= 0 ^= 10 + let mut input = &b"other content"[$skip..]; + // ^= 0 ^= 10 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) @@ -1539,8 +1540,8 @@ mod test { $($async)? fn not_closed() { let buf = $buf; let mut position = 0; - let mut input = b" assert_eq!(cause, SyntaxError::UnclosedDoctype), @@ -1561,8 +1562,8 @@ mod test { $($async)? fn not_properly_start() { let buf = $buf; let mut position = 0; - let mut input = b" assert_eq!(cause, SyntaxError::UnclosedDoctype), @@ -1578,8 +1579,8 @@ mod test { $($async)? fn without_space() { let buf = $buf; let mut position = 0; - let mut input = b" assert_eq!(cause, SyntaxError::UnclosedDoctype), @@ -1595,8 +1596,8 @@ mod test { $($async)? fn empty() { let buf = $buf; let mut position = 0; - let mut input = b"other content".as_ref(); - // ^= 0 ^= 10 + let mut input = &b"other content"[$skip..]; + // ^= 0 ^= 10 let (ty, bytes) = $source(&mut input) .read_bang_element(buf, &mut position) @@ -1613,8 +1614,8 @@ mod test { $($async)? fn not_closed() { let buf = $buf; let mut position = 0; - let mut input = b" assert_eq!(cause, SyntaxError::UnclosedDoctype), @@ -1809,12 +1810,14 @@ mod test { use pretty_assertions::assert_eq; /// Checks that nothing was read from empty buffer + /// `<` read in peek_one that is called before read_with, that is why it in the input buffer + /// peek_one, however, does not increment position for simplicity of the code #[$test] $($async)? fn empty() { let buf = $buf; - let mut position = 1; - let mut input = b"".as_ref(); - // ^= 1 + let mut position = 0; + let mut input = &b"<"[$skip..]; + // ^= 1 match $source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? { Err(Error::Syntax(cause)) => assert_eq!(cause, SyntaxError::UnclosedTag), @@ -1833,13 +1836,13 @@ mod test { #[$test] $($async)? fn empty_tag() { let buf = $buf; - let mut position = 1; - let mut input = b">".as_ref(); - // ^= 2 + let mut position = 0; + let mut input = &b"<>"[$skip..]; + // ^= 2 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(b"") + Bytes(b"<") ); assert_eq!(position, 2); } @@ -1847,13 +1850,13 @@ mod test { #[$test] $($async)? fn normal() { let buf = $buf; - let mut position = 1; - let mut input = b"tag>".as_ref(); - // ^= 5 + let mut position = 0; + let mut input = &b""[$skip..]; + // ^= 5 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(b"tag") + Bytes(b"".as_ref(); - // ^= 3 + let mut position = 0; + let mut input = &b"<:>"[$skip..]; + // ^= 3 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(b":") + Bytes(b"<:") ); assert_eq!(position, 3); } @@ -1875,13 +1878,13 @@ mod test { #[$test] $($async)? fn empty_ns() { let buf = $buf; - let mut position = 1; - let mut input = b":tag>".as_ref(); - // ^= 6 + let mut position = 0; + let mut input = &b"<:tag>"[$skip..]; + // ^= 6 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(b":tag") + Bytes(b"<:tag") ); assert_eq!(position, 6); } @@ -1889,13 +1892,13 @@ mod test { #[$test] $($async)? fn with_attributes() { let buf = $buf; - let mut position = 1; - let mut input = br#"tag attr-1=">" attr2 = '>' 3attr>"#.as_ref(); - // ^= 39 + let mut position = 0; + let mut input = &br#""#[$skip..]; + // ^= 39 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(br#"tag attr-1=">" attr2 = '>' 3attr"#) + Bytes(br#"".as_ref(); - // ^= 3 + let mut position = 0; + let mut input = &b""[$skip..]; + // ^= 3 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(b"/") + Bytes(b"".as_ref(); - // ^= 6 + let mut position = 0; + let mut input = &b""[$skip..]; + // ^= 6 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(b"tag/") + Bytes(b"".as_ref(); - // ^= 4 + let mut position = 0; + let mut input = &b"<:/>"[$skip..]; + // ^= 4 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(b":/") + Bytes(b"<:/") ); assert_eq!(position, 4); } @@ -1950,13 +1953,13 @@ mod test { #[$test] $($async)? fn empty_ns() { let buf = $buf; - let mut position = 1; - let mut input = b":tag/>".as_ref(); - // ^= 7 + let mut position = 0; + let mut input = &b"<:tag/>"[$skip..]; + // ^= 7 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(b":tag/") + Bytes(b"<:tag/") ); assert_eq!(position, 7); } @@ -1964,13 +1967,13 @@ mod test { #[$test] $($async)? fn with_attributes() { let buf = $buf; - let mut position = 1; - let mut input = br#"tag attr-1="/>" attr2 = '/>' 3attr/>"#.as_ref(); - // ^= 42 + let mut position = 0; + let mut input = &br#""#[$skip..]; + // ^= 42 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(br#"tag attr-1="/>" attr2 = '/>' 3attr/"#) + Bytes(br#"".as_ref(); - // ^= 4 + let mut position = 0; + let mut input = &b""[$skip..]; + // ^= 4 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(b"/ ") + Bytes(b"".as_ref(); - // ^= 6 + let mut position = 0; + let mut input = &b""[$skip..]; + // ^= 6 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(b"/tag") + Bytes(b"".as_ref(); - // ^= 4 + let mut position = 0; + let mut input = &b""[$skip..]; + // ^= 4 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(b"/:") + Bytes(b"".as_ref(); - // ^= 7 + let mut position = 0; + let mut input = &b""[$skip..]; + // ^= 7 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(b"/:tag") + Bytes(b"" attr2 = '>' 3attr>"#.as_ref(); - // ^= 40 + let mut position = 0; + let mut input = &br#""#[$skip..]; + // ^= 40 assert_eq!( Bytes($source(&mut input).read_with(ElementParser::default(), buf, &mut position) $(.$await)? .unwrap()), - Bytes(br#"/tag attr-1=">" attr2 = '>' 3attr"#) + Bytes(br#"<` +/// - `/r>` +/// +/// Passing of this test shows that `<` in the end of the first chunk does not +/// considered as an incomplete tag and parser correctly consumes this byte and +/// requests the next chunk. +#[test] +fn issue939() { + let xml_file = BufReader::with_capacity(4, &b""[..]); + let mut reader = Reader::from_reader(xml_file); + let mut buf = Vec::new(); + + assert_eq!( + reader.read_event_into(&mut buf).unwrap(), + Event::Start(BytesStart::new("r")) + ); + assert_eq!( + quick_xml::utils::Bytes(&buf), + quick_xml::utils::Bytes(b"