Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,12 @@

### Bug Fixes

- [#936]: Fix incorrect result of `.read_text()` when it is called after reading a `Text` or `GeneralRef` event.

### Misc Changes

[#936]: https://github.com/tafia/quick-xml/pull/936


## 0.39.0 -- 2026-01-11

Expand Down
6 changes: 3 additions & 3 deletions src/parser/pi.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,12 @@ impl Parser for PiParser {

#[inline]
fn eof_error(self, content: &[u8]) -> SyntaxError {
// Check if content starts with "?xml" followed by whitespace, '?' or end.
// Check if content starts with "<?xml" followed by whitespace, '?' or end.
// This determines whether to report an unclosed XML declaration or PI.
// FIXME: Add support for UTF-8/ASCII incompatible encodings (UTF-16)
let is_xml_decl = content.starts_with(b"?xml")
let is_xml_decl = content.starts_with(b"<?xml")
&& content
.get(4)
.get(5)
.map_or(true, |&b| is_whitespace(b) || b == b'?');
if is_xml_decl {
SyntaxError::UnclosedXmlDecl
Expand Down
76 changes: 41 additions & 35 deletions src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,21 +73,15 @@ macro_rules! impl_buffered_source {
match memchr::memchr2(b'<', b'&', available) {
// Special handling is needed only on the first iteration.
// On subsequent iterations we have already read something and should emit a Text event
Some(0) if read == 0 && available[0] == b'<' => {
self $(.$reader)? .consume(1);
*position += 1;
return ReadTextResult::Markup(buf);
}
Some(0) if read == 0 && available[0] == b'<' => return ReadTextResult::Markup(buf),
// Do not consume `&` because it may be lone and we would need to
// return it as part of the Text event
Some(0) if read == 0 => return ReadTextResult::Ref(buf),
Some(i) if available[i] == b'<' => {
buf.extend_from_slice(&available[..i]);

// +1 to skip `<`
let used = i + 1;
self $(.$reader)? .consume(used);
read += used as u64;
self $(.$reader)? .consume(i);
read += i as u64;

*position += read;
return ReadTextResult::UpToMarkup(&buf[start..]);
Expand Down Expand Up @@ -137,10 +131,10 @@ macro_rules! impl_buffered_source {
// should explicitly skip it at first iteration lest we confuse
// it with the end
if read == 0 {
debug_assert_eq!(
available.first(),
Some(&b'&'),
"`read_ref` must be called at `&`"
debug_assert!(
available.starts_with(b"&"),
"`read_ref` must be called at `&`:\n{:?}",
crate::utils::Bytes(available)
);
// If that ampersand is lone, then it will be part of text
// and we should keep it
Expand All @@ -151,31 +145,31 @@ macro_rules! impl_buffered_source {
}

match memchr::memchr3(b';', b'&', b'<', available) {
// Do not consume `&` because it may be lone and we would need to
// return it as part of the Text event
Some(i) if available[i] == b'&' => {
Some(i) if available[i] == b';' => {
buf.extend_from_slice(&available[..i]);

self $(.$reader)? .consume(i);
read += i as u64;
// +1 -- skip the end `;`
let used = i + 1;
self $(.$reader)? .consume(used);
read += used as u64;

*position += read;

return ReadRefResult::UpToRef(&buf[start..]);
return ReadRefResult::Ref(&buf[start..]);
}
// Do not consume `&` because it may be lone and we would need to
// return it as part of the Text event
Some(i) => {
let is_end = available[i] == b';';
let is_amp = available[i] == b'&';
buf.extend_from_slice(&available[..i]);

// +1 -- skip the end `;` or `<`
let used = i + 1;
self $(.$reader)? .consume(used);
read += used as u64;
self $(.$reader)? .consume(i);
read += i as u64;

*position += read;

return if is_end {
ReadRefResult::Ref(&buf[start..])
return if is_amp {
ReadRefResult::UpToRef(&buf[start..])
} else {
ReadRefResult::UpToMarkup(&buf[start..])
};
Expand Down Expand Up @@ -243,14 +237,20 @@ macro_rules! impl_buffered_source {
buf: &'b mut Vec<u8>,
position: &mut u64,
) -> Result<(BangType, &'b [u8])> {
// Peeked one bang ('!') before being called, so it's guaranteed to
// start with it.
// Peeked '<!' before being called, so it's guaranteed to start with it.
let start = buf.len();
let mut read = 1;
let mut read = 2;
buf.push(b'<');
buf.push(b'!');
self $(.$reader)? .consume(1);
self $(.$reader)? .consume(2);

let mut bang_type = BangType::new(self.peek_one() $(.$await)? ?)?;
let mut bang_type = loop {
break match self $(.$reader)? .fill_buf() $(.$await)? {
Ok(n) => BangType::new(n.first().cloned())?,
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(e) => return Err(Error::Io(e.into())),
};
};

loop {
match self $(.$reader)? .fill_buf() $(.$await)? {
Expand Down Expand Up @@ -310,13 +310,19 @@ macro_rules! impl_buffered_source {

#[inline]
$($async)? fn peek_one(&mut self) -> io::Result<Option<u8>> {
loop {
let available = loop {
break match self $(.$reader)? .fill_buf() $(.$await)? {
Ok(n) => Ok(n.first().cloned()),
Ok(n) => n,
Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
Err(e) => Err(e),
Err(e) => return Err(e),
};
}
};
debug_assert!(
available.starts_with(b"<"),
"markup must start from '<':\n{:?}",
crate::utils::Bytes(available)
);
Ok(available.get(1).cloned())
}
};
}
Expand Down
Loading