Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,21 @@

### New Features

- [#938]: Add new enumeration `XmlVersion` and typified getter `BytesDecl::xml_version()`.
- [#938]: Add new error variant `IllFormedError::UnknownVersion`.

### Bug Fixes

- [#938]: Use correct rules for EOL normalization in `Deserializer` when parsing XML 1.0 documents.
Previously, XML 1.1 rules were applied.

### Misc Changes

- [#938]: Now `BytesText::xml_content`, `BytesCData::xml_content` and `BytesRef::xml_content`
accept an `XmlVersion` parameter to apply the correct EOL normalization rules.

[#938]: https://github.com/tafia/quick-xml/pull/938


## 0.39.2 -- 2026-02-20

Expand All @@ -41,7 +52,7 @@

### New Features

- [#598]: Add method `NamespaceResolver::set_level` which may be helpful in som circumstances.
- [#598]: Add method `NamespaceResolver::set_level` which may be helpful in some circumstances.

### Bug Fixes

Expand Down
8 changes: 4 additions & 4 deletions benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
}
}
Event::Text(e) => {
black_box(e.xml_content()?);
black_box(e.xml10_content()?);
Copy link
Collaborator

@dralley dralley Feb 15, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why bother with this when xml_content() is what you would generally expect users to be using more frequently?

This makes it explicit, but I don't see why making it explicit matters in this case, given (as the commit message states) all documents declare being version 1.0

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Generally I expect that people will read Event::Decl, store .xml_version() from it and use that version in .xml_content(). For that xml_content() was changed in the next commit to accept XmlVersion.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my mind that still means we would want to benchmark .xml_content(XmlVersion::1_0), because that is how we expect it to be used. Especially for "macro" benchmarks.

I understand the practical difference is basically nonexistent, it's more philosophical. And just a suggestion. Not going to block over it.

}
Event::CData(e) => {
black_box(e.into_inner());
Expand All @@ -84,7 +84,7 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
}
}
Event::Text(e) => {
black_box(e.xml_content()?);
black_box(e.xml10_content()?);
}
Event::CData(e) => {
black_box(e.into_inner());
Expand All @@ -110,7 +110,7 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
}
}
(resolved_ns, Event::Text(e)) => {
black_box(e.xml_content()?);
black_box(e.xml10_content()?);
black_box(resolved_ns);
}
(resolved_ns, Event::CData(e)) => {
Expand Down Expand Up @@ -138,7 +138,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
}
}
(resolved_ns, Event::Text(e)) => {
black_box(e.xml_content()?);
black_box(e.xml10_content()?);
black_box(resolved_ns);
}
(resolved_ns, Event::CData(e)) => {
Expand Down
2 changes: 1 addition & 1 deletion benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ fn one_event(c: &mut Criterion) {
config.trim_text(true);
config.check_end_names = false;
match r.read_event() {
Ok(Event::Comment(e)) => nbtxt += e.xml_content().unwrap().len(),
Ok(Event::Comment(e)) => nbtxt += e.xml10_content().unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
};

Expand Down
51 changes: 46 additions & 5 deletions src/de/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2107,6 +2107,7 @@ pub use self::attributes::AttributesDeserializer;
pub use self::resolver::{EntityResolver, PredefinedEntityResolver};
pub use self::simple_type::SimpleTypeDeserializer;
pub use crate::errors::serialize::DeError;
use crate::XmlVersion;

use crate::{
de::map::ElementMapAccess,
Expand Down Expand Up @@ -2391,8 +2392,12 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
}

match self.next_impl()? {
PayloadEvent::Text(e) => result.to_mut().push_str(&e.xml_content()?),
PayloadEvent::CData(e) => result.to_mut().push_str(&e.xml_content()?),
PayloadEvent::Text(e) => result
.to_mut()
.push_str(&e.xml_content(self.reader.xml_version())?),
PayloadEvent::CData(e) => result
.to_mut()
.push_str(&e.xml_content(self.reader.xml_version())?),
PayloadEvent::GeneralRef(e) => self.resolve_reference(result.to_mut(), e)?,

// SAFETY: current_event_is_last_text checks that event is Text, CData or GeneralRef
Expand All @@ -2408,8 +2413,10 @@ impl<'i, R: XmlRead<'i>, E: EntityResolver> XmlReader<'i, R, E> {
return match self.next_impl()? {
PayloadEvent::Start(e) => Ok(DeEvent::Start(e)),
PayloadEvent::End(e) => Ok(DeEvent::End(e)),
PayloadEvent::Text(e) => self.drain_text(e.xml_content()?),
PayloadEvent::CData(e) => self.drain_text(e.xml_content()?),
PayloadEvent::Text(e) => self.drain_text(e.xml_content(self.reader.xml_version())?),
PayloadEvent::CData(e) => {
self.drain_text(e.xml_content(self.reader.xml_version())?)
}
PayloadEvent::DocType(e) => {
self.entity_resolver
.capture(e)
Expand Down Expand Up @@ -3068,7 +3075,13 @@ where
let config = reader.config_mut();
config.expand_empty_elements = true;

Self::new(SliceReader { reader }, entity_resolver)
Self::new(
SliceReader {
reader,
version: XmlVersion::V1_0,
},
entity_resolver,
)
}
}

Expand Down Expand Up @@ -3148,6 +3161,7 @@ where
IoReader {
reader,
buf: Vec::new(),
version: XmlVersion::V1_0,
},
entity_resolver,
)
Expand All @@ -3167,6 +3181,7 @@ where
IoReader {
reader,
buf: Vec::new(),
version: XmlVersion::V1_0,
},
entity_resolver,
)
Expand Down Expand Up @@ -3391,6 +3406,9 @@ pub trait XmlRead<'i> {
/// when it cannot satisfy the lifetime.
fn read_to_end(&mut self, name: QName) -> Result<(), DeError>;

/// Returns the XML version of the source document.
fn xml_version(&self) -> XmlVersion;

/// A copy of the reader's decoder used to decode strings.
fn decoder(&self) -> Decoder;

Expand All @@ -3408,6 +3426,7 @@ pub trait XmlRead<'i> {
pub struct IoReader<R: BufRead> {
/// The wrapped reader that events are pulled from.
reader: NsReader<R>,
/// Scratch buffer handed to `read_event_into`; it is cleared before each read.
buf: Vec<u8>,
/// XML version of the document. Constructors initialize it to `XmlVersion::V1_0`
/// and it is overwritten whenever an `Event::Decl` is read.
version: XmlVersion,
}

impl<R: BufRead> IoReader<R> {
Expand Down Expand Up @@ -3451,6 +3470,9 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
self.buf.clear();

let event = self.reader.read_event_into(&mut self.buf)?;
if let Event::Decl(e) = &event {
self.version = e.xml_version()?;
}
if let Some(event) = skip_uninterested(event) {
return Ok(event.into_owned());
}
Expand All @@ -3464,6 +3486,12 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
}
}

#[inline]
fn xml_version(&self) -> XmlVersion {
// Cached value: starts as 1.0 and is replaced whenever an `Event::Decl` is read.
self.version
}

#[inline]
fn decoder(&self) -> Decoder {
self.reader.decoder()
}
Expand All @@ -3479,6 +3507,7 @@ impl<'i, R: BufRead> XmlRead<'i> for IoReader<R> {
/// [`Deserializer::from_str`].
pub struct SliceReader<'de> {
/// The wrapped reader that events are pulled from; it borrows the input slice.
reader: NsReader<&'de [u8]>,
/// XML version of the document. Constructors initialize it to `XmlVersion::V1_0`
/// and `next()` overwrites it whenever an `Event::Decl` is read.
version: XmlVersion,
}

impl<'de> SliceReader<'de> {
Expand Down Expand Up @@ -3519,6 +3548,9 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
fn next(&mut self) -> Result<PayloadEvent<'de>, DeError> {
loop {
let event = self.reader.read_event()?;
if let Event::Decl(e) = &event {
self.version = e.xml_version()?;
}
if let Some(event) = skip_uninterested(event) {
return Ok(event);
}
Expand All @@ -3532,6 +3564,12 @@ impl<'de> XmlRead<'de> for SliceReader<'de> {
}
}

#[inline]
fn xml_version(&self) -> XmlVersion {
// Cached value: starts as 1.0 and is replaced by `next()` when an `Event::Decl` is read.
self.version
}

#[inline]
fn decoder(&self) -> Decoder {
self.reader.decoder()
}
Expand Down Expand Up @@ -4123,9 +4161,11 @@ mod tests {
let mut reader1 = IoReader {
reader: NsReader::from_reader(s.as_bytes()),
buf: Vec::new(),
version: XmlVersion::V1_0,
};
let mut reader2 = SliceReader {
reader: NsReader::from_str(s),
version: XmlVersion::V1_0,
};

loop {
Expand All @@ -4151,6 +4191,7 @@ mod tests {

let mut reader = SliceReader {
reader: NsReader::from_str(s),
version: XmlVersion::V1_0,
};

let config = reader.reader.config_mut();
Expand Down
5 changes: 5 additions & 0 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ pub enum IllFormedError {
///
/// [specification]: https://www.w3.org/TR/xml11/#sec-prolog-dtd
MissingDeclVersion(Option<String>),
/// The XML version specified in the declaration is neither 1.0 nor 1.1.
UnknownVersion,
/// A document type definition (DTD) does not contain a name of a root element.
///
/// According to the [specification], document type definition (`<!DOCTYPE foo>`)
Expand Down Expand Up @@ -152,6 +154,9 @@ impl fmt::Display for IllFormedError {
Self::MissingDeclVersion(Some(attr)) => {
write!(f, "an XML declaration must start with `version` attribute, but in starts with `{}`", attr)
}
Self::UnknownVersion => {
f.write_str("unknown XML version: either 1.0 or 1.1 is expected")
}
Self::MissingDoctypeName => {
f.write_str("`<!DOCTYPE>` declaration does not contain a name of a document type")
}
Expand Down
87 changes: 81 additions & 6 deletions src/events/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ use crate::escape::{
};
use crate::name::{LocalName, QName};
use crate::utils::{self, name_len, trim_xml_end, trim_xml_start, write_cow_string};
use crate::XmlVersion;
use attributes::{AttrError, Attribute, Attributes};

/// Opening tag data (`Event::Start`), with optional attributes: `<name attr="value">`.
Expand Down Expand Up @@ -659,8 +660,11 @@ impl<'a> BytesText<'a> {

/// Dispatches to [`xml10_content()`](Self::xml10_content) or
/// [`xml11_content()`](Self::xml11_content) according to the given XML `version`.
#[inline]
pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
self.xml11_content()
pub fn xml_content(&self, version: XmlVersion) -> Result<Cow<'a, str>, EncodingError> {
match version {
XmlVersion::V1_0 => self.xml10_content(),
XmlVersion::V1_1 => self.xml11_content(),
}
}

/// Alias for [`xml10_content()`](Self::xml10_content).
Expand Down Expand Up @@ -967,8 +971,11 @@ impl<'a> BytesCData<'a> {

/// Dispatches to [`xml10_content()`](Self::xml10_content) or
/// [`xml11_content()`](Self::xml11_content) according to the given XML `version`.
#[inline]
pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
self.xml11_content()
pub fn xml_content(&self, version: XmlVersion) -> Result<Cow<'a, str>, EncodingError> {
match version {
XmlVersion::V1_0 => self.xml10_content(),
XmlVersion::V1_1 => self.xml11_content(),
}
}

/// Alias for [`xml10_content()`](Self::xml10_content).
Expand Down Expand Up @@ -1432,6 +1439,71 @@ impl<'a> BytesDecl<'a> {
.transpose()
}

/// Gets the XML version as a typed enumeration.
///
/// According to the [grammar], the version *must* be the first thing in the declaration.
/// This method tries to extract the first thing in the declaration and return it.
/// If there are multiple attributes, the value of the first one is returned.
///
/// If the version is missing from the declaration, or the first thing is not a version,
/// [`IllFormedError::MissingDeclVersion`] will be returned.
///
/// If version is not 1.0 or 1.1, [`IllFormedError::UnknownVersion`] will be returned.
///
/// # Examples
///
/// ```
/// use quick_xml::XmlVersion;
/// use quick_xml::errors::{Error, IllFormedError};
/// use quick_xml::events::{BytesDecl, BytesStart};
///
/// // <?xml version='1.1'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.1'", 0));
/// assert_eq!(decl.xml_version().unwrap(), XmlVersion::V1_1);
///
/// // <?xml version='1.0' version='1.1'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.0' version='1.1'", 0));
/// assert_eq!(decl.xml_version().unwrap(), XmlVersion::V1_0);
///
/// // <?xml version='1.2'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" version='1.2'", 0));
/// match decl.xml_version() {
/// Err(Error::IllFormed(IllFormedError::UnknownVersion)) => {},
/// _ => assert!(false),
/// }
///
/// // <?xml encoding='utf-8'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8'", 0));
/// match decl.xml_version() {
/// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
/// _ => assert!(false),
/// }
///
/// // <?xml encoding='utf-8' version='1.1'?>
/// let decl = BytesDecl::from_start(BytesStart::from_content(" encoding='utf-8' version='1.1'", 0));
/// match decl.xml_version() {
/// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(Some(key)))) => assert_eq!(key, "encoding"),
/// _ => assert!(false),
/// }
///
/// // <?xml?>
/// let decl = BytesDecl::from_start(BytesStart::from_content("", 0));
/// match decl.xml_version() {
/// Err(Error::IllFormed(IllFormedError::MissingDeclVersion(None))) => {},
/// _ => assert!(false),
/// }
/// ```
///
/// [grammar]: https://www.w3.org/TR/xml11/#NT-XMLDecl
pub fn xml_version(&self) -> Result<XmlVersion, Error> {
let v = self.version()?;
match v.as_ref() {
b"1.0" => Ok(XmlVersion::V1_0),
b"1.1" => Ok(XmlVersion::V1_1),
_ => Err(Error::IllFormed(IllFormedError::UnknownVersion)),
}
}

/// Gets the actual encoding using [_get an encoding_](https://encoding.spec.whatwg.org/#concept-encoding-get)
/// algorithm.
///
Expand Down Expand Up @@ -1614,8 +1686,11 @@ impl<'a> BytesRef<'a> {

/// Dispatches to [`xml10_content()`](Self::xml10_content) or
/// [`xml11_content()`](Self::xml11_content) according to the given XML `version`.
#[inline]
pub fn xml_content(&self) -> Result<Cow<'a, str>, EncodingError> {
self.xml11_content()
pub fn xml_content(&self, version: XmlVersion) -> Result<Cow<'a, str>, EncodingError> {
match version {
XmlVersion::V1_0 => self.xml10_content(),
XmlVersion::V1_1 => self.xml11_content(),
}
}

/// Alias for [`xml10_content()`](Self::xml10_content).
Expand Down
19 changes: 19 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,3 +78,22 @@ pub use crate::errors::serialize::{DeError, SeError};
pub use crate::errors::{Error, Result};
pub use crate::reader::{NsReader, Reader};
pub use crate::writer::{ElementWriter, Writer};

/// Version of the XML standard that a document conforms to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum XmlVersion {
    /// [XML 1.0], the version assumed when a document has no XML declaration.
    /// Most documents in the world are still XML 1.0 documents.
    ///
    /// [XML 1.0]: https://www.w3.org/TR/xml/
    V1_0,
    /// [XML 1.1](https://www.w3.org/TR/xml11/)
    V1_1,
}

impl Default for XmlVersion {
    /// Yields [`XmlVersion::V1_0`], matching documents without an XML declaration.
    #[inline]
    fn default() -> Self {
        XmlVersion::V1_0
    }
}
Loading