Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,11 @@ name = "serde-de-enum"
required-features = ["serialize"]
path = "tests/serde-de-enum.rs"

[[test]]
name = "serde-de-references"
required-features = ["serialize"]
path = "tests/serde-de-references.rs"

[[test]]
name = "serde-de-seq"
required-features = ["serialize"]
Expand Down
9 changes: 9 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@

## Unreleased

A new `XmlReader` type was added that automatically resolves general entity references.

The `quick_xml::de::resolver` module was replaced by the `quick_xml::resolver` module.

### New Features

- [#938]: Add new enumeration `XmlVersion` and typified getter `BytesDecl::xml_version()`.
Expand All @@ -33,6 +37,10 @@

Deprecated functions now behave the same as the newly added ones.

- [#948]: Add `quick_xml::reader::EntityResolver` which is able to resolve external entities.
- [#948]: Add `quick_xml::reader::XmlReader`, a new high-level reader which should be preferred
over the old `Reader`.

### Bug Fixes

- [#938]: Use correct rules for EOL normalization in `Deserializer` when parsing XML 1.0 documents.
Expand All @@ -51,6 +59,7 @@
[#914]: https://github.com/tafia/quick-xml/pull/914
[#938]: https://github.com/tafia/quick-xml/pull/938
[#944]: https://github.com/tafia/quick-xml/pull/944
[#948]: https://github.com/tafia/quick-xml/pull/948


## 0.39.2 -- 2026-02-20
Expand Down
22 changes: 21 additions & 1 deletion compare/benches/low-level.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use criterion::{self, criterion_group, criterion_main, BenchmarkId, Criterion, Throughput};
use pretty_assertions::assert_eq;
use quick_xml::events::Event;
use quick_xml::reader::Reader;
use quick_xml::reader::{self, Reader, XmlReader};
use std::hint::black_box;
use xml::reader::{EventReader, XmlEvent};

Expand Down Expand Up @@ -94,6 +94,26 @@ fn low_level_comparison(c: &mut Criterion) {
},
);

group.bench_with_input(
BenchmarkId::new("quick_xml:reader", filename),
*data,
|b, input| {
b.iter(|| {
let mut reader = XmlReader::from_str(input);
// TODO: reader.config_mut().check_end_names = false;
let mut count = black_box(0);
loop {
match reader.read_event() {
Ok(reader::Event::Start(_)) | Ok(reader::Event::Empty(_)) => count += 1,
Ok(reader::Event::Eof) => break,
_ => (),
}
}
assert_eq!(count, total_tags, "Overall tag count in {}", filename);
})
},
);

group.bench_with_input(
BenchmarkId::new("maybe_xml:0.10", filename),
*data,
Expand Down
224 changes: 224 additions & 0 deletions examples/high-level-entities.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
//! This example demonstrates how custom entities can be extracted from the DOCTYPE
//! and how to use the high-level `XmlReader` API.
//!
//! NB: this example is deliberately kept simple:
//! * the regex in this example is simple but brittle.

use std::borrow::Cow;
use std::collections::HashMap;
use std::convert::Infallible;
use std::fmt;
use std::io::{BufRead, Cursor};

use quick_xml::events::{BytesEnd, BytesStart, BytesText};
use quick_xml::reader::{
EntityResolver, EntityResolverFactory, Reader, ReplacementText, XmlEvent, XmlReader,
};
use regex::bytes::Regex;

use pretty_assertions::assert_eq;

/// Main document processed by this example.
///
/// Its internal DTD subset declares three general entities: `text`, `element1`
/// (whose replacement text references `&text;`) and `element2` (whose replacement
/// text references `&element1;`). The document content references `&element2;`
/// and `&external;`; the latter has no declaration in the DTD.
const XML1: &str = r#"
<!DOCTYPE test [
<!ENTITY text "hello world" >
<!ENTITY element1 "<dtd attr = 'Message: &text;'/>" >
<!ENTITY element2 "<a> &element1; </a>" >
]>
<test label="Message: &text;">&element2;</test>
&external;
"#;

/// Additional document that is returned as the replacement for the `external` entity.
///
/// In a real application it would be referenced from the DTD by a declaration such as
/// `<!ENTITY external SYSTEM "URI to the document, for example, relative file path" >`
const XML2: &str = r#"
<?xml version='1.0'?>
<external>text</external>
"#;

/// Entity resolver used by this example: it remembers the general entities
/// captured from the DOCTYPE and looks up their replacement text by name.
struct MyResolver<'i> {
/// Map of captured internal _parsed general entities_: the key is the entity
/// name, the value is its replacement text. _Parsed_ means that the value of
/// the entity is parsed by the XML reader.
entities: HashMap<Cow<'i, [u8]>, Cow<'i, [u8]>>,
/// In this example we use a simple regular expression to capture entities from the DTD.
/// In a real application you should use a DTD parser.
entity_re: Regex,
}
/// Renders the captured entities as a `name: replacement` debug map.
///
/// Entity names and replacement texts are raw bytes taken from the DOCTYPE, so
/// they are decoded lossily: invalid UTF-8 sequences are shown as U+FFFD
/// replacement characters instead of making the `Debug` output panic.
impl<'i> fmt::Debug for MyResolver<'i> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_map()
            .entries(
                self.entities
                    .iter()
                    .map(|(k, v)| (String::from_utf8_lossy(k), String::from_utf8_lossy(v))),
            )
            .finish()
    }
}

impl<'i> MyResolver<'i> {
fn new() -> Result<Self, regex::Error> {
Ok(Self {
entities: Default::default(),
// Capture "name" and "content" from such string:
// <!ENTITY name "content" >
entity_re: Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?,
})
}
fn capture_borrowed(&mut self, doctype: &'i [u8]) {
for cap in self.entity_re.captures_iter(doctype) {
self.entities.insert(
cap.get(1).unwrap().as_bytes().into(),
cap.get(2).unwrap().as_bytes().into(),
);
}
}
fn capture_owned(&mut self, doctype: Vec<u8>) {
for cap in self.entity_re.captures_iter(&doctype) {
self.entities.insert(
cap.get(1).unwrap().as_bytes().to_owned().into(),
cap.get(2).unwrap().as_bytes().to_owned().into(),
);
}
}
}

impl<'i> EntityResolverFactory<'i> for MyResolver<'i> {
    type CaptureError = Infallible;
    type Resolver = Self;

    /// Produces a fresh resolver that does not know any entities yet.
    fn new_resolver(&mut self) -> Self::Resolver {
        Self {
            entities: Default::default(),
            // Reuse the already compiled pattern instead of compiling it again;
            // this also means there is no failure case to unwrap here
            entity_re: self.entity_re.clone(),
        }
    }
}

impl<'i> EntityResolver<'i> for MyResolver<'i> {
    type CaptureError = Infallible;

    /// Remembers the entities declared in the `doctype` content.
    ///
    /// Borrowed content is captured without copying, owned content is copied
    /// into the entity map. This implementation never returns an error.
    fn capture(&mut self, doctype: BytesText<'i>) -> Result<(), Self::CaptureError> {
        // NOTE(review): the `dbg!` calls in this impl look like tracing left in
        // for demonstration purposes -- confirm they should stay before merging
        dbg!(&doctype);
        match doctype.into_inner() {
            Cow::Borrowed(doctype) => self.capture_borrowed(doctype),
            Cow::Owned(doctype) => self.capture_owned(doctype),
        }
        dbg!(self);
        Ok(())
    }

    /// Returns the replacement for `entity`, or `None` if the entity is unknown.
    ///
    /// The entity named `external` is always answered with the content of `XML2`
    /// as an external replacement; every other name is looked up among the
    /// entities captured from the DOCTYPE.
    fn resolve<'e>(&self, entity: &str) -> Option<ReplacementText<'i, 'e>> {
        dbg!((entity, self));
        if entity == "external" {
            return Some(ReplacementText::External(Box::new(Cursor::new(
                XML2.as_bytes(),
            ))));
        }
        self.entities
            .get(entity.as_bytes())
            .map(|replacement| ReplacementText::Internal(replacement.clone()))
    }
}

/// Reads `XML1` directly from the string slice, so the produced events
/// borrow their data from that first document.
fn borrowed() -> Result<(), Box<dyn std::error::Error>> {
    let mut source = Reader::from_str(XML1);
    source.config_mut().trim_text(true);

    let mut r = XmlReader::borrowed(source, MyResolver::new()?);

    // Reads the next event (propagating read errors) and compares it with the
    // expected one
    macro_rules! expect_next {
        ($expected:expr) => {
            assert_eq!(r.read_event()?, $expected)
        };
    }

    expect_next!(XmlEvent::Start(BytesStart::from_content(
        r#"test label="Message: &text;""#,
        4
    )));

    // The following three events come from the replacement text of the
    // `element2` entity defined in the DTD
    expect_next!(XmlEvent::Start(BytesStart::new("a")));
    expect_next!(XmlEvent::Empty(BytesStart::from_content(
        r#"dtd attr = 'Message: &text;'"#,
        3
    )));
    expect_next!(XmlEvent::End(BytesEnd::new("a")));

    // Back in the original document
    expect_next!(XmlEvent::End(BytesEnd::new("test")));

    // Content of the external document
    expect_next!(XmlEvent::Start(BytesStart::new("external")));
    expect_next!(XmlEvent::Text(BytesText::new("text")));
    expect_next!(XmlEvent::End(BytesEnd::new("external")));

    expect_next!(XmlEvent::Eof);

    Ok(())
}

/// Reads `XML1` through a boxed `BufRead`, so the produced events
/// always copy their data.
fn buffered() -> Result<(), Box<dyn std::error::Error>> {
    let input: Box<dyn BufRead> = Box::new(Cursor::new(XML1.as_bytes()));
    let mut source = Reader::from_reader(input);
    source.config_mut().trim_text(true);

    let mut r = XmlReader::buffered(source, MyResolver::new()?);

    // Reads the next event (propagating read errors) and compares it with the
    // expected one
    macro_rules! expect_next {
        ($expected:expr) => {
            assert_eq!(r.read_event()?, $expected)
        };
    }

    expect_next!(XmlEvent::Start(BytesStart::from_content(
        r#"test label="Message: &text;""#,
        4
    )));

    // The following three events come from the replacement text of the
    // `element2` entity defined in the DTD
    expect_next!(XmlEvent::Start(BytesStart::new("a")));
    expect_next!(XmlEvent::Empty(BytesStart::from_content(
        r#"dtd attr = 'Message: &text;'"#,
        3
    )));
    expect_next!(XmlEvent::End(BytesEnd::new("a")));

    // Back in the original document
    expect_next!(XmlEvent::End(BytesEnd::new("test")));

    // Content of the external document
    expect_next!(XmlEvent::Start(BytesStart::new("external")));
    expect_next!(XmlEvent::Text(BytesText::new("text")));
    expect_next!(XmlEvent::End(BytesEnd::new("external")));

    expect_next!(XmlEvent::Eof);

    Ok(())
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("{}", XML1);
// In this example the events will borrow from the first document
borrowed()?;

println!("----------------------------------------------------------------");
println!("{}", XML1);
// In this example the events will always copy data
buffered()?;
Ok(())
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ use std::str::from_utf8;

use quick_xml::encoding::Decoder;
use quick_xml::errors::Error;
use quick_xml::escape::EscapeError;
use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event};
use quick_xml::name::QName;
use quick_xml::reader::Reader;
Expand Down Expand Up @@ -107,11 +106,10 @@ impl<'i> MyReader<'i> {
}
}

fn resolve(&self, entity: &[u8]) -> Result<&'i [u8], EscapeError> {
fn resolve(&self, entity: &[u8]) -> Result<&'i [u8], Error> {
match self.entities.get(entity) {
Some(replacement) => Ok(replacement),
None => Err(EscapeError::UnrecognizedEntity(
0..0,
None => Err(Error::UnrecognizedGeneralEntity(
String::from_utf8_lossy(entity).into_owned(),
)),
}
Expand Down
Loading
Loading