Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,21 @@

### New Features

- [#379]: Improved compliance with the XML attribute value normalization process by
adding `Attribute::normalized_value()` and `Attribute::normalized_value_with()`,
which ought to be used in place of `Attribute::unescape_value()` and
`Attribute::unescape_value_with()`

### Bug Fixes

- [#936]: Fix incorrect result of `.read_text()` when it is called after reading `Text` or `GeneralRef` event.

### Misc Changes

- [#379]: Added tests for attribute value normalization


[#379]: https://github.com/tafia/quick-xml/pull/379
[#936]: https://github.com/tafia/quick-xml/pull/936


Expand Down
12 changes: 4 additions & 8 deletions benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,13 @@ static INPUTS: &[(&str, &str)] = &[
("players.xml", PLAYERS),
];

// TODO: use fully normalized attribute values
fn parse_document_from_str(doc: &str) -> XmlResult<()> {
let mut r = Reader::from_str(doc);
loop {
match black_box(r.read_event()?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
black_box(attr?.decode_and_unescape_value(r.decoder())?);
black_box(attr?.decode_and_normalize_value(r.decoder())?);
}
}
Event::Text(e) => {
Expand All @@ -72,15 +71,14 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> {
Ok(())
}

// TODO: use fully normalized attribute values
fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
let mut r = Reader::from_reader(doc);
let mut buf = Vec::new();
loop {
match black_box(r.read_event_into(&mut buf)?) {
Event::Start(e) | Event::Empty(e) => {
for attr in e.attributes() {
black_box(attr?.decode_and_unescape_value(r.decoder())?);
black_box(attr?.decode_and_normalize_value(r.decoder())?);
}
}
Event::Text(e) => {
Expand All @@ -98,15 +96,14 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> {
Ok(())
}

// TODO: use fully normalized attribute values
fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
let mut r = NsReader::from_str(doc);
loop {
match black_box(r.read_resolved_event()?) {
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
black_box(resolved_ns);
for attr in e.attributes() {
black_box(attr?.decode_and_unescape_value(r.decoder())?);
black_box(attr?.decode_and_normalize_value(r.decoder())?);
}
}
(resolved_ns, Event::Text(e)) => {
Expand All @@ -125,7 +122,6 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> {
Ok(())
}

// TODO: use fully normalized attribute values
fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
let mut r = NsReader::from_reader(doc);
let mut buf = Vec::new();
Expand All @@ -134,7 +130,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> {
(resolved_ns, Event::Start(e) | Event::Empty(e)) => {
black_box(resolved_ns);
for attr in e.attributes() {
black_box(attr?.decode_and_unescape_value(r.decoder())?);
black_box(attr?.decode_and_normalize_value(r.decoder())?);
}
}
(resolved_ns, Event::Text(e)) => {
Expand Down
72 changes: 72 additions & 0 deletions benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
// criterion::black_box is deprecated since criterion 0.7.
// Benchmarks are assumed to run on a current Rust version, so this should be fine
#![allow(clippy::incompatible_msrv)]

use criterion::{self, criterion_group, criterion_main, Criterion};
use pretty_assertions::assert_eq;
use quick_xml::escape::{escape, unescape};
use quick_xml::events::attributes::Attribute;
use quick_xml::events::Event;
use quick_xml::name::QName;
use quick_xml::reader::{NsReader, Reader};
use std::borrow::Cow;
use std::hint::black_box;

static SAMPLE: &str = include_str!("../tests/documents/sample_rss.xml");
Expand Down Expand Up @@ -247,6 +250,74 @@ fn attributes(c: &mut Criterion) {
assert_eq!(count, 150);
})
});

group.finish();
}

/// Benchmarks normalizing attribute values
fn attribute_value_normalization(c: &mut Criterion) {
let mut group = c.benchmark_group("attribute_value_normalization");

group.bench_function("noop_short", |b| {
let attr = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"foobar"),
};
b.iter(|| {
criterion::black_box(attr.normalized_value()).unwrap();
})
});

group.bench_function("noop_long", |b| {
let attr = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(LOREM_IPSUM_TEXT.as_bytes()),
};
b.iter(|| {
criterion::black_box(attr.normalized_value()).unwrap();
})
});

group.bench_function("replacement_chars", |b| {
let attr = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"just a bit\n of text without\tany entities"),
};
b.iter(|| {
criterion::black_box(attr.normalized_value()).unwrap();
})
});

group.bench_function("char_reference", |b| {
let attr1 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"prefix &#34;some stuff&#34;,&#x22;more stuff&#x22;"),
};
let attr2 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"&#38;&#60;"),
};
b.iter(|| {
criterion::black_box(attr1.normalized_value()).unwrap();
criterion::black_box(attr2.normalized_value()).unwrap();
})
});

group.bench_function("entity_reference", |b| {
let attr1 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"age &gt; 72 &amp;&amp; age &lt; 21"),
};
let attr2 = Attribute {
key: QName(b"foo"),
value: Cow::Borrowed(b"&quot;what&apos;s that?&quot;"),
};
b.iter(|| {
criterion::black_box(attr1.normalized_value()).unwrap();
criterion::black_box(attr2.normalized_value()).unwrap();
})
});

group.finish();
}

Expand Down Expand Up @@ -359,6 +430,7 @@ criterion_group!(
read_resolved_event_into,
one_event,
attributes,
attribute_value_normalization,
escaping,
unescaping,
);
Expand Down
1 change: 1 addition & 0 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ impl From<EscapeError> for Error {
}

impl From<AttrError> for Error {
/// Creates a new `Error::InvalidAttr` from the given error
#[inline]
fn from(error: AttrError) -> Self {
Self::InvalidAttr(error)
Expand Down
Loading
Loading