diff --git a/Changelog.md b/Changelog.md index 2fd133ec..ddc2c597 100644 --- a/Changelog.md +++ b/Changelog.md @@ -16,12 +16,21 @@ ### New Features +- [#379]: Improved compliance with the XML attribute value normalization process by + adding `Attribute::normalized_value()` and `Attribute::normalized_value_with()`, + which ought to be used in place of `Attribute::unescape_value()` and + `Attribute::unescape_value_with()` + ### Bug Fixes - [#936]: Fix incorrect result of `.read_text()` when it is called after reading `Text` or `GeneralRef` event. ### Misc Changes +- [#379]: Added tests for attribute value normalization + + +[#379]: https://github.com/tafia/quick-xml/pull/379 [#936]: https://github.com/tafia/quick-xml/pull/936 diff --git a/benches/macrobenches.rs b/benches/macrobenches.rs index 77197b3e..c0267a74 100644 --- a/benches/macrobenches.rs +++ b/benches/macrobenches.rs @@ -48,14 +48,13 @@ static INPUTS: &[(&str, &str)] = &[ ("players.xml", PLAYERS), ]; -// TODO: use fully normalized attribute values fn parse_document_from_str(doc: &str) -> XmlResult<()> { let mut r = Reader::from_str(doc); loop { match black_box(r.read_event()?) { Event::Start(e) | Event::Empty(e) => { for attr in e.attributes() { - black_box(attr?.decode_and_unescape_value(r.decoder())?); + black_box(attr?.decode_and_normalize_value(r.decoder())?); } } Event::Text(e) => { @@ -72,7 +71,6 @@ fn parse_document_from_str(doc: &str) -> XmlResult<()> { Ok(()) } -// TODO: use fully normalized attribute values fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> { let mut r = Reader::from_reader(doc); let mut buf = Vec::new(); @@ -80,7 +78,7 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> { match black_box(r.read_event_into(&mut buf)?) { Event::Start(e) | Event::Empty(e) => { for attr in e.attributes() { - black_box(attr?.decode_and_unescape_value(r.decoder())?); + black_box(attr?.decode_and_normalize_value(r.decoder())?); } } Event::Text(e) => { @@ -98,7 +96,6 @@ fn parse_document_from_bytes(doc: &[u8]) -> XmlResult<()> { Ok(()) } -// TODO: use fully normalized attribute values fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> { let mut r = NsReader::from_str(doc); loop { @@ -106,7 +103,7 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> { (resolved_ns, Event::Start(e) | Event::Empty(e)) => { black_box(resolved_ns); for attr in e.attributes() { - black_box(attr?.decode_and_unescape_value(r.decoder())?); + black_box(attr?.decode_and_normalize_value(r.decoder())?); } } (resolved_ns, Event::Text(e)) => { @@ -125,7 +122,6 @@ fn parse_document_from_str_with_namespaces(doc: &str) -> XmlResult<()> { Ok(()) } -// TODO: use fully normalized attribute values fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> { let mut r = NsReader::from_reader(doc); let mut buf = Vec::new(); @@ -134,7 +130,7 @@ fn parse_document_from_bytes_with_namespaces(doc: &[u8]) -> XmlResult<()> { (resolved_ns, Event::Start(e) | Event::Empty(e)) => { black_box(resolved_ns); for attr in e.attributes() { - black_box(attr?.decode_and_unescape_value(r.decoder())?); + black_box(attr?.decode_and_normalize_value(r.decoder())?); } } (resolved_ns, Event::Text(e)) => { diff --git a/benches/microbenches.rs b/benches/microbenches.rs index f6af550f..2beefc28 100644 --- a/benches/microbenches.rs +++ b/benches/microbenches.rs @@ -2,12 +2,15 @@ // criterion::black_box is deprecated in since criterion 0.7. // Running benchmarks assumed on current Rust version, so this should be fine #![allow(clippy::incompatible_msrv)] + use criterion::{self, criterion_group, criterion_main, Criterion}; use pretty_assertions::assert_eq; use quick_xml::escape::{escape, unescape}; +use quick_xml::events::attributes::Attribute; use quick_xml::events::Event; use quick_xml::name::QName; use quick_xml::reader::{NsReader, Reader}; +use std::borrow::Cow; use std::hint::black_box; static SAMPLE: &str = include_str!("../tests/documents/sample_rss.xml"); @@ -247,6 +250,74 @@ fn attributes(c: &mut Criterion) { assert_eq!(count, 150); }) }); + + group.finish(); +} + +/// Benchmarks normalizing attribute values +fn attribute_value_normalization(c: &mut Criterion) { + let mut group = c.benchmark_group("attribute_value_normalization"); + + group.bench_function("noop_short", |b| { + let attr = Attribute { + key: QName(b"foo"), + value: Cow::Borrowed(b"foobar"), + }; + b.iter(|| { + criterion::black_box(attr.normalized_value()).unwrap(); + }) + }); + + group.bench_function("noop_long", |b| { + let attr = Attribute { + key: QName(b"foo"), + value: Cow::Borrowed(LOREM_IPSUM_TEXT.as_bytes()), + }; + b.iter(|| { + criterion::black_box(attr.normalized_value()).unwrap(); + }) + }); + + group.bench_function("replacement_chars", |b| { + let attr = Attribute { + key: QName(b"foo"), + value: Cow::Borrowed(b"just a bit\n of text without\tany entities"), + }; + b.iter(|| { + criterion::black_box(attr.normalized_value()).unwrap(); + }) + }); + + group.bench_function("char_reference", |b| { + let attr1 = Attribute { + key: QName(b"foo"), + value: Cow::Borrowed(b"prefix "some stuff","more stuff""), + }; + let attr2 = Attribute { + key: QName(b"foo"), + value: Cow::Borrowed(b"&<"), + }; + b.iter(|| { + criterion::black_box(attr1.normalized_value()).unwrap(); + criterion::black_box(attr2.normalized_value()).unwrap(); + }) + }); + + group.bench_function("entity_reference", |b| { + let attr1 = Attribute { + key: QName(b"foo"), + value: Cow::Borrowed(b"age > 72 && age < 21"), + }; + let attr2 = Attribute { + key: QName(b"foo"), + value: Cow::Borrowed(b""what's that?""), + }; + b.iter(|| { + criterion::black_box(attr1.normalized_value()).unwrap(); + criterion::black_box(attr2.normalized_value()).unwrap(); + }) + }); + group.finish(); } @@ -359,6 +430,7 @@ criterion_group!( read_resolved_event_into, one_event, attributes, + attribute_value_normalization, escaping, unescaping, ); diff --git a/src/errors.rs b/src/errors.rs index 9a529ca6..5f541766 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -253,6 +253,7 @@ impl From for Error { } impl From for Error { + /// Creates a new `Error::InvalidAttr` from the given error #[inline] fn from(error: AttrError) -> Self { Self::InvalidAttr(error) diff --git a/src/escape.rs b/src/escape.rs index a0fc6a38..d64cd7e1 100644 --- a/src/escape.rs +++ b/src/escape.rs @@ -5,6 +5,7 @@ use std::borrow::Cow; use std::fmt::{self, Write}; use std::num::ParseIntError; use std::ops::Range; +use std::slice::Iter; /// Error of parsing character reference (`&#;` or `&#x;`). #[derive(Clone, Debug, PartialEq)] @@ -51,6 +52,12 @@ pub enum EscapeError { /// Attempt to parse character reference (`&#;` or `&#x;`) /// was unsuccessful, not all characters are decimal or hexadecimal numbers. InvalidCharRef(ParseCharRefError), + /// The recursion limit was reached while attempting to unescape XML entities, + /// or normalize an attribute value. This could indicate an entity loop. + /// + /// Limiting recursion prevents Denial-of-Service type attacks + /// such as the "billion laughs" [attack](https://en.wikipedia.org/wiki/Billion_laughs_attack). + ReachedRecursionLimit(Range), } impl std::fmt::Display for EscapeError { @@ -61,12 +68,17 @@ impl std::fmt::Display for EscapeError { } Self::UnterminatedEntity(e) => write!( f, - "Error while escaping character at range {:?}: Cannot find ';' after '&'", + "Error while unescaping character at range {:?}: Cannot find ';' after '&'", e ), Self::InvalidCharRef(e) => { write!(f, "invalid character reference: {}", e) } + Self::ReachedRecursionLimit(e) => write!( + f, + "Error while unescaping entity at range {:?}: Recursion limit reached", + e + ), } } } @@ -517,6 +529,164 @@ fn normalize_xml10_eol_step( //////////////////////////////////////////////////////////////////////////////////////////////////// +const fn is_normalization_char(b: &u8) -> bool { + matches!(*b, b'\t' | b'\r' | b'\n' | b' ' | b'&') +} + +/// Returns the attribute value normalized as per [the XML specification], +/// using a custom entity resolver. +/// +/// Do not use this method with HTML attributes. +/// +/// Escape sequences such as `>` are replaced with their unescaped equivalents such as `>` +/// and the characters `\t`, `\r`, `\n` are replaced with whitespace characters. A function +/// for resolving entities can be provided as `resolve_entity`. Builtin entities will still +/// take precedence. +/// +/// This will allocate unless the raw attribute value does not require normalization. +/// +/// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize +pub(crate) fn normalize_attribute_value<'input, 'entity, F>( + value: &'input str, + resolve_entity: F, +) -> Result, EscapeError> +where + // the lifetime of the output comes from a capture or is `'static` + F: Fn(&str) -> Option<&'entity str>, +{ + const DEPTH: usize = 128; + + let bytes = value.as_bytes(); + let mut iter = bytes.iter(); + + if let Some(i) = iter.position(is_normalization_char) { + let mut normalized = String::with_capacity(value.len()); + let pos = normalize_step( + &mut normalized, + &mut iter, + value, + 0, + i, + DEPTH, + &resolve_entity, + )?; + + normalize_steps( + &mut normalized, + &mut iter, + value, + pos, + DEPTH, + &resolve_entity, + )?; + return Ok(normalized.into()); + } + Ok(Cow::Borrowed(value)) +} + +fn normalize_steps<'entity, F>( + normalized: &mut String, + iter: &mut Iter, + input: &str, + mut pos: usize, + depth: usize, + resolve_entity: &F, +) -> Result<(), EscapeError> +where + // the lifetime of the output comes from a capture or is `'static` + F: Fn(&str) -> Option<&'entity str>, +{ + while let Some(i) = iter.position(is_normalization_char) { + pos = normalize_step(normalized, iter, input, pos, pos + i, depth, resolve_entity)?; + } + if let Some(rest) = input.get(pos..) { + normalized.push_str(rest); + } + Ok(()) +} + +/// Performs one step of the [normalization algorithm] (but with recursive part): +/// +/// 1. For a character reference, append the referenced character +/// to the normalized value. +/// 2. For an entity reference, recursively apply this algorithm +/// to the replacement text of the entity. +/// 3. For a white space character (#x20, #xD, #xA, #x9), append +/// a space character (#x20) to the normalized value. +/// 4. For another character, append the character to the normalized value. +/// +/// # Parameters +/// +/// - `normalized`: Output of the algorithm. Normalized value will be placed here +/// - `iter`: Iterator over bytes of `input` +/// - `input`: Original non-normalized value +/// - `last_pos`: Index of the last byte in `input` that was processed +/// - `index`: Index of the byte in `input` that should be processed now +/// - `depth`: Current recursion depth. Too deep recursion will interrupt the algorithm +/// - `resolve_entity`: Resolver of entities. Returns `None` for unknown entities +/// +/// [normalization algorithm]: https://www.w3.org/TR/xml11/#AVNormalize +fn normalize_step<'entity, F>( + normalized: &mut String, + iter: &mut Iter, + input: &str, + last_pos: usize, + index: usize, + depth: usize, + resolve_entity: &F, +) -> Result +where + // the lifetime of the output comes from a capture or is `'static` + F: Fn(&str) -> Option<&'entity str>, +{ + // 4. For another character, append the character to the normalized value. + normalized.push_str(&input[last_pos..index]); + + match input.as_bytes()[index] { + b'&' => { + let start = index + 1; // +1 - skip `&` + let end = start + + match iter.position(|&b| b == b';') { + Some(end) => end, + None => return Err(EscapeError::UnterminatedEntity(index..input.len())), + }; + + // Content between & and ; - &pat; + let pat = &input[start..end]; + // 1. For a character reference, append the referenced character + // to the normalized value. + if let Some(entity) = pat.strip_prefix('#') { + let entity = &pat[1..]; // starts after the # + let codepoint = parse_number(entity).map_err(|e| EscapeError::InvalidCharRef(e))?; + normalized.push_str(codepoint.encode_utf8(&mut [0u8; 4])); + // 2. For an entity reference, recursively apply this algorithm + // to the replacement text of the entity. + } else if let Some(value) = resolve_entity(pat) { + normalize_steps( + normalized, + &mut value.as_bytes().iter(), + value, + 0, + depth.saturating_sub(1), + resolve_entity, + )?; + } else { + return Err(EscapeError::UnrecognizedEntity(start..end, pat.to_string())); + } + Ok(end + 1) // +1 - skip `;` + } + // 3. For a white space character (#x20, #xD, #xA, #x9), append + // a space character (#x20) to the normalized value. + b'\t' | b'\n' | b'\r' | b' ' => { + normalized.push(' '); + Ok(index + 1) // +1 - skip character + } + + // SAFETY: We call normalize_step only when is_normalization_char() return true + _ => unreachable!("Only '\\t', '\\n', '\\r', ' ', and '&' are possible here"), + } +} + /// Resolves predefined XML entities or all HTML5 entities depending on the feature /// [`escape-html`](https://docs.rs/quick-xml/latest/quick_xml/#escape-html). /// @@ -2285,4 +2455,137 @@ mod normalization { } } } + + mod attribute_value { + #[test] + fn empty() { + assert_eq!(normalize_attribute_value("", |_| { None }), Ok("".into())); + } + + #[test] + fn only_spaces() { + assert_eq!( + normalize_attribute_value(" ", |_| { None }), + Ok(" ".into()) + ); + assert_eq!( + normalize_attribute_value("\t\t\t", |_| { None }), + Ok(" ".into()) + ); + assert_eq!( + normalize_attribute_value("\r\r\r", |_| { None }), + Ok(" ".into()) + ); + assert_eq!( + normalize_attribute_value("\n\n\n", |_| { None }), + Ok(" ".into()) + ); + } + + #[test] + fn already_normalized() { + assert_eq!( + normalize_attribute_value("already normalized", |_| { None }), + Ok("already normalized".into()) + ); + } + + #[test] + fn characters() { + assert_eq!( + normalize_attribute_value("string with character", |_| { None }), + Ok("string with character".into()) + ); + assert_eq!( + normalize_attribute_value("string with character", |_| { None }), + Ok("string with character".into()) + ); + } + + #[test] + fn entities() { + assert_eq!( + normalize_attribute_value("string with &entity; reference", |_| { + Some("replacement") + }), + Ok("string with replacement reference".into()) + ); + assert_eq!( + normalize_attribute_value("string with &entity-1; reference", |entity| { + match entity { + "entity-1" => Some("recursive &entity-2;"), + "entity-2" => Some("entity 2"), + _ => None, + } + }), + Ok("string with recursive entity 2 reference".into()) + ); + } + + #[test] + fn unclosed_entity() { + assert_eq!( + normalize_attribute_value("string with unclosed &entity reference", |_| { + // 0 ^ = 21 ^ = 38 + Some("replacement") + }), + Err(EscapeError::UnterminatedEntity(21..38)) + ); + assert_eq!( + normalize_attribute_value( + "string with unclosed (character) reference", + |_| { + // 0 ^ = 21 ^ = 47 + None + } + ), + Err(EscapeError::UnterminatedEntity(21..47)) + ); + } + + #[test] + fn unknown_entity() { + assert_eq!( + normalize_attribute_value("string with unknown &entity; reference", |_| { None }), + // 0 ^ ^ = 21..27 + Err(EscapeError::UnrecognizedSymbol( + 21..27, + "entity".to_string() + )) + ); + } + + #[test] + fn unclosed_entity() { + assert_eq!( + normalize_attribute_value("string with unclosed &entity reference", |_| { + // 0 ^ = 21 ^ = 38 + Some("replacement") + }), + Err(EscapeError::UnterminatedEntity(21..38)) + ); + assert_eq!( + normalize_attribute_value( + "string with unclosed (character) reference", + |_| { + // 0 ^ = 21 ^ = 47 + None + } + ), + Err(EscapeError::UnterminatedEntity(21..47)) + ); + } + + #[test] + fn unknown_entity() { + assert_eq!( + normalize_attribute_value("string with unknown &entity; reference", |_| { None }), + // 0 ^ 21 ^ 27 + Err(EscapeError::UnrecognizedEntity( + 21..27, + "entity".to_string() + )) + ); + } + } } diff --git a/src/events/attributes.rs b/src/events/attributes.rs index f0c73d2a..56b9b356 100644 --- a/src/events/attributes.rs +++ b/src/events/attributes.rs @@ -4,9 +4,11 @@ use crate::encoding::Decoder; use crate::errors::Result as XmlResult; -use crate::escape::{escape, resolve_predefined_entity, unescape_with}; +#[cfg(any(doc, not(feature = "encoding")))] +use crate::escape::{self, escape, resolve_predefined_entity, unescape_with, EscapeError}; use crate::name::{LocalName, Namespace, NamespaceResolver, QName}; use crate::utils::{is_whitespace, Bytes}; +use crate::Reader; use std::fmt::{self, Debug, Display, Formatter}; use std::iter::FusedIterator; @@ -31,7 +33,100 @@ pub struct Attribute<'a> { } impl<'a> Attribute<'a> { - /// Decodes using UTF-8 then unescapes the value. + /// Returns the attribute value normalized as per [the XML specification]. + /// + /// Do not use this method with HTML attributes. + /// + /// Escape sequences such as `>` are replaced with their unescaped equivalents such as `>` + /// and the characters `\t`, `\r`, `\n` are replaced with whitespace characters. + /// + /// This will allocate unless the raw attribute value does not require normalization. + /// + /// See also [`normalized_value_with()`](#method.Self::normalized_value_with) + /// + /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize + #[cfg(any(doc, not(feature = "encoding")))] + pub fn normalized_value(&'a self) -> Result, EscapeError> { + self.normalized_value_with(|_| None) + } + + /// Returns the attribute value normalized as per [the XML specification], + /// using a custom entity resolver. + /// + /// Do not use this method with HTML attributes. + /// + /// Escape sequences such as `>` are replaced with their unescaped equivalents such as `>` + /// and the characters `\t`, `\r`, `\n` are replaced with whitespace characters. A function + /// for resolving entities can be provided as `resolve_entity`. Builtin entities will still + /// take precedence. + /// + /// This will allocate unless the raw attribute value does not require normalization. + /// + /// See also [`normalized_value()`](#method.normalized_value) + /// + /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize + #[cfg(any(doc, not(feature = "encoding")))] + pub fn normalized_value_with<'entity>( + &'a self, + resolve_entity: impl Fn(&str) -> Option<&'entity str>, + ) -> Result, EscapeError> { + escape::normalize_attribute_value( + std::str::from_utf8(self.value.as_ref()).expect("unable to decode as utf-8"), + resolve_entity, + ) + } + + /// Decodes using a provided reader and returns the attribute value normalized + /// as per [the XML specification]. + /// + /// Do not use this method with HTML attributes. + /// + /// Escape sequences such as `>` are replaced with their unescaped equivalents such as `>` + /// and the characters `\t`, `\r`, `\n` are replaced with whitespace characters. + /// + /// This will allocate unless the raw attribute value does not require normalization. + /// + /// See also [`decode_and_normalize_value_with()`](#method.decode_and_normalize_value_with) + /// + /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize + pub fn decode_and_normalize_value(&self, reader: &Reader) -> XmlResult> { + self.decode_and_normalize_value_with(reader, |_| None) + } + + /// Decodes using a provided reader and returns the attribute value normalized + /// as per [the XML specification], using a custom entity resolver. + /// + /// Do not use this method with HTML attributes. + /// + /// Escape sequences such as `>` are replaced with their unescaped equivalents such as `>` + /// and the characters `\t`, `\r`, `\n` are replaced with whitespace characters. A function + /// for resolving entities can be provided as `resolve_entity`. Builtin entities will still + /// take precedence. + /// + /// This will allocate unless the raw attribute value does not require normalization. + /// + /// See also [`decode_and_normalize_value()`](#method.decode_and_normalize_value) + /// + /// [the XML specification]: https://www.w3.org/TR/xml11/#AVNormalize + pub fn decode_and_normalize_value_with<'entity, B>( + &self, + reader: &Reader, + resolve_entity: impl Fn(&str) -> Option<&'entity str>, + ) -> XmlResult> { + let decoded = match &self.value { + Cow::Borrowed(bytes) => reader.decoder().decode(bytes)?, + // Convert to owned, because otherwise Cow will be bound with wrong lifetime + Cow::Owned(bytes) => reader.decoder().decode(bytes)?.into_owned().into(), + }; + + match escape::normalize_attribute_value(&decoded, resolve_entity)? { + // Because result is borrowed, no replacements was done and we can use original string + Cow::Borrowed(_) => Ok(decoded), + Cow::Owned(s) => Ok(s.into()), + } + } + + /// Returns the unescaped value. /// /// This is normally the value you are interested in. Escape sequences such as `>` are /// replaced with their unescaped equivalents such as `>`. @@ -1010,6 +1105,129 @@ mod xml { use super::*; use pretty_assertions::assert_eq; + #[cfg(any(doc, not(feature = "encoding")))] + mod attribute_value_normalization { + + use super::*; + use pretty_assertions::assert_eq; + + /// Empty values returned are unchanged + #[test] + fn test_empty() { + let raw_value = "".as_bytes(); + let attr = Attribute::from(("foo".as_bytes(), raw_value)); + assert_eq!(attr.normalized_value(), Ok(Cow::Borrowed(""))); + } + + /// Already normalized values are returned unchanged + #[test] + fn test_already_normalized() { + let raw_value = "foobar123".as_bytes(); + let output = "foobar123"; + let attr = Attribute::from(("foo".as_bytes(), raw_value)); + assert_eq!(attr.normalized_value(), Ok(Cow::Borrowed(output))); + } + + /// Return, tab, and newline characters (0xD, 0x9, 0xA) must be substituted with + /// a space character + #[test] + fn test_space_replacement() { + let raw_value = "\r\nfoo\rbar\tbaz\n\ndelta\n".as_bytes(); + let output = " foo bar baz delta ".to_owned(); + let attr = Attribute::from(("foo".as_bytes(), raw_value)); + assert_eq!(attr.normalized_value(), Ok(Cow::Owned(output))); + } + + /// Entities must be terminated + #[test] + fn test_unterminated_entity() { + let raw_value = "abc"def".as_bytes(); + let attr = Attribute::from(("foo".as_bytes(), raw_value)); + assert_eq!( + attr.normalized_value(), + Err(EscapeError::UnterminatedEntity(3..11)) + ); + } + + /// Unknown entities raise error + #[test] + fn test_unrecognized_entity() { + let raw_value = "abc&unkn;def".as_bytes(); + let attr = Attribute::from(("foo".as_bytes(), raw_value)); + assert_eq!( + attr.normalized_value(), + Err(EscapeError::UnrecognizedEntity(4..8, "unkn".to_owned())) + ); + } + + /// custom entity replacement works, entity replacement text processed recursively + #[test] + #[ignore] + fn test_entity_replacement() { + let raw_value = "&d;&d;A&a; &a;B&da;".as_bytes(); + let output = " A B ".to_owned(); + let attr = Attribute::from(("foo".as_bytes(), raw_value)); + fn custom_resolver(ent: &str) -> Option<&'static str> { + match ent { + "d" => Some(" "), + "a" => Some(" "), + "da" => Some(" "), + _ => None, + } + } + assert_eq!( + attr.normalized_value_with(&custom_resolver), + Ok(Cow::Owned(output)) + ); + } + + /// An obvious loop should be detected immediately + #[test] + fn test_entity_loop() { + let raw_value = "&d;".as_bytes(); + let attr = Attribute::from(("foo".as_bytes(), raw_value)); + fn custom_resolver(ent: &str) -> Option<&'static str> { + match ent { + "d" => Some("&d;"), + } + } + assert_eq!( + attr.normalized_value_with(&custom_resolver), + Err(EscapeError::ReachedRecursionLimit(1..2)) + ); + } + + /// A more complex entity loop should hit the recursion limit. + #[test] + fn test_recursion_limit() { + let raw_value = "&a;".as_bytes(); + let attr = Attribute::from(("foo".as_bytes(), raw_value)); + fn custom_resolver(ent: &str) -> Option<&'static str> { + match ent { + "a" => Some("&b;"), + "b" => Some("&c;"), + "c" => Some("&d;"), + "d" => Some("&e;"), + "e" => Some("&f;"), + "f" => Some("&a;"), + } + } + assert_eq!( + attr.normalized_value_with(&custom_resolver), + Err(EscapeError::ReachedRecursionLimit(1..2)) + ); + } + + #[test] + fn test_char_references() { + // character literal references are substituted without being replaced by spaces + let raw_value = " A B ".as_bytes(); + let output = "\r\rA\n\nB\r\n".to_owned(); + let attr = Attribute::from(("foo".as_bytes(), raw_value)); + assert_eq!(attr.normalized_value(), Ok(Cow::Owned(output))); + } + } + /// Checked attribute is the single attribute mod single { use super::*;