diff --git a/Changelog.md b/Changelog.md index 2fd133ec..4998265b 100644 --- a/Changelog.md +++ b/Changelog.md @@ -16,12 +16,19 @@ ### New Features +- [#598]: Add method `NamespaceResolver::set_level` which may be helpful in some circumstances. + ### Bug Fixes +- [#597]: Fix incorrect processing of namespace scopes in `NsReader::read_to_end`, + `NsReader::read_to_end_into`, `NsReader::read_to_end_into_async` and `NsReader::read_text`. + The scope started by a start element was not ended after that call. - [#936]: Fix incorrect result of `.read_text()` when it is called after reading `Text` or `GeneralRef` event. ### Misc Changes +[#597]: https://github.com/tafia/quick-xml/issues/597 +[#598]: https://github.com/tafia/quick-xml/pull/598 [#936]: https://github.com/tafia/quick-xml/pull/936 diff --git a/src/name.rs b/src/name.rs index 09f675d2..b58a0095 100644 --- a/src/name.rs +++ b/src/name.rs @@ -5,7 +5,7 @@ use crate::events::attributes::Attribute; use crate::events::{BytesStart, Event}; -use crate::utils::write_byte_string; +use crate::utils::{write_byte_string, Bytes}; use memchr::memchr; use std::fmt::{self, Debug, Formatter}; use std::iter::FusedIterator; @@ -480,7 +480,7 @@ impl NamespaceBinding { /// prefixes into namespaces. /// /// Holds all internal logic to push/pop namespaces with their levels. -#[derive(Debug, Clone)] +#[derive(Clone)] pub struct NamespaceResolver { /// Buffer that contains names of namespace prefixes (the part between `xmlns:` /// and an `=`) and namespace values. @@ -492,6 +492,16 @@ pub struct NamespaceResolver { nesting_level: u16, } +impl Debug for NamespaceResolver { + fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { + f.debug_struct("NamespaceResolver") + .field("buffer", &Bytes(&self.buffer)) + .field("bindings", &self.bindings) + .field("nesting_level", &self.nesting_level) + .finish() + } +} + /// That constant define the one of [reserved namespaces] for the xml standard. 
/// /// The prefix `xml` is by definition bound to the namespace name @@ -674,11 +684,54 @@ impl NamespaceResolver { /// last call to [`Self::push()`] and [`Self::add()`]. /// /// [namespace bindings]: https://www.w3.org/TR/xml-names11/#dt-NSDecl + #[inline] pub fn pop(&mut self) { - self.nesting_level = self.nesting_level.saturating_sub(1); - let current_level = self.nesting_level; + self.set_level(self.nesting_level.saturating_sub(1)); + } + + /// Sets new number of [`push`] calls that were not followed by [`pop`] calls. + /// + /// When set to a value less than the current [`level`], behaves as if [`pop`] + /// were called until the level reaches the corresponding value. + /// + /// When set to a value bigger than the current [`level`], just increases the internal + /// counter. You may need to call [`pop`] more times than required before. + /// + /// # Example + /// + /// ``` + /// # use pretty_assertions::assert_eq; + /// # use quick_xml::events::BytesStart; + /// # use quick_xml::name::{Namespace, NamespaceResolver, PrefixDeclaration, QName, ResolveResult}; + /// # + /// let mut resolver = NamespaceResolver::default(); + /// + /// assert_eq!(resolver.level(), 0); + /// + /// resolver.push(&BytesStart::new("tag")); + /// assert_eq!(resolver.level(), 1); + /// + /// resolver.set_level(10); + /// assert_eq!(resolver.level(), 10); + /// + /// resolver.pop(); + /// assert_eq!(resolver.level(), 9); + /// + /// resolver.set_level(0); + /// assert_eq!(resolver.level(), 0); + /// + /// // pop from empty resolver does nothing + /// resolver.pop(); + /// assert_eq!(resolver.level(), 0); + /// ``` + /// + /// [`push`]: Self::push + /// [`pop`]: Self::pop + /// [`level`]: Self::level + pub fn set_level(&mut self, level: u16) { + self.nesting_level = level; // from the back (most deeply nested scope), look for the first scope that is still valid - match self.bindings.iter().rposition(|n| n.level <= current_level) { + match self.bindings.iter().rposition(|n| n.level <= level) { // none 
of the namespaces are valid, remove all of them None => { self.buffer.clear(); diff --git a/src/reader/async_tokio.rs b/src/reader/async_tokio.rs index 84a5681b..76e6138c 100644 --- a/src/reader/async_tokio.rs +++ b/src/reader/async_tokio.rs @@ -337,7 +337,11 @@ impl NsReader { ) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should // match literally the start name. See `Config::check_end_names` documentation - self.reader.read_to_end_into_async(end, buf).await + let result = self.reader.read_to_end_into_async(end, buf).await?; + // read_to_end_into_async will consume closing tag. Because nobody can access to its + // content anymore, we directly pop namespace of the opening tag + self.ns_resolver.pop(); + Ok(result) } /// An asynchronous version of [`read_resolved_event_into()`]. Reads the next diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index a3b33746..a054f8d1 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -23,7 +23,7 @@ pub struct NsReader { /// An XML reader pub(super) reader: Reader, /// A buffer to manage namespaces - ns_resolver: NamespaceResolver, + pub(super) ns_resolver: NamespaceResolver, /// We cannot pop data from the namespace stack until returned `Empty` or `End` /// event will be processed by the user, so we only mark that we should that /// in the next [`Self::read_event_impl()`] call. @@ -604,7 +604,11 @@ impl NsReader { pub fn read_to_end_into(&mut self, end: QName, buf: &mut Vec) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should // match literally the start name. See `Config::check_end_names` documentation - self.reader.read_to_end_into(end, buf) + let result = self.reader.read_to_end_into(end, buf)?; + // read_to_end_into will consume closing tag. 
Because nobody can access to its + // content anymore, we directly pop namespace of the opening tag + self.ns_resolver.pop(); + Ok(result) } } @@ -840,7 +844,11 @@ impl<'i> NsReader<&'i [u8]> { pub fn read_to_end(&mut self, end: QName) -> Result { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should // match literally the start name. See `Config::check_end_names` documentation - self.reader.read_to_end(end) + let result = self.reader.read_to_end(end)?; + // read_to_end will consume closing tag. Because nobody can access to its + // content anymore, we directly pop namespace of the opening tag + self.ns_resolver.pop(); + Ok(result) } /// Reads content between start and end tags, including any markup. This @@ -910,7 +918,13 @@ impl<'i> NsReader<&'i [u8]> { /// [`decoder()`]: Reader::decoder() #[inline] pub fn read_text(&mut self, end: QName) -> Result> { - self.reader.read_text(end) + // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should + // match literally the start name. See `Config::check_end_names` documentation + let result = self.reader.read_text(end)?; + // read_text will consume closing tag. Because nobody can access to its + // content anymore, we directly pop namespace of the opening tag + self.ns_resolver.pop(); + Ok(result) } } diff --git a/src/reader/state.rs b/src/reader/state.rs index 9d708529..ef96ba7a 100644 --- a/src/reader/state.rs +++ b/src/reader/state.rs @@ -1,3 +1,5 @@ +use std::fmt::Debug; + #[cfg(feature = "encoding")] use encoding_rs::UTF_8; @@ -8,12 +10,12 @@ use crate::parser::{Parser, PiParser}; #[cfg(feature = "encoding")] use crate::reader::EncodingRef; use crate::reader::{BangType, Config, DtdParser, ParseState}; -use crate::utils::{is_whitespace, name_len}; +use crate::utils::{is_whitespace, name_len, Bytes}; /// A struct that holds a current reader state and a parser configuration. /// It is independent on a way of reading data: the reader feed data into it and /// get back produced [`Event`]s. 
-#[derive(Clone, Debug)] +#[derive(Clone)] pub(super) struct ReaderState { /// Number of bytes read from the source of data since the reader was created pub offset: u64, @@ -373,3 +375,21 @@ impl Default for ReaderState { } } } + +impl Debug for ReaderState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let mut d = f.debug_struct("ReaderState"); + + d.field("offset", &self.offset); + d.field("last_error_offset", &self.last_error_offset); + d.field("state", &self.state); + d.field("config", &self.config); + d.field("opened_buffer", &Bytes(&self.opened_buffer)); + d.field("opened_starts", &self.opened_starts); + + #[cfg(feature = "encoding")] + d.field("encoding", &self.encoding); + + d.finish() + } +} diff --git a/tests/issues.rs b/tests/issues.rs index 966aa9ad..799f7f17 100644 --- a/tests/issues.rs +++ b/tests/issues.rs @@ -8,8 +8,8 @@ use std::sync::mpsc; use quick_xml::errors::{Error, IllFormedError, SyntaxError}; use quick_xml::events::{BytesDecl, BytesEnd, BytesStart, BytesText, Event}; -use quick_xml::name::QName; -use quick_xml::reader::Reader; +use quick_xml::name::{Namespace, QName, ResolveResult}; +use quick_xml::reader::{NsReader, Reader}; use quick_xml::utils::Bytes; use pretty_assertions::assert_eq; @@ -190,6 +190,39 @@ fn issue590() { } } +#[test] +fn issue597() { + const S: &'static str = r#" + + + + + + + + + + "#; + + let mut reader = NsReader::from_str(S); + let objects_ns = loop { + let (ns, ev) = reader.read_resolved_event().unwrap(); + match ev { + Event::Start(v) if v.local_name().as_ref() == b"xmlfilecontent_test" => { + reader.read_to_end(v.name()).unwrap(); + } + Event::Empty(v) if v.local_name().as_ref() == b"objects" => break ns, + _ => (), + } + }; + assert_eq!( + objects_ns, + ResolveResult::Bound(Namespace( + b"http://oval.mitre.org/XMLSchema/oval-definitions-5" + )) + ); +} + /// Regression test for https://github.com/tafia/quick-xml/issues/604 mod issue604 { use super::*; diff --git 
a/tests/reader-namespaces.rs b/tests/reader-namespaces.rs index e4dbb5a6..fe7f1e35 100644 --- a/tests/reader-namespaces.rs +++ b/tests/reader-namespaces.rs @@ -1,6 +1,9 @@ use pretty_assertions::assert_eq; use quick_xml::events::attributes::Attribute; use quick_xml::events::Event::*; +use quick_xml::events::{ + BytesCData, BytesDecl, BytesEnd, BytesPI, BytesRef, BytesStart, BytesText, +}; use quick_xml::name::ResolveResult::*; use quick_xml::name::{Namespace, PrefixDeclaration, QName}; use quick_xml::reader::NsReader; @@ -499,3 +502,1048 @@ fn reserved_name() { ), } } + +mod read_to_end { + use super::*; + use pretty_assertions::assert_eq; + + /// Yes, this test contains invalid XML but since we can parse it, we check + /// that it does not break our parser + #[test] + fn decl() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event().unwrap(), + Decl(BytesDecl::new("1.0", None, None)) + ); + assert_eq!( + reader.read_to_end(QName(b"root")).unwrap(), + 45..65 // + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + /// Yes, this test contains invalid XML but since we can parse it, we check + /// that it does not break our parser + #[test] + fn doctype() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!(reader.read_event().unwrap(), DocType(BytesText::new("dtd"))); + assert_eq!( + reader.read_to_end(QName(b"root")).unwrap(), + 38..58 // + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, 
Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn pi() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!(reader.read_event().unwrap(), PI(BytesPI::new("pi"))); + assert_eq!( + reader.read_to_end(QName(b"root")).unwrap(), + 30..50 // + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn comment() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event().unwrap(), + Comment(BytesText::new("comment")) + ); + assert_eq!( + reader.read_to_end(QName(b"root")).unwrap(), + 38..58 // + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn start() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + reader.config_mut().check_end_names = false; + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::new("tag")), + ) + ); + assert_eq!( + reader.read_to_end(QName(b"root")).unwrap(), + 29..49 // + ); + // NOTE: due to unbalanced XML namespace still not closed + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Empty(BytesStart::new("element")) + ) + ); + 
assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn end() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + reader.config_mut().check_end_names = false; + reader.config_mut().allow_unmatched_ends = true; + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Bound(Namespace(b"namespace")), End(BytesEnd::new("tag")),) + ); + assert_eq!( + reader.read_to_end(QName(b"root")).unwrap(), + 30..50 // + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn empty() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Empty(BytesStart::new("tag")), + ) + ); + assert_eq!( + reader.read_to_end(QName(b"root")).unwrap(), + 30..50 // + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn text() { + let mut reader = NsReader::from_str( + "\ + \ + text\ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!(reader.read_event().unwrap(), Text(BytesText::new("text"))); + assert_eq!( + reader.read_to_end(QName(b"root")).unwrap(), + 28..48 // + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + 
+ #[test] + fn cdata() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event().unwrap(), + CData(BytesCData::new("cdata")) + ); + assert_eq!( + reader.read_to_end(QName(b"root")).unwrap(), + 41..61 // + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn general_ref() { + let mut reader = NsReader::from_str( + "\ + \ + &entity;\ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event().unwrap(), + GeneralRef(BytesRef::new("entity")) + ); + assert_eq!( + reader.read_to_end(QName(b"root")).unwrap(), + 32..52 // + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } +} + +mod read_to_end_into { + use super::*; + use pretty_assertions::assert_eq; + + /// Yes, this test contains invalid XML but since we can parse it, we check + /// that it does not break our parser + #[test] + fn decl() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + let buf = &mut Vec::new(); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event_into(buf).unwrap(), + Decl(BytesDecl::new("1.0", None, None)) + ); + assert_eq!( + reader.read_to_end_into(QName(b"root"), buf).unwrap(), + 45..65 // + ); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + 
); + assert_eq!(reader.read_event_into(buf).unwrap(), Eof); + } + + /// Yes, this test contains invalid XML but since we can parse it, we check + /// that it does not break our parser + #[test] + fn doctype() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + let buf = &mut Vec::new(); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event_into(buf).unwrap(), + DocType(BytesText::new("dtd")) + ); + assert_eq!( + reader.read_to_end_into(QName(b"root"), buf).unwrap(), + 38..58 // + ); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event_into(buf).unwrap(), Eof); + } + + #[test] + fn pi() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + let buf = &mut Vec::new(); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!(reader.read_event_into(buf).unwrap(), PI(BytesPI::new("pi"))); + assert_eq!( + reader.read_to_end_into(QName(b"root"), buf).unwrap(), + 30..50 // + ); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event_into(buf).unwrap(), Eof); + } + + #[test] + fn comment() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + let buf = &mut Vec::new(); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event_into(buf).unwrap(), + Comment(BytesText::new("comment")) + ); + assert_eq!( + reader.read_to_end_into(QName(b"root"), buf).unwrap(), + 38..58 // + ); + assert_eq!( + 
reader.read_resolved_event_into(buf).unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event_into(buf).unwrap(), Eof); + } + + #[test] + fn start() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + let buf = &mut Vec::new(); + reader.config_mut().check_end_names = false; + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::new("tag")), + ) + ); + assert_eq!( + reader.read_to_end_into(QName(b"root"), buf).unwrap(), + 29..49 // + ); + // NOTE: due to unbalanced XML namespace still not closed + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Empty(BytesStart::new("element")) + ) + ); + assert_eq!(reader.read_event_into(buf).unwrap(), Eof); + } + + #[test] + fn end() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + let buf = &mut Vec::new(); + reader.config_mut().check_end_names = false; + reader.config_mut().allow_unmatched_ends = true; + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + (Bound(Namespace(b"namespace")), End(BytesEnd::new("tag")),) + ); + assert_eq!( + reader.read_to_end_into(QName(b"root"), buf).unwrap(), + 30..50 // + ); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event_into(buf).unwrap(), Eof); + } + + #[test] + fn empty() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + let buf = &mut Vec::new(); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + ( + 
Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + ( + Bound(Namespace(b"namespace")), + Empty(BytesStart::new("tag")), + ) + ); + assert_eq!( + reader.read_to_end_into(QName(b"root"), buf).unwrap(), + 30..50 // + ); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event_into(buf).unwrap(), Eof); + } + + #[test] + fn text() { + let mut reader = NsReader::from_str( + "\ + \ + text\ + \ + \ + \ + ", + ); + let buf = &mut Vec::new(); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event_into(buf).unwrap(), + Text(BytesText::new("text")) + ); + assert_eq!( + reader.read_to_end_into(QName(b"root"), buf).unwrap(), + 28..48 // + ); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event_into(buf).unwrap(), Eof); + } + + #[test] + fn cdata() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + let buf = &mut Vec::new(); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event_into(buf).unwrap(), + CData(BytesCData::new("cdata")) + ); + assert_eq!( + reader.read_to_end_into(QName(b"root"), buf).unwrap(), + 41..61 // + ); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event_into(buf).unwrap(), Eof); + } + + #[test] + fn general_ref() { + let mut reader = NsReader::from_str( + "\ + \ + &entity;\ + \ + \ + \ + ", + ); + let buf = &mut Vec::new(); + assert_eq!( + 
reader.read_resolved_event_into(buf).unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event_into(buf).unwrap(), + GeneralRef(BytesRef::new("entity")) + ); + assert_eq!( + reader.read_to_end_into(QName(b"root"), buf).unwrap(), + 32..52 // + ); + assert_eq!( + reader.read_resolved_event_into(buf).unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event_into(buf).unwrap(), Eof); + } +} + +mod read_text { + use super::*; + use pretty_assertions::assert_eq; + + /// Yes, this test contains invalid XML but since we can parse it, we check + /// that it does not break our parser + #[test] + fn decl() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event().unwrap(), + Decl(BytesDecl::new("1.0", None, None)) + ); + assert_eq!( + reader.read_text(QName(b"root")).unwrap(), + "" + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + /// Yes, this test contains invalid XML but since we can parse it, we check + /// that it does not break our parser + #[test] + fn doctype() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!(reader.read_event().unwrap(), DocType(BytesText::new("dtd"))); + assert_eq!( + reader.read_text(QName(b"root")).unwrap(), + "" + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + 
#[test] + fn pi() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!(reader.read_event().unwrap(), PI(BytesPI::new("pi"))); + assert_eq!( + reader.read_text(QName(b"root")).unwrap(), + "" + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn comment() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event().unwrap(), + Comment(BytesText::new("comment")) + ); + assert_eq!( + reader.read_text(QName(b"root")).unwrap(), + "" + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn start() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + reader.config_mut().check_end_names = false; + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::new("tag")), + ) + ); + assert_eq!( + reader.read_text(QName(b"root")).unwrap(), + "" + ); + // NOTE: due to unbalanced XML namespace still not closed + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Empty(BytesStart::new("element")) + ) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn end() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + 
\ + ", + ); + reader.config_mut().check_end_names = false; + reader.config_mut().allow_unmatched_ends = true; + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Bound(Namespace(b"namespace")), End(BytesEnd::new("tag")),) + ); + assert_eq!( + reader.read_text(QName(b"root")).unwrap(), + "" + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn empty() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Empty(BytesStart::new("tag")), + ) + ); + assert_eq!( + reader.read_text(QName(b"root")).unwrap(), + "" + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn text() { + let mut reader = NsReader::from_str( + "\ + \ + text\ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!(reader.read_event().unwrap(), Text(BytesText::new("text"))); + assert_eq!( + reader.read_text(QName(b"root")).unwrap(), + "" + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn cdata() { + let mut reader = NsReader::from_str( + "\ + \ + \ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + 
Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event().unwrap(), + CData(BytesCData::new("cdata")) + ); + assert_eq!( + reader.read_text(QName(b"root")).unwrap(), + "" + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } + + #[test] + fn general_ref() { + let mut reader = NsReader::from_str( + "\ + \ + &entity;\ + \ + \ + \ + ", + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + ( + Bound(Namespace(b"namespace")), + Start(BytesStart::from_content("root xmlns='namespace'", 4)), + ) + ); + assert_eq!( + reader.read_event().unwrap(), + GeneralRef(BytesRef::new("entity")) + ); + assert_eq!( + reader.read_text(QName(b"root")).unwrap(), + "" + ); + assert_eq!( + reader.read_resolved_event().unwrap(), + (Unbound, Empty(BytesStart::new("element"))) + ); + assert_eq!(reader.read_event().unwrap(), Eof); + } +}