From c5506c31f3cd500bc599dfcf89457e5b66c1a4b1 Mon Sep 17 00:00:00 2001 From: Mingun Date: Fri, 20 Feb 2026 21:12:33 +0500 Subject: [PATCH] `read_text()` now returns `BytesText` which allows you to get the content with properly normalized EOLs --- Changelog.md | 3 +++ examples/read_nodes.rs | 9 ++++++++- src/reader/ns_reader.rs | 9 +++++---- src/reader/slice_reader.rs | 12 ++++++------ tests/issues.rs | 10 ++++++++-- tests/reader-namespaces.rs | 20 ++++++++++---------- tests/reader-read-text.rs | 20 ++++++++++---------- tests/reader.rs | 10 ++++++++-- 8 files changed, 58 insertions(+), 35 deletions(-) diff --git a/Changelog.md b/Changelog.md index c4ae643b..56b6e81f 100644 --- a/Changelog.md +++ b/Changelog.md @@ -44,10 +44,13 @@ of `NsReader`. Use `.resolver().<...>` methods instead. - [#938]: Now `BytesText::xml_content`, `BytesCData::xml_content` and `BytesRef::xml_content` accepts `XmlVersion` parameter to apply correct EOL normalization rules. +- [#944]: `read_text()` now returns `BytesText` which allows you to get the content with + properly normalized EOLs. To get the previous behavior use `.read_text(end)?.decode()?`. 
[#371]: https://github.com/tafia/quick-xml/issues/371 [#914]: https://github.com/tafia/quick-xml/pull/914 [#938]: https://github.com/tafia/quick-xml/pull/938 +[#944]: https://github.com/tafia/quick-xml/pull/944 ## 0.39.2 -- 2026-02-20 diff --git a/examples/read_nodes.rs b/examples/read_nodes.rs index 47063210..5fc9f058 100644 --- a/examples/read_nodes.rs +++ b/examples/read_nodes.rs @@ -2,6 +2,7 @@ // Note: for this specific data set using serde feature would simplify // this simple data is purely to make it easier to understand the code +use quick_xml::encoding::EncodingError; use quick_xml::events::attributes::AttrError; use quick_xml::events::{BytesStart, Event}; use quick_xml::name::QName; @@ -53,6 +54,12 @@ impl From for AppError { } } +impl From for AppError { + fn from(error: EncodingError) -> Self { + Self::Xml(quick_xml::Error::Encoding(error)) + } +} + #[derive(Debug)] struct Translation { tag: String, @@ -91,7 +98,7 @@ impl Translation { Ok(Translation { tag: tag.into(), lang: lang.into(), - text: text_content.into(), + text: text_content.decode()?.into(), }) } else { dbg!("Expected Event::Start for Text, got: {:?}", &event); diff --git a/src/reader/ns_reader.rs b/src/reader/ns_reader.rs index 4f6f68da..46858cc8 100644 --- a/src/reader/ns_reader.rs +++ b/src/reader/ns_reader.rs @@ -4,7 +4,6 @@ //! [qualified names]: https://www.w3.org/TR/xml-names11/#dt-qualname //! [expanded names]: https://www.w3.org/TR/xml-names11/#dt-expname -use std::borrow::Cow; use std::fs::File; use std::io::{BufRead, BufReader}; use std::ops::Deref; @@ -725,11 +724,13 @@ impl<'i> NsReader<&'i [u8]> { /// // ...then, we could read text content until close tag. /// // This call will correctly handle nested elements. /// let text = reader.read_text(end.name()).unwrap(); - /// assert_eq!(text, Cow::Borrowed(r#" + /// let text = text.decode().unwrap(); + /// assert_eq!(text, r#" /// This is a HTML text ///

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" - /// "#)); + /// "#); + /// assert!(matches!(text, Cow::Borrowed(_))); /// /// // Now we can enable checks again /// reader.config_mut().check_end_names = true; @@ -741,7 +742,7 @@ impl<'i> NsReader<&'i [u8]> { /// [`Start`]: Event::Start /// [`decoder()`]: Reader::decoder() #[inline] - pub fn read_text(&mut self, end: QName) -> Result> { + pub fn read_text(&mut self, end: QName) -> Result> { // According to the https://www.w3.org/TR/xml11/#dt-etag, end name should // match literally the start name. See `Self::check_end_names` documentation let result = self.reader.read_text(end)?; diff --git a/src/reader/slice_reader.rs b/src/reader/slice_reader.rs index 2a27a866..65ac2796 100644 --- a/src/reader/slice_reader.rs +++ b/src/reader/slice_reader.rs @@ -2,7 +2,6 @@ //! underlying byte stream. This implementation supports not using an //! intermediate buffer as the byte slice itself can be used to borrow from. -use std::borrow::Cow; use std::io; #[cfg(feature = "encoding")] @@ -11,7 +10,7 @@ use crate::reader::EncodingRef; use encoding_rs::{Encoding, UTF_8}; use crate::errors::{Error, Result}; -use crate::events::Event; +use crate::events::{BytesText, Event}; use crate::name::QName; use crate::parser::Parser; use crate::reader::{BangType, ReadRefResult, ReadTextResult, Reader, Span, XmlSource}; @@ -209,11 +208,12 @@ impl<'a> Reader<&'a [u8]> { /// // ...then, we could read text content until close tag. /// // This call will correctly handle nested elements. /// let text = reader.read_text(end.name()).unwrap(); - /// assert_eq!(text, Cow::Borrowed(r#" + /// let text = text.decode().unwrap(); + /// assert_eq!(text, r#" /// This is a HTML text ///

Usual XML rules does not apply inside it ///

For example, elements not needed to be "closed" - /// "#)); + /// "#); /// assert!(matches!(text, Cow::Borrowed(_))); /// /// // Now we can enable checks again @@ -225,7 +225,7 @@ impl<'a> Reader<&'a [u8]> { /// /// [`Start`]: Event::Start /// [`decoder()`]: Self::decoder() - pub fn read_text(&mut self, end: QName) -> Result> { + pub fn read_text(&mut self, end: QName) -> Result> { // self.reader will be changed, so store original reference let buffer = self.reader; let span = self.read_to_end(end)?; @@ -233,7 +233,7 @@ impl<'a> Reader<&'a [u8]> { let len = span.end - span.start; // SAFETY: `span` can only contain indexes up to usize::MAX because it // was created from offsets from a single &[u8] slice - Ok(self.decoder().decode(&buffer[0..len as usize])?) + Ok(BytesText::wrap(&buffer[0..len as usize], self.decoder())) } } diff --git a/tests/issues.rs b/tests/issues.rs index b4d744a4..3cd26935 100644 --- a/tests/issues.rs +++ b/tests/issues.rs @@ -130,7 +130,10 @@ mod issue514 { reader.config_mut().check_end_names = false; - assert_eq!(reader.read_text(html_end.name()).unwrap(), "..."); + assert_eq!( + reader.read_text(html_end.name()).unwrap(), + BytesText::from_escaped("...") + ); reader.config_mut().check_end_names = true; @@ -153,7 +156,10 @@ mod issue514 { reader.config_mut().check_end_names = false; - assert_eq!(reader.read_text(html_end.name()).unwrap(), "..."); + assert_eq!( + reader.read_text(html_end.name()).unwrap(), + BytesText::from_escaped("...") + ); reader.config_mut().check_end_names = true; diff --git a/tests/reader-namespaces.rs b/tests/reader-namespaces.rs index 962bd200..90a85b9d 100644 --- a/tests/reader-namespaces.rs +++ b/tests/reader-namespaces.rs @@ -1235,7 +1235,7 @@ mod read_text { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_resolved_event().unwrap(), @@ -1267,7 +1267,7 @@ mod read_text { assert_eq!(reader.read_event().unwrap(), DocType(BytesText::new("dtd"))); 
assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_resolved_event().unwrap(), @@ -1297,7 +1297,7 @@ mod read_text { assert_eq!(reader.read_event().unwrap(), PI(BytesPI::new("pi"))); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_resolved_event().unwrap(), @@ -1330,7 +1330,7 @@ mod read_text { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_resolved_event().unwrap(), @@ -1367,7 +1367,7 @@ mod read_text { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); // NOTE: due to unbalanced XML namespace still not closed assert_eq!( @@ -1406,7 +1406,7 @@ mod read_text { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_resolved_event().unwrap(), @@ -1442,7 +1442,7 @@ mod read_text { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_resolved_event().unwrap(), @@ -1472,7 +1472,7 @@ mod read_text { assert_eq!(reader.read_event().unwrap(), Text(BytesText::new("text"))); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_resolved_event().unwrap(), @@ -1505,7 +1505,7 @@ mod read_text { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_resolved_event().unwrap(), @@ -1538,7 +1538,7 @@ mod read_text { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_resolved_event().unwrap(), diff --git a/tests/reader-read-text.rs b/tests/reader-read-text.rs index fe8bb082..0720b64b 100644 --- a/tests/reader-read-text.rs +++ b/tests/reader-read-text.rs @@ -29,7 +29,7 @@ mod borrowed { ); assert_eq!( 
reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_event().unwrap(), @@ -61,7 +61,7 @@ mod borrowed { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_event().unwrap(), @@ -88,7 +88,7 @@ mod borrowed { assert_eq!(reader.read_event().unwrap(), Event::PI(BytesPI::new("pi"))); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_event().unwrap(), @@ -118,7 +118,7 @@ mod borrowed { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_event().unwrap(), @@ -149,7 +149,7 @@ mod borrowed { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_event().unwrap(), @@ -181,7 +181,7 @@ mod borrowed { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_event().unwrap(), @@ -211,7 +211,7 @@ mod borrowed { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_event().unwrap(), @@ -241,7 +241,7 @@ mod borrowed { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_event().unwrap(), @@ -271,7 +271,7 @@ mod borrowed { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_event().unwrap(), @@ -302,7 +302,7 @@ mod borrowed { ); assert_eq!( reader.read_text(QName(b"root")).unwrap(), - "" + BytesText::from_escaped("") ); assert_eq!( reader.read_event().unwrap(), diff --git a/tests/reader.rs b/tests/reader.rs index 41897f3d..52e2f36e 100644 --- a/tests/reader.rs +++ b/tests/reader.rs @@ -349,7 +349,10 @@ mod read_text { r.config_mut().trim_text(true); assert_eq!(r.read_event().unwrap(), 
Start(BytesStart::new("tag"))); - assert_eq!(r.read_text(QName(b"tag")).unwrap(), " text "); + assert_eq!( + r.read_text(QName(b"tag")).unwrap(), + BytesText::from_escaped(" text ") + ); assert_eq!(r.read_event().unwrap(), Eof); } @@ -359,7 +362,10 @@ mod read_text { r.config_mut().trim_text(true); assert_eq!(r.read_event().unwrap(), Start(BytesStart::new("tag"))); - assert_eq!(r.read_text(QName(b"tag")).unwrap(), " "); + assert_eq!( + r.read_text(QName(b"tag")).unwrap(), + BytesText::from_escaped(" ") + ); assert_eq!(r.read_event().unwrap(), Eof); } }