From 155057d8ca8ae29024caf01081caf7d4f4667fd4 Mon Sep 17 00:00:00 2001 From: bbbgan <2893129936@qq.com> Date: Sun, 9 Jul 2023 16:39:21 +0800 Subject: [PATCH] improve skip_till[xml] --- iguana/xml_reader.hpp | 39 +++++++------- iguana/xml_util.hpp | 119 ++++++++++++++++++++++++++++++++++++++++++ test/test_xml.cpp | 2 +- 3 files changed, 139 insertions(+), 21 deletions(-) diff --git a/iguana/xml_reader.hpp b/iguana/xml_reader.hpp index 5e37e971..f9acc0d5 100644 --- a/iguana/xml_reader.hpp +++ b/iguana/xml_reader.hpp @@ -74,11 +74,12 @@ IGUANA_INLINE void parse_attr(U &&value, It &&it, It &&end) { template IGUANA_INLINE void parse_item(U &value, It &&it, It &&end, std::string_view name) { - skip_till<'>'>(it, end); + skip_till_greater(it, end); ++it; skip_sapces_and_newline(it, end); auto value_begin = it; - auto value_end = skip_pass<'<'>(it, end); + auto value_end = skip_pass_smaller(it, end); + ; parse_value(value, value_begin, value_end); match_close_tag(it, end, name); } @@ -96,14 +97,14 @@ IGUANA_INLINE void parse_item(U &value, It &&it, It &&end, --it; return; } else { - skip_till<'>'>(it, end); + skip_till_greater(it, end); ++it; skip_sapces_and_newline(it, end); continue; } } auto start = it; - skip_till<' ', '>'>(it, end); + skip_till_greater_or_space(it, end); std::string_view key = std::string_view{ &*start, static_cast(std::distance(start, it))}; if (key != name) [[unlikely]] { @@ -119,8 +120,8 @@ template IGUANA_INLINE void parse_item(U &value, It &&it, It &&end, std::string_view name) { using value_type = typename std::remove_reference_t::value_type; - skip_till<'>'>(it, end); - if (*(it - 1) == '/') { + skip_till_greater(it, end); + if (*(it - 1) == '/') [[likely]] { ++it; return; } @@ -128,11 +129,12 @@ IGUANA_INLINE void parse_item(U &value, It &&it, It &&end, if constexpr (plain_t) { // The following code is for option not to be emplaced // when parse "...> <..." - skip_till<'>'>(it, end); + skip_till_greater(it, end); ++it; skip_sapces_and_newline(it, end); auto value_begin = it; - auto value_end = skip_pass<'<'>(it, end); + auto value_end = skip_pass_smaller(it, end); + ; if (value_begin == value_end) { match_close_tag(it, end, name); return; @@ -162,7 +164,7 @@ template IGUANA_INLINE void parse_item(T &value, It &&it, It &&end, std::string_view name) { constexpr auto cdata_idx = get_type_index>(); - skip_till<'>'>(it, end); + skip_till_greater(it, end); ++it; skip_sapces_and_newline(it, end); while (it != end) { @@ -174,7 +176,7 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end, return; // reach the close tag } else if (*it == '?') [[unlikely]] { // - skip_till<'>'>(it, end); + skip_till_greater(it, end); ++it; skip_sapces_and_newline(it, end); continue; @@ -212,14 +214,14 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end, } else if (*it == 'D') { // '>(it, end); + skip_till_greater(it, end); ++it; skip_sapces_and_newline(it, end); continue; } else { // ++it; - skip_till<'>'>(it, end); + skip_till_greater(it, end); ++it; skip_sapces_and_newline(it, end); continue; @@ -227,7 +229,7 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end, } auto start = it; - skip_till<' ', '>'>(it, end); + skip_till_greater_or_space(it, end); std::string_view key = std::string_view{ &*start, static_cast(std::distance(start, it))}; static constexpr auto frozen_map = get_iguana_struct_map(); @@ -259,14 +261,14 @@ IGUANA_INLINE void from_xml(U &value, It &&it, It &&end) { skip_sapces_and_newline(it, end); match<'<'>(it, end); if (*it == '?') { - skip_till<'>'>(it, end); + skip_till_greater(it, end); ++it; } else { break; } } auto start = it; - skip_till<' ', '>'>(it, end); + skip_till_greater_or_space(it, end); std::string_view key = std::string_view{&*start, static_cast(std::distance(start, it))}; detail::parse_attr(value.attr(), it, end); @@ -279,17 +281,14 @@ IGUANA_INLINE void from_xml(U &value, It &&it, It &&end) { skip_sapces_and_newline(it, end); match<'<'>(it, end); if (*it == '?') { - skip_till<'>'>(it, end); + skip_till_greater(it, end); ++it; // skip > - // ''>(it, end); + skip_till_greater_or_space(it, end); std::string_view key = std::string_view{&*start, static_cast(std::distance(start, it))}; detail::parse_item(value, it, end, key); diff --git a/iguana/xml_util.hpp b/iguana/xml_util.hpp index 9b98dd21..5da49dd6 100644 --- a/iguana/xml_util.hpp +++ b/iguana/xml_util.hpp @@ -253,4 +253,123 @@ IGUANA_INLINE void match_close_tag(auto &&it, auto &&end, // ++it; } +// skip_till<'>'>(it, end); +IGUANA_INLINE void skip_till_greater(auto &&it, auto &&end) { + static_assert(std::contiguous_iterator>); + + auto has_zero = [](uint64_t chunk) { + return (((chunk - 0x0101010101010101) & ~chunk) & 0x8080808080808080); + }; + auto has_greater = [&](uint64_t chunk) { + return has_zero( + chunk ^ + 0b0011111000111110001111100011111000111110001111100011111000111110); + }; + + if (std::distance(it, end) >= 7) [[likely]] { + const auto end_m7 = end - 7; + for (; it < end_m7; it += 8) { + const auto chunk = *reinterpret_cast(&*it); + uint64_t test = has_greater(chunk); + if (test != 0) { + it += (std::countr_zero(test) >> 3); + return; + } + } + } + + // Tail end of buffer. Should be rare we even get here + while (it < end) { + switch (*it) { + case '>': + return; + } + ++it; + } + throw std::runtime_error("Expected >"); +} + +IGUANA_INLINE void skip_till_greater_or_space(auto &&it, auto &&end) { + static_assert(std::contiguous_iterator>); + + auto has_zero = [](uint64_t chunk) { + return (((chunk - 0x0101010101010101) & ~chunk) & 0x8080808080808080); + }; + auto has_greater = [&](uint64_t chunk) { + return has_zero( + chunk ^ + 0b0011111000111110001111100011111000111110001111100011111000111110); + }; + auto has_space = [&](uint64_t chunk) { + return has_zero( + chunk ^ + 0b0010000000100000001000000010000000100000001000000010000000100000); + }; + if (std::distance(it, end) >= 7) [[likely]] { + const auto end_m7 = end - 7; + for (; it < end_m7; it += 8) { + const auto chunk = *reinterpret_cast(&*it); + uint64_t test = has_greater(chunk) | has_space(chunk); + if (test != 0) { + it += (std::countr_zero(test) >> 3); + return; + } + } + } + + // Tail end of buffer. Should be rare we even get here + while (it < end) { + switch (*it) { + case '>': + case ' ': + return; + } + ++it; + } + throw std::runtime_error("Expected > or space"); +} + +IGUANA_INLINE void skip_till_smaller(auto &&it, auto &&end) { + static_assert(std::contiguous_iterator>); + + auto has_zero = [](uint64_t chunk) { + return (((chunk - 0x0101010101010101) & ~chunk) & 0x8080808080808080); + }; + auto has_smaller = [&](uint64_t chunk) { + return has_zero( + chunk ^ + 0b0011110000111100001111000011110000111100001111000011110000111100); + }; + + if (std::distance(it, end) >= 7) [[likely]] { + const auto end_m7 = end - 7; + for (; it < end_m7; it += 8) { + const auto chunk = *reinterpret_cast(&*it); + uint64_t test = has_smaller(chunk); + if (test != 0) { + it += (std::countr_zero(test) >> 3); + return; + } + } + } + + // Tail end of buffer. Should be rare we even get here + while (it < end) { + switch (*it) { + case '>': + return; + } + ++it; + } + throw std::runtime_error("Expected >"); +} + +IGUANA_INLINE auto skip_pass_smaller(auto &&it, auto &&end) { + skip_till_smaller(it, end); + auto res = it++ - 1; + while (*res == ' ') { + --res; + } + return res + 1; +} } // namespace iguana \ No newline at end of file diff --git a/test/test_xml.cpp b/test/test_xml.cpp index 0295dcbf..89cb9266 100644 --- a/test/test_xml.cpp +++ b/test/test_xml.cpp @@ -108,7 +108,7 @@ TEST_CASE("test vector with attr") { 1 2 3 - 4 + 4 )"; test_arr_t arr;