From 47c764362feb7293f3b4008d01347d8fcf9689df Mon Sep 17 00:00:00 2001 From: qicosmos Date: Wed, 15 May 2024 17:10:27 +0800 Subject: [PATCH] fix and update --- .../ylt/standalone/iguana/detail/charconv.h | 44 ++++- .../ylt/standalone/iguana/detail/traits.hpp | 9 + include/ylt/standalone/iguana/json_reader.hpp | 71 +++++++- include/ylt/standalone/iguana/json_util.hpp | 29 ++- include/ylt/standalone/iguana/json_writer.hpp | 2 + include/ylt/standalone/iguana/reflection.hpp | 114 ++++++++++++ include/ylt/standalone/iguana/util.hpp | 129 +++++++++---- include/ylt/standalone/iguana/version.hpp | 8 + include/ylt/standalone/iguana/xml_reader.hpp | 169 +++++++++++------- include/ylt/standalone/iguana/xml_util.hpp | 151 +++++++++++++++- include/ylt/standalone/iguana/xml_writer.hpp | 64 ++++++- include/ylt/standalone/iguana/yaml_reader.hpp | 5 +- 12 files changed, 668 insertions(+), 127 deletions(-) create mode 100644 include/ylt/standalone/iguana/version.hpp diff --git a/include/ylt/standalone/iguana/detail/charconv.h b/include/ylt/standalone/iguana/detail/charconv.h index 0e81a39cd..a5b2d0c73 100644 --- a/include/ylt/standalone/iguana/detail/charconv.h +++ b/include/ylt/standalone/iguana/detail/charconv.h @@ -3,27 +3,48 @@ #include "dragonbox_to_chars.h" #include "fast_float.h" +#include "iguana/define.h" #include "itoa.hpp" namespace iguana { template struct is_char_type - : std::disjunction, std::is_same, - std::is_same, std::is_same, + : std::disjunction, std::is_same, std::is_same, std::is_same> {}; +inline void *to_chars_float(...) { + throw std::runtime_error("not allowed to invoke"); + return {}; +} + +template (), std::declval()))> +using return_of_tochars = std::conditional_t, + std::true_type, std::false_type>; +// here std::true_type is used as a type , any other type is also ok. +using has_to_chars_float = iguana::return_of_tochars; + namespace detail { -template + +// check_number==true: check if the string [first, last) is a legal number +template std::pair from_chars(const char *first, - const char *last, - U &value) noexcept { + const char *last, U &value) { using T = std::decay_t; if constexpr (std::is_floating_point_v) { auto [p, ec] = fast_float::from_chars(first, last, value); + if constexpr (check_number) { + if (p != last || ec != std::errc{}) + IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); } + } return {p, ec}; } else { auto [p, ec] = std::from_chars(first, last, value); + if constexpr (check_number) { + if (p != last || ec != std::errc{}) + IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); } + } return {p, ec}; } } @@ -33,7 +54,12 @@ template char *to_chars(char *buffer, T value) noexcept { using U = std::decay_t; if constexpr (std::is_floating_point_v) { - return jkj::dragonbox::to_chars(value, buffer); + if constexpr (has_to_chars_float::value) { + return static_cast(to_chars_float(value, buffer)); + } + else { + return jkj::dragonbox::to_chars(value, buffer); + } } else if constexpr (std::is_signed_v && (sizeof(U) >= 8)) { return xtoa(value, buffer, 10, 1); // int64_t @@ -41,9 +67,13 @@ char *to_chars(char *buffer, T value) noexcept { else if constexpr (std::is_unsigned_v && (sizeof(U) >= 8)) { return xtoa(value, buffer, 10, 0); // uint64_t } - else if constexpr (std::is_integral_v && !is_char_type::value) { + else if constexpr (std::is_integral_v && (sizeof(U) > 1)) { return itoa_fwd(value, buffer); // only support more than 2 bytes intergal } + else if constexpr (!is_char_type::value) { + return itoa_fwd(static_cast(value), + buffer); // only support more than 2 bytes intergal + } else { static_assert(!sizeof(U), "only support arithmetic type except char type"); } diff --git a/include/ylt/standalone/iguana/detail/traits.hpp b/include/ylt/standalone/iguana/detail/traits.hpp index 5938f4a7f..64f082e03 100644 --- a/include/ylt/standalone/iguana/detail/traits.hpp +++ b/include/ylt/standalone/iguana/detail/traits.hpp @@ -67,6 +67,15 @@ template struct has_type> : std::disjunction...> {}; +template +struct member_tratis {}; + +template +struct member_tratis { + using owner_type = Owner; + using value_type = T; +}; + template inline constexpr bool is_int64_v = std::is_same_v || std::is_same_v; diff --git a/include/ylt/standalone/iguana/json_reader.hpp b/include/ylt/standalone/iguana/json_reader.hpp index 1597d93bf..050183492 100644 --- a/include/ylt/standalone/iguana/json_reader.hpp +++ b/include/ylt/standalone/iguana/json_reader.hpp @@ -68,8 +68,8 @@ IGUANA_INLINE void from_json_impl(U &value, It &&it, It &&end) { if (size == 0) IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); } const auto start = &*it; - auto [p, ec] = detail::from_chars(start, start + size, value); - if (ec != std::errc{}) + auto [p, ec] = detail::from_chars(start, start + size, value); + if (ec != std::errc{} || !can_follow_number(*p)) IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); } it += (p - &*it); } @@ -82,9 +82,7 @@ IGUANA_INLINE void from_json_impl(U &value, It &&it, It &&end) { buffer[i] = *it++; ++i; } - auto [p, ec] = detail::from_chars(buffer, buffer + i, value); - if (ec != std::errc{}) - IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); } + detail::from_chars(buffer, buffer + i, value); } } @@ -499,6 +497,44 @@ IGUANA_INLINE void skip_object_value(It &&it, It &&end) { } } +template +IGUANA_INLINE bool from_json_variant_impl(U &value, It it, It end, It &temp_it, + It &temp_end) { + try { + value_type val; + from_json_impl(val, it, end); + value = val; + temp_it = it; + temp_end = end; + return true; + } catch (std::exception &ex) { + return false; + } +} + +template +IGUANA_INLINE void from_json_variant(U &value, It &it, It &end, + std::index_sequence) { + static_assert(!has_duplicate_type_v>, + "don't allow same type in std::variant"); + bool r = false; + It temp_it = it; + It temp_end = end; + ((void)(!r && (r = from_json_variant_impl< + variant_element_t>>( + value, it, end, temp_it, temp_end), + true)), + ...); + it = temp_it; + end = temp_end; +} + +template , int> = 0> +IGUANA_INLINE void from_json_impl(U &value, It &&it, It &&end) { + from_json_variant(value, it, end, + std::make_index_sequence< + std::variant_size_v>>{}); +} } // namespace detail template , int>> @@ -608,6 +644,31 @@ IGUANA_INLINE void from_json(T &value, const View &view) { from_json(value, std::begin(view), std::end(view)); } +template < + auto member, + typename Parant = typename member_tratis::owner_type, + typename T> +IGUANA_INLINE void from_json(T &value, std::string_view str) { + constexpr size_t duplicate_count = + iguana::duplicate_count, member>(); + static_assert(duplicate_count != 1, "the member is not belong to the object"); + static_assert(duplicate_count == 2, "has duplicate field name"); + + constexpr auto name = name_of(); + constexpr size_t index = index_of(); + constexpr size_t member_count = member_count_of(); + str = str.substr(str.find(name) + name.size()); + size_t pos = str.find(":") + 1; + if constexpr (index == member_count - 1) { // last field + str = str.substr(pos, str.find("}") - pos + 1); + } + else { + str = str.substr(pos, str.find(",") - pos); + } + + detail::from_json_impl(value.*member, std::begin(str), std::end(str)); +} + template , int> = 0> IGUANA_INLINE void from_json(T &value, const View &view, diff --git a/include/ylt/standalone/iguana/json_util.hpp b/include/ylt/standalone/iguana/json_util.hpp index 5c00c0771..f1bf3a331 100644 --- a/include/ylt/standalone/iguana/json_util.hpp +++ b/include/ylt/standalone/iguana/json_util.hpp @@ -18,10 +18,7 @@ class numeric_str { if (val_.empty()) IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); } T res; - auto [_, ec] = - detail::from_chars(val_.data(), val_.data() + val_.size(), res); - if (ec != std::errc{}) - IGUANA_UNLIKELY { throw std::runtime_error("Failed to parse number"); } + detail::from_chars(val_.data(), val_.data() + val_.size(), res); return res; } @@ -214,4 +211,28 @@ IGUANA_INLINE bool is_numeric(char c) noexcept { return static_cast(is_num[static_cast(c)]); } +// '\t' '\r' '\n' '"' '}' ']' ',' ' ' '\0' +IGUANA_INLINE bool can_follow_number(char c) noexcept { + static constexpr int can_follow_num[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, // 0 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 1 + 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, // 2 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 3 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, // 5 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, // 7 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 8 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 9 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // A + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // B + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // C + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // D + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // E + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 // F + }; + return static_cast(can_follow_num[static_cast(c)]); +} + } // namespace iguana diff --git a/include/ylt/standalone/iguana/json_writer.hpp b/include/ylt/standalone/iguana/json_writer.hpp index 80e2558da..eeaeaa399 100644 --- a/include/ylt/standalone/iguana/json_writer.hpp +++ b/include/ylt/standalone/iguana/json_writer.hpp @@ -245,6 +245,8 @@ IGUANA_INLINE void to_json_impl(Stream &s, T &&t) { template , int>> IGUANA_INLINE void to_json_impl(Stream &s, T &&t) { + static_assert(!has_duplicate_type_v>, + "don't allow same type in std::variant"); std::visit( [&s](auto value) { to_json_impl(s, value); diff --git a/include/ylt/standalone/iguana/reflection.hpp b/include/ylt/standalone/iguana/reflection.hpp index 41e15423b..234da95d6 100644 --- a/include/ylt/standalone/iguana/reflection.hpp +++ b/include/ylt/standalone/iguana/reflection.hpp @@ -910,6 +910,120 @@ constexpr const std::string_view get_name() { return M::name(); } +namespace detail { +template +constexpr bool get_index_imple(T ptr, U ele) { + if constexpr (std::is_same_v) { + if (ele == ptr) { + return true; + } + else { + return false; + } + } + else { + return false; + } +} + +template +constexpr size_t member_index_impl(T ptr, Tuple &tp, + std::index_sequence) { + bool r = false; + size_t index = 0; + ((void)(!r && (r = get_index_imple(ptr, std::get(tp)), + !r ? index++ : index, true)), + ...); + return index; +} + +template +constexpr size_t member_index(T ptr, Tuple &tp) { + return member_index_impl( + ptr, tp, + std::make_index_sequence< + std::tuple_size_v>>{}); +} +} // namespace detail + +template +constexpr size_t index_of() { + using namespace detail; + using T = typename member_tratis::owner_type; + using M = Reflect_members; + constexpr auto tp = M::apply_impl(); + constexpr size_t Size = std::tuple_size_v; + constexpr size_t index = member_index(member, tp); + static_assert(index < Size, "out of range"); + return index; +} + +template +constexpr std::array indexs_of() { + return std::array{index_of()...}; +} + +template +constexpr auto name_of() { + using T = typename member_tratis::owner_type; + using M = Reflect_members; + constexpr auto s = M::arr()[index_of()]; + return std::string_view(s.data(), s.size()); +} + +template +constexpr std::array names_of() { + return std::array{ + name_of()...}; +} + +template +constexpr auto member_count_of() { + using T = typename member_tratis::owner_type; + using M = Reflect_members; + return M::value(); +} + +template +constexpr size_t duplicate_count(); + +template +constexpr void check_duplicate(Member member, size_t &index) { + using value_type = typename member_tratis::value_type; + + if (detail::get_index_imple(ptr, member)) { + index++; + } + + if constexpr (is_reflection_v) { + index += iguana::duplicate_count(); + } +} + +template +constexpr size_t duplicate_count() { + using M = Reflect_members; + constexpr auto name = name_of(); + constexpr auto arr = M::arr(); + + constexpr auto tp = M::apply_impl(); + size_t index = 0; + std::apply( + [&](auto... ele) { + (check_duplicate(ele, index), ...); + }, + tp); + + for (auto &s : arr) { + if (s == name) { + index++; + break; + } + } + + return index; +} + template constexpr const std::string_view get_fields() { using M = Reflect_members; diff --git a/include/ylt/standalone/iguana/util.hpp b/include/ylt/standalone/iguana/util.hpp index f910db795..47959618a 100644 --- a/include/ylt/standalone/iguana/util.hpp +++ b/include/ylt/standalone/iguana/util.hpp @@ -139,6 +139,10 @@ struct is_variant> : std::true_type {}; template constexpr inline bool variant_v = is_variant>::value; +template +using variant_element_t = std::remove_reference_t( + std::declval>()))>; + template constexpr inline bool refletable_v = is_reflection_v>; @@ -201,10 +205,59 @@ inline constexpr auto has_qoute = [](uint64_t chunk) IGUANA__INLINE_LAMBDA { 0b0010001000100010001000100010001000100010001000100010001000100010); }; +template +IGUANA_INLINE void write_unicode_to_string(Ch& it, Stream& ss) { + static const char hexDigits[16] = {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; + unsigned codepoint = 0; + if (!decode_utf8(it, codepoint)) + IGUANA_UNLIKELY { throw std::runtime_error("illegal unicode character"); } + if constexpr (is_xml_serialization) { + ss.append("&#x"); + } + else { + ss.push_back('\\'); + ss.push_back('u'); + } + + if (codepoint <= 0xD7FF || (codepoint >= 0xE000 && codepoint <= 0xFFFF)) { + ss.push_back(hexDigits[(codepoint >> 12) & 15]); + ss.push_back(hexDigits[(codepoint >> 8) & 15]); + ss.push_back(hexDigits[(codepoint >> 4) & 15]); + ss.push_back(hexDigits[(codepoint)&15]); + } + else { + if (codepoint < 0x010000 || codepoint > 0x10FFFF) + IGUANA_UNLIKELY { throw std::runtime_error("illegal codepoint"); } + // Surrogate pair + unsigned s = codepoint - 0x010000; + unsigned lead = (s >> 10) + 0xD800; + unsigned trail = (s & 0x3FF) + 0xDC00; + ss.push_back(hexDigits[(lead >> 12) & 15]); + ss.push_back(hexDigits[(lead >> 8) & 15]); + ss.push_back(hexDigits[(lead >> 4) & 15]); + ss.push_back(hexDigits[(lead)&15]); + if constexpr (is_xml_serialization) { + ss.append(";&#x"); + } + else { + ss.push_back('\\'); + ss.push_back('u'); + } + ss.push_back(hexDigits[(trail >> 12) & 15]); + ss.push_back(hexDigits[(trail >> 8) & 15]); + ss.push_back(hexDigits[(trail >> 4) & 15]); + ss.push_back(hexDigits[(trail)&15]); + } + if constexpr (is_xml_serialization) { + ss.push_back(';'); + } +} + // https://github.com/Tencent/rapidjson/blob/master/include/rapidjson/writer.h template -inline void write_string_with_escape(const Ch* it, SizeType length, - Stream& ss) { +IGUANA_INLINE void write_string_with_escape(const Ch* it, SizeType length, + Stream& ss) { static const char hexDigits[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; static const char escape[256] = { @@ -227,40 +280,7 @@ inline void write_string_with_escape(const Ch* it, SizeType length, std::advance(end, length); while (it < end) { if (static_cast(*it) >= 0x80) - IGUANA_UNLIKELY { - unsigned codepoint = 0; - if (!decode_utf8(it, codepoint)) - IGUANA_UNLIKELY { - throw std::runtime_error("illegal unicode character"); - } - ss.push_back('\\'); - ss.push_back('u'); - if (codepoint <= 0xD7FF || - (codepoint >= 0xE000 && codepoint <= 0xFFFF)) { - ss.push_back(hexDigits[(codepoint >> 12) & 15]); - ss.push_back(hexDigits[(codepoint >> 8) & 15]); - ss.push_back(hexDigits[(codepoint >> 4) & 15]); - ss.push_back(hexDigits[(codepoint)&15]); - } - else { - if (codepoint < 0x010000 || codepoint > 0x10FFFF) - IGUANA_UNLIKELY { throw std::runtime_error("illegal codepoint"); } - // Surrogate pair - unsigned s = codepoint - 0x010000; - unsigned lead = (s >> 10) + 0xD800; - unsigned trail = (s & 0x3FF) + 0xDC00; - ss.push_back(hexDigits[(lead >> 12) & 15]); - ss.push_back(hexDigits[(lead >> 8) & 15]); - ss.push_back(hexDigits[(lead >> 4) & 15]); - ss.push_back(hexDigits[(lead)&15]); - ss.push_back('\\'); - ss.push_back('u'); - ss.push_back(hexDigits[(trail >> 12) & 15]); - ss.push_back(hexDigits[(trail >> 8) & 15]); - ss.push_back(hexDigits[(trail >> 4) & 15]); - ss.push_back(hexDigits[(trail)&15]); - } - } + IGUANA_UNLIKELY { write_unicode_to_string(it, ss); } else if (escape[static_cast(*it)]) IGUANA_UNLIKELY { ss.push_back('\\'); @@ -281,4 +301,41 @@ inline void write_string_with_escape(const Ch* it, SizeType length, } } +template +IGUANA_INLINE constexpr bool has_duplicate(const std::array& arr) { + for (int i = 0; i < arr.size(); i++) { + for (int j = i + 1; j < arr.size(); j++) { + if (arr[i] == arr[j]) { + return true; + } + } + } + return false; +} + +#if defined(__clang__) || defined(_MSC_VER) || \ + (defined(__GNUC__) && __GNUC__ > 8) +template +IGUANA_INLINE constexpr bool has_duplicate_type() { + std::array arr{ + iguana::type_string()...}; + return has_duplicate(arr); +} + +template +struct has_duplicate_type_in_variant : std::false_type {}; + +template +struct has_duplicate_type_in_variant> { + inline constexpr static bool value = has_duplicate_type(); +}; + +template +constexpr inline bool has_duplicate_type_v = + has_duplicate_type_in_variant::value; +#else +template +constexpr inline bool has_duplicate_type_v = false; +#endif + } // namespace iguana diff --git a/include/ylt/standalone/iguana/version.hpp b/include/ylt/standalone/iguana/version.hpp new file mode 100644 index 000000000..202ea4a3a --- /dev/null +++ b/include/ylt/standalone/iguana/version.hpp @@ -0,0 +1,8 @@ +#pragma once + +// Note: Update the version when release a new version. + +// IGUANA_VERSION % 100 is the sub-minor version +// IGUANA_VERSION / 100 % 1000 is the minor version +// IGUANA_VERSION / 100000 is the major version +#define IGUANA_VERSION 100004 // 1.0.4 \ No newline at end of file diff --git a/include/ylt/standalone/iguana/xml_reader.hpp b/include/ylt/standalone/iguana/xml_reader.hpp index 63bfdb848..209b4f894 100644 --- a/include/ylt/standalone/iguana/xml_reader.hpp +++ b/include/ylt/standalone/iguana/xml_reader.hpp @@ -28,15 +28,26 @@ template , int> = 0> IGUANA_INLINE void parse_value(U &&value, It &&begin, It &&end) { using T = std::decay_t; if constexpr (string_container_v) { - value = T(&*begin, static_cast(std::distance(begin, end))); + if constexpr (string_view_v) { + value = T(&*begin, static_cast(std::distance(begin, end))); + } + else { + // TODO: When not parsing the value in the attribute, it is not necessary + // to unescape'and " + value.clear(); + auto pre = begin; + while (advance_until_character<'&'>(begin, end)) { + value.append(T(&*pre, static_cast(std::distance(pre, begin)))); + parse_escape_xml(value, begin, end); + pre = begin; + } + value.append(T(&*pre, static_cast(std::distance(pre, begin)))); + } } else if constexpr (num_v) { auto size = std::distance(begin, end); const auto start = &*begin; - auto [p, ec] = detail::from_chars(start, start + size, value); - if (ec != std::errc{}) - IGUANA_UNLIKELY - throw std::runtime_error("Failed to parse number"); + detail::from_chars(start, start + size, value); } else if constexpr (char_v) { if (static_cast(std::distance(begin, end)) != 1) @@ -90,9 +101,19 @@ IGUANA_INLINE void parse_attr(U &&value, It &&it, It &&end) { parse_value(key, key_begin, key_end); skip_sapces_and_newline(it, end); - match<'"'>(it, end); - auto value_begin = it; - auto value_end = skip_pass<'"'>(it, end); + auto value_begin = it + 1; + auto value_end = value_begin; + if (*it == '"') + IGUANA_LIKELY { + ++it; + value_end = skip_pass<'"'>(it, end); + } + else if (*it == '\'') { + ++it; + value_end = skip_pass<'\''>(it, end); + } + else + IGUANA_UNLIKELY { throw std::runtime_error("expected quote or apos"); } value_type v; parse_value(v, value_begin, value_end); value.emplace(std::move(key), std::move(v)); @@ -121,17 +142,8 @@ IGUANA_INLINE void parse_item(U &value, It &&it, It &&end, match<'<'>(it, end); if (*it == '?' || *it == '!') IGUANA_UNLIKELY { - // skip '>(it, end); - ++it; - skip_sapces_and_newline(it, end); - continue; - } + --it; + return; } auto start = it; skip_till_greater_or_space(it, end); @@ -223,24 +235,48 @@ IGUANA_INLINE void skip_object_value(It &&it, It &&end, std::string_view name) { throw std::runtime_error("unclosed tag: " + std::string(name)); } +// skip +template +IGUANA_INLINE void skip_instructions(It &&it, It &&end) { + while (*(it - 1) != '?') { + ++it; + skip_till<'>'>(it, end); + } + ++it; +} + +template +IGUANA_INLINE void skip_cdata(It &&it, It &&end) { + ++it; + skip_till<']'>(it, end); + ++it; + match<']', '>'>(it, end); +} + +template +IGUANA_INLINE void skip_comment(It &&it, It &&end) { + while (*(it - 1) != '-' || *(it - 2) != '-') { + ++it; + skip_till<'>'>(it, end); + } + ++it; +} + // return true means reach the close tag template , int> = 0> -IGUANA_INLINE auto skip_till_key(T &value, It &&it, It &&end) { - skip_sapces_and_newline(it, end); +IGUANA_INLINE auto skip_till_close_tag(T &value, It &&it, It &&end) { while (true) { + skip_sapces_and_newline(it, end); match<'<'>(it, end); if (*it == '/') IGUANA_UNLIKELY { - // - return true; // reach the close tag + // reach the close tag + return true; } else if (*it == '?') IGUANA_UNLIKELY { - // - skip_till<'>'>(it, end); - ++it; - skip_sapces_and_newline(it, end); + skip_instructions(it, end); continue; } else if (*it == '!') @@ -249,12 +285,7 @@ IGUANA_INLINE auto skip_till_key(T &value, It &&it, It &&end) { if (*it == '[') { // >()) { - ++it; - skip_till<']'>(it, end); - ++it; - match<']', '>'>(it, end); - skip_sapces_and_newline(it, end); - continue; + skip_cdata(it, end); } else { // if parse cdata @@ -274,23 +305,53 @@ IGUANA_INLINE auto skip_till_key(T &value, It &&it, It &&end) { &*vb, static_cast(std::distance(vb, ve))); } match<']', '>'>(it, end); - skip_sapces_and_newline(it, end); - continue; } } - else { + else if (*it == '-') { // - // skip_till<'>'>(it, end); ++it; - skip_sapces_and_newline(it, end); - continue; } + continue; } return false; } } +template +IGUANA_INLINE void skip_till_first_key(It &&it, It &&end) { + while (it != end) { + skip_sapces_and_newline(it, end); + match<'<'>(it, end); + if (*it == '?') + IGUANA_UNLIKELY { + skip_instructions(it, end); + continue; + } + else if (*it == '!') + IGUANA_UNLIKELY { + ++it; + if (*it == '-') { + // + skip_comment(it, end); + } + else { + // + skip_till<'>'>(it, end); + ++it; + } + continue; + } + else { + break; + } + } +} + template IGUANA_INLINE void check_required(std::string_view key_set) { if constexpr (iguana::has_iguana_required_arr_v) { @@ -313,7 +374,7 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end, constexpr auto cdata_idx = get_type_index(); skip_till<'>'>(it, end); ++it; - if (skip_till_key(value, it, end)) { + if (skip_till_close_tag(value, it, end)) { match_close_tag(it, end, name); return; } @@ -344,7 +405,7 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end, key_set.append(key).append(", "); } } - if (skip_till_key(value, it, end)) + if (skip_till_close_tag(value, it, end)) IGUANA_UNLIKELY { match_close_tag(it, end, name); parse_done = true; @@ -389,7 +450,7 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end, skip_object_value(it, end, key); #endif } - if (skip_till_key(value, it, end)) { + if (skip_till_close_tag(value, it, end)) { match_close_tag(it, end, name); check_required(key_set); return; @@ -405,17 +466,7 @@ IGUANA_INLINE void parse_item(T &value, It &&it, It &&end, template , int> = 0> IGUANA_INLINE void from_xml(U &value, It &&it, It &&end) { - while (it != end) { - skip_sapces_and_newline(it, end); - match<'<'>(it, end); - if (*it == '?') { - skip_till<'>'>(it, end); - ++it; - } - else { - break; - } - } + detail::skip_till_first_key(it, end); auto start = it; skip_till_greater_or_space(it, end); std::string_view key = @@ -426,17 +477,7 @@ IGUANA_INLINE void from_xml(U &value, It &&it, It &&end) { template , int> = 0> IGUANA_INLINE void from_xml(U &value, It &&it, It &&end) { - while (it != end) { - skip_sapces_and_newline(it, end); - match<'<'>(it, end); - if (*it == '?') { - skip_till<'>'>(it, end); - ++it; // skip > - } - else { - break; - } - } + detail::skip_till_first_key(it, end); auto start = it; skip_till_greater_or_space(it, end); std::string_view key = diff --git a/include/ylt/standalone/iguana/xml_util.hpp b/include/ylt/standalone/iguana/xml_util.hpp index cd2df709b..dabd47a38 100644 --- a/include/ylt/standalone/iguana/xml_util.hpp +++ b/include/ylt/standalone/iguana/xml_util.hpp @@ -2,8 +2,8 @@ #include "util.hpp" namespace iguana { -template > +template > class xml_attr_t { public: T &value() { return val_; } @@ -17,6 +17,10 @@ class xml_attr_t { map_type attr_; }; +template +using xml_attr_view_t = + xml_attr_t>; + template , int> = 0> class xml_cdata_t { @@ -72,12 +76,24 @@ inline constexpr auto has_square_bracket = 0b0101110101011101010111010101110101011101010111010101110101011101); }; +inline constexpr auto has_and = [](uint64_t chunk) IGUANA__INLINE_LAMBDA { + return has_zero( + chunk ^ + 0b0010011000100110001001100010011000100110001001100010011000100110); +}; + inline constexpr auto has_equal = [](uint64_t chunk) IGUANA__INLINE_LAMBDA { return has_zero( chunk ^ 0b0011110100111101001111010011110100111101001111010011110100111101); }; +inline constexpr auto has_apos = [](uint64_t chunk) IGUANA__INLINE_LAMBDA { + return has_zero( + chunk ^ + 0b0010011100100111001001110010011100100111001001110010011100100111); +}; + template IGUANA_INLINE void skip_sapces_and_newline(It &&it, It &&end) { while (it != end && (static_cast(*it) < 33)) { @@ -104,6 +120,35 @@ IGUANA_INLINE void match_close_tag(It &&it, It &&end, std::string_view key) { // ++it; } +// returns true if the specified character 'c' is found, false otherwise. +template +IGUANA_INLINE bool advance_until_character(It &&it, It &&end) { + static_assert(contiguous_iterator>); + if (std::distance(it, end) >= 7) + IGUANA_LIKELY { + const auto end_m7 = end - 7; + for (; it < end_m7; it += 8) { + const auto chunk = *reinterpret_cast(&*it); + uint64_t test; + if constexpr (c == '&') + test = has_and(chunk); + else + static_assert(!c, "not support this character"); + if (test != 0) { + it += (countr_zero(test) >> 3); + return true; + } + } + } + // Tail end of buffer. Should be rare we even get here + while (it < end) { + if (*it == c) + return true; + ++it; + } + return false; +} + template IGUANA_INLINE void skip_till(It &&it, It &&end) { static_assert(contiguous_iterator>); @@ -126,6 +171,8 @@ IGUANA_INLINE void skip_till(It &&it, It &&end) { test = has_square_bracket(chunk); else if constexpr (c == '=') test = has_equal(chunk); + else if constexpr (c == '\'') + test = has_apos(chunk); else static_assert(!c, "not support this character"); if (test != 0) { @@ -186,4 +233,104 @@ IGUANA_INLINE auto skip_pass(It &&it, It &&end) { return res + 1; } +template +IGUANA_INLINE bool is_match(It &&it, const It &end) { + const auto n = static_cast(std::distance(it, end)); + if ((n < sizeof...(C)) || (... || (*it++ != C))) { + return false; + } + return true; +} + +template , int> = 0> +IGUANA_INLINE void parse_escape_xml(U &value, It &&it, It &&end) { + static const unsigned char lookup_digits[256] = { + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, + 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 10, 11, 12, 13, 14, 15, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255}; + switch (*(it + 1)) { + // & ' + case 'a': + if (is_match<'m', 'p', ';'>(it + 2, end)) { + value.push_back('&'); + it += 5; + return; + } + if (is_match<'p', 'o', 's', ';'>(it + 2, end)) { + value.push_back('\''); + it += 6; + } + break; + // " + case 'q': + if (is_match<'u', 'o', 't', ';'>(it + 2, end)) { + value.push_back('\"'); + it += 6; + } + break; + // > + case 'g': + if (is_match<'t', ';'>(it + 2, end)) { + value.push_back('>'); + it += 4; + } + break; + // < + case 'l': + if (is_match<'t', ';'>(it + 2, end)) { + value.push_back('<'); + it += 4; + } + break; + case '#': + if (*(it + 2) == 'x') { + // &#x + unsigned long codepoint = 0; + it += 3; + while (true) { + auto digit = lookup_digits[static_cast(*it)]; + if (digit == 0xFF) + break; + codepoint = codepoint * 16 + digit; + ++it; + } + encode_utf8(value, codepoint); + } + else { + unsigned long codepoint = 0; + it += 2; + while (true) { + auto digit = lookup_digits[static_cast(*it)]; + if (digit == 0xFF) + break; + codepoint = codepoint * 10 + digit; + ++it; + } + encode_utf8(value, codepoint); + } + match<';'>(it, end); + break; + default: + // skip '&' + // loose policy: allow '&' + value.push_back(*(it++)); + break; + } +} + } // namespace iguana diff --git a/include/ylt/standalone/iguana/xml_writer.hpp b/include/ylt/standalone/iguana/xml_writer.hpp index b83a2c8bb..94e35f37b 100644 --- a/include/ylt/standalone/iguana/xml_writer.hpp +++ b/include/ylt/standalone/iguana/xml_writer.hpp @@ -6,6 +6,58 @@ namespace iguana { +#ifdef XML_ATTR_USE_APOS +#define XML_ATTR_DELIMITER '\'' +#else +#define XML_ATTR_DELIMITER '\"' +#endif + +// TODO: improve by precaculate size +template +IGUANA_INLINE void render_string_with_escape_xml(const Ch *it, SizeType length, + Stream &ss) { + auto end = it; + std::advance(end, length); + while (it < end) { +#ifdef XML_ESCAPE_UNICODE + if (static_cast(*it) >= 0x80) + IGUANA_UNLIKELY { + write_unicode_to_string(it, ss); + continue; + } +#endif + if constexpr (escape_quote_apos) { + if constexpr (XML_ATTR_DELIMITER == '\"') { + if (*it == '"') + IGUANA_UNLIKELY { + ss.append("""); + ++it; + continue; + } + } + else { + if (*it == '\'') + IGUANA_UNLIKELY { + ss.append("'"); + ++it; + continue; + } + } + } + if (*it == '&') + IGUANA_UNLIKELY { ss.append("&"); } + else if (*it == '>') + IGUANA_UNLIKELY { ss.append(">"); } + else if (*it == '<') + IGUANA_UNLIKELY { ss.append("<"); } + else { + ss.push_back(*it); + } + ++it; + } +} + template , int> = 0> IGUANA_INLINE void render_xml_value(Stream &ss, const T &value, @@ -39,10 +91,12 @@ IGUANA_INLINE void render_head(Stream &ss, std::string_view str) { ss.push_back('>'); } -template , int> = 0> +template , int> = 0> IGUANA_INLINE void render_value(Stream &ss, const T &value) { if constexpr (string_container_v) { - ss.append(value.data(), value.size()); + render_string_with_escape_xml(value.data(), value.size(), + ss); } else if constexpr (num_v) { char temp[65]; @@ -91,9 +145,9 @@ inline void render_xml_attr(Stream &ss, const T &value, std::string_view name) { ss.push_back(' '); render_value(ss, k); ss.push_back('='); - ss.push_back('"'); - render_value(ss, v); - ss.push_back('"'); + ss.push_back(XML_ATTR_DELIMITER); + render_value(ss, v); + ss.push_back(XML_ATTR_DELIMITER); } ss.push_back('>'); } diff --git a/include/ylt/standalone/iguana/yaml_reader.hpp b/include/ylt/standalone/iguana/yaml_reader.hpp index 9ad04e30a..0a0d47fa7 100644 --- a/include/ylt/standalone/iguana/yaml_reader.hpp +++ b/include/ylt/standalone/iguana/yaml_reader.hpp @@ -113,10 +113,7 @@ IGUANA_INLINE void parse_value(U &value, It &&value_begin, It &&value_end) { IGUANA_UNLIKELY { return; } auto size = std::distance(value_begin, value_end); const auto start = &*value_begin; - auto [p, ec] = detail::from_chars(start, start + size, value); - if (ec != std::errc{}) - IGUANA_UNLIKELY - throw std::runtime_error("Failed to parse number"); + detail::from_chars(start, start + size, value); } // string_view should be used for string with ' " ?