From b9bc680c6b8909914b6b2161e5eaa146ba6c0c2f Mon Sep 17 00:00:00 2001 From: bbgan <2893129936@qq.com> Date: Sun, 28 Jan 2024 15:12:41 +0800 Subject: [PATCH] [struct_json, struct_yaml][feat]support escape during serialization --- include/ylt/thirdparty/iguana/detail/utf.hpp | 99 ++++++++++++++++ include/ylt/thirdparty/iguana/json_writer.hpp | 106 ++++++++++-------- include/ylt/thirdparty/iguana/util.hpp | 85 +++++++++++++- include/ylt/thirdparty/iguana/yaml_writer.hpp | 81 +++++++------ src/struct_json/examples/main.cpp | 11 ++ 5 files changed, 300 insertions(+), 82 deletions(-) diff --git a/include/ylt/thirdparty/iguana/detail/utf.hpp b/include/ylt/thirdparty/iguana/detail/utf.hpp index a6fa7012a..e0dae985f 100644 --- a/include/ylt/thirdparty/iguana/detail/utf.hpp +++ b/include/ylt/thirdparty/iguana/detail/utf.hpp @@ -3,6 +3,8 @@ #include #include +#include "iguana/define.h" + namespace iguana { // https://github.com/Tencent/rapidjson/blob/master/include/rapidjson/reader.h template @@ -48,4 +50,101 @@ inline void encode_utf8(OutputStream &os, unsigned codepoint) { os.push_back(static_cast(0x80 | (codepoint & 0x3F))); } } + +// https://github.com/Tencent/rapidjson/blob/master/include/rapidjson/encodings.h +static inline unsigned char GetRange(unsigned char c) { + static const unsigned char type[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0x10, 0x10, 0x10, 0x10, + 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, + 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, + 0x40, 0x40, 0x40, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, + 8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 10, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, + 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, + }; + return type[c]; +} + +// https://github.com/Tencent/rapidjson/blob/master/include/rapidjson/encodings.h +template +inline bool decode_utf8(It &&it, unsigned &codepoint) { + auto c = *(it++); + bool result = true; + auto copy = [&]() IGUANA__INLINE_LAMBDA { + c = *(it++); + codepoint = (codepoint << 6) | (static_cast(c) & 0x3Fu); + }; + auto trans = [&](unsigned mask) IGUANA__INLINE_LAMBDA { + result &= ((GetRange(static_cast(c)) & mask) != 0); + }; + auto tail = [&]() IGUANA__INLINE_LAMBDA { + copy(); + trans(0x70); + }; + if (!(c & 0x80)) { + codepoint = static_cast(c); + return true; + } + unsigned char type = GetRange(static_cast(c)); + if (type >= 32) { + codepoint = 0; + } + else { + codepoint = (0xFFu >> type) & static_cast(c); + } + switch (type) { + case 2: + tail(); + return result; + case 3: + tail(); + tail(); + return result; + case 4: + copy(); + trans(0x50); + tail(); + return result; + case 5: + copy(); + trans(0x10); + tail(); + tail(); + return result; + case 6: + tail(); + tail(); + tail(); + return result; + case 10: + copy(); + trans(0x20); + tail(); + return result; + case 11: + copy(); + trans(0x60); + tail(); + tail(); + return result; + default: + return false; + } +} + } // namespace iguana diff --git a/include/ylt/thirdparty/iguana/json_writer.hpp b/include/ylt/thirdparty/iguana/json_writer.hpp index 3d77ff638..2df2bfe8d 100644 --- a/include/ylt/thirdparty/iguana/json_writer.hpp +++ b/include/ylt/thirdparty/iguana/json_writer.hpp @@ -8,33 +8,35 @@ namespace iguana { -template , int> = 0> IGUANA_INLINE void to_json(T &&t, Stream &s); -template +template IGUANA_INLINE void render_json_value(Stream &ss, std::optional &val); -template , int> = 0> IGUANA_INLINE void render_json_value(Stream &ss, const T &t); -template , int> = 0> IGUANA_INLINE void render_json_value(Stream &ss, const T &v); -template , int> = 0> IGUANA_INLINE void render_json_value(Stream &ss, const T &v); -template , int> = 0> IGUANA_INLINE void render_json_value(Stream &ss, const T &o); -template , int> = 0> +template , int> = 0> IGUANA_INLINE void render_json_value(Stream &s, T &&t); -template , int> = 0> +template , int> = 0> IGUANA_INLINE void render_json_value(Stream &s, T &&t); template @@ -42,7 +44,6 @@ IGUANA_INLINE void join(Stream &ss, InputIt first, InputIt last, const T &delim, const F &f) { if (first == last) return; - f(*first++); while (first != last) { ss.push_back(delim); @@ -50,75 +51,85 @@ IGUANA_INLINE void join(Stream &ss, InputIt first, InputIt last, const T &delim, } } -template +template IGUANA_INLINE void render_json_value(Stream &ss, std::nullptr_t) { ss.append("null"); } -template +template IGUANA_INLINE void render_json_value(Stream &ss, bool b) { ss.append(b ? "true" : "false"); }; -template +template IGUANA_INLINE void render_json_value(Stream &ss, char value) { ss.append("\""); ss.push_back(value); ss.append("\""); } -template , int> = 0> +template , int> = 0> IGUANA_INLINE void render_json_value(Stream &ss, T value) { char temp[65]; auto p = detail::to_chars(temp, value); ss.append(temp, p - temp); } -template , int> = 0> IGUANA_INLINE void render_json_value(Stream &ss, T v) { ss.append(v.value().data(), v.value().size()); } -template , int> = 0> IGUANA_INLINE void render_json_value(Stream &ss, T &&t) { ss.push_back('"'); - ss.append(t.data(), t.size()); + if constexpr (Is_writing_escape) { + write_string_with_escape(t.data(), t.size(), ss); + } + else { + ss.append(t.data(), t.size()); + } ss.push_back('"'); } -template , int> = 0> +template , int> = 0> IGUANA_INLINE void render_key(Stream &ss, T &t) { ss.push_back('"'); - render_json_value(ss, t); + render_json_value(ss, t); ss.push_back('"'); } -template , int> = 0> IGUANA_INLINE void render_key(Stream &ss, T &&t) { - render_json_value(ss, std::forward(t)); + render_json_value(ss, std::forward(t)); } -template , int> = 0> IGUANA_INLINE void render_json_value(Stream &ss, T &&t) { to_json(std::forward(t), ss); } -template , int> = 0> +template , int> = 0> IGUANA_INLINE void render_json_value(Stream &ss, T val) { static constexpr auto enum_to_str = get_enum_map>(); if constexpr (bool_v) { - render_json_value(ss, static_cast>(val)); + render_json_value( + ss, static_cast>(val)); } else { auto it = enum_to_str.find(val); if (it != enum_to_str.end()) IGUANA_LIKELY { auto str = it->second; - render_json_value(ss, std::string_view(str.data(), str.size())); + render_json_value( + ss, std::string_view(str.data(), str.size())); } else { throw std::runtime_error( @@ -128,27 +139,28 @@ IGUANA_INLINE void render_json_value(Stream &ss, T val) { } } -template +template IGUANA_INLINE void render_json_value(Stream &ss, std::optional &val) { if (!val) { ss.append("null"); } else { - render_json_value(ss, *val); + render_json_value(ss, *val); } } -template +template IGUANA_INLINE void render_array(Stream &ss, const T &v) { ss.push_back('['); join(ss, std::begin(v), std::end(v), ',', [&ss](const auto &jsv) IGUANA__INLINE_LAMBDA { - render_json_value(ss, jsv); + render_json_value(ss, jsv); }); ss.push_back(']'); } -template , int>> +template , int>> IGUANA_INLINE void render_json_value(Stream &ss, const T &t) { if constexpr (std::is_same_v()[0])>>) { @@ -166,30 +178,30 @@ IGUANA_INLINE void render_json_value(Stream &ss, const T &t) { ss.push_back('"'); } else { - render_array(ss, t); + render_array(ss, t); } } -template , int>> IGUANA_INLINE void render_json_value(Stream &ss, const T &o) { ss.push_back('{'); join(ss, o.cbegin(), o.cend(), ',', [&ss](const auto &jsv) IGUANA__INLINE_LAMBDA { - render_key(ss, jsv.first); + render_key(ss, jsv.first); ss.push_back(':'); - render_json_value(ss, jsv.second); + render_json_value(ss, jsv.second); }); ss.push_back('}'); } -template , int>> IGUANA_INLINE void render_json_value(Stream &ss, const T &v) { ss.push_back('['); join(ss, v.cbegin(), v.cend(), ',', [&ss](const auto &jsv) IGUANA__INLINE_LAMBDA { - render_json_value(ss, jsv); + render_json_value(ss, jsv); }); ss.push_back(']'); } @@ -203,24 +215,26 @@ constexpr auto write_json_key = [](auto &s, auto i, s.push_back('"'); }; -template , int>> +template , int>> IGUANA_INLINE void render_json_value(Stream &ss, const T &v) { if (v) { - render_json_value(ss, *v); + render_json_value(ss, *v); } else { ss.append("null"); } } -template , int>> +template , int>> IGUANA_INLINE void render_json_value(Stream &s, T &&t) { using U = typename std::decay_t; s.push_back('['); constexpr size_t size = std::tuple_size_v; for_each(std::forward(t), [&s, size](auto &v, auto i) IGUANA__INLINE_LAMBDA { - render_json_value(s, v); + render_json_value(s, v); if (i != size - 1) IGUANA_LIKELY { s.push_back(','); } @@ -228,16 +242,18 @@ IGUANA_INLINE void render_json_value(Stream &s, T &&t) { s.push_back(']'); } -template , int>> +template , int>> IGUANA_INLINE void render_json_value(Stream &s, T &&t) { std::visit( [&s](auto value) { - render_json_value(s, value); + render_json_value(s, value); }, t); } -template , int>> +template , int>> IGUANA_INLINE void to_json(T &&t, Stream &s) { s.push_back('{'); for_each(std::forward(t), @@ -249,17 +265,17 @@ IGUANA_INLINE void to_json(T &&t, Stream &s) { write_json_key(s, i, t); s.push_back(':'); - render_json_value(s, t.*v); + render_json_value(s, t.*v); if (Idx < Count - 1) IGUANA_LIKELY { s.push_back(','); } }); s.push_back('}'); } -template , int> = 0> IGUANA_INLINE void to_json(T &&t, Stream &s) { - render_json_value(s, t); + render_json_value(s, t); } } // namespace iguana diff --git a/include/ylt/thirdparty/iguana/util.hpp b/include/ylt/thirdparty/iguana/util.hpp index b5ec2c3e0..3d2605cd2 100644 --- a/include/ylt/thirdparty/iguana/util.hpp +++ b/include/ylt/thirdparty/iguana/util.hpp @@ -13,6 +13,7 @@ #include "define.h" #include "detail/charconv.h" +#include "detail/utf.hpp" #include "enum_reflection.hpp" #include "error_code.h" #include "reflection.hpp" @@ -71,7 +72,7 @@ constexpr inline bool map_container_v = is_map_container>::value; template -constexpr inline bool c_array_v = std::is_array_v> && +constexpr inline bool c_array_v = std::is_array_v>&& std::extent_v> > 0; template @@ -175,7 +176,7 @@ template using underline_type_t = typename underline_type>::type; template -IGUANA_INLINE void match(It &&it, It &&end) { +IGUANA_INLINE void match(It&& it, It&& end) { const auto n = static_cast(std::distance(it, end)); if (n < sizeof...(C)) IGUANA_UNLIKELY { @@ -203,4 +204,84 @@ inline constexpr auto has_qoute = [](uint64_t chunk) IGUANA__INLINE_LAMBDA { 0b0010001000100010001000100010001000100010001000100010001000100010); }; +// https://github.com/Tencent/rapidjson/blob/master/include/rapidjson/writer.h +template +inline void write_string_with_escape(const Ch* it, SizeType length, + Stream& ss) { + static const char hexDigits[16] = {'0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; + static const char escape[256] = { +#define Z16 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + // 0 1 2 3 4 5 6 7 8 9 A B C D E + // F + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'b', 't', + 'n', 'u', 'f', 'r', 'u', 'u', // 00 + 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', 'u', + 'u', 'u', 'u', 'u', 'u', 'u', // 10 + 0, 0, '"', 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, // 20 + Z16, Z16, // 30~4F + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, '\\', 0, 0, 0, // 50 + Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16 // 60~FF +#undef Z16 + }; + auto end = it; + std::advance(end, length); + while (it < end) { + if (static_cast(*it) >= 0x80) + IGUANA_UNLIKELY { + unsigned codepoint = 0; + if (!decode_utf8(it, codepoint)) + IGUANA_UNLIKELY { + throw std::runtime_error("illegal unicode character"); + } + ss.push_back('\\'); + ss.push_back('u'); + if (codepoint <= 0xD7FF || + (codepoint >= 0xE000 && codepoint <= 0xFFFF)) { + ss.push_back(hexDigits[(codepoint >> 12) & 15]); + ss.push_back(hexDigits[(codepoint >> 8) & 15]); + ss.push_back(hexDigits[(codepoint >> 4) & 15]); + ss.push_back(hexDigits[(codepoint)&15]); + } + else { + if (codepoint < 0x010000 || codepoint > 0x10FFFF) + IGUANA_UNLIKELY { throw std::runtime_error("illegal codepoint"); } + // Surrogate pair + unsigned s = codepoint - 0x010000; + unsigned lead = (s >> 10) + 0xD800; + unsigned trail = (s & 0x3FF) + 0xDC00; + ss.push_back(hexDigits[(lead >> 12) & 15]); + ss.push_back(hexDigits[(lead >> 8) & 15]); + ss.push_back(hexDigits[(lead >> 4) & 15]); + ss.push_back(hexDigits[(lead)&15]); + ss.push_back('\\'); + ss.push_back('u'); + ss.push_back(hexDigits[(trail >> 12) & 15]); + ss.push_back(hexDigits[(trail >> 8) & 15]); + ss.push_back(hexDigits[(trail >> 4) & 15]); + ss.push_back(hexDigits[(trail)&15]); + } + } + else if (escape[static_cast(*it)]) + IGUANA_UNLIKELY { + ss.push_back('\\'); + ss.push_back(escape[static_cast(*it)]); + + if (escape[static_cast(*it)] == 'u') { + // escape other control characters + ss.push_back('0'); + ss.push_back('0'); + ss.push_back(hexDigits[static_cast(*it) >> 4]); + ss.push_back(hexDigits[static_cast(*it) & 0xF]); + } + ++it; + } + else { + ss.push_back(*(it++)); + } + } +} + } // namespace iguana diff --git a/include/ylt/thirdparty/iguana/yaml_writer.hpp b/include/ylt/thirdparty/iguana/yaml_writer.hpp index b6228b99a..a0794f2c3 100644 --- a/include/ylt/thirdparty/iguana/yaml_writer.hpp +++ b/include/ylt/thirdparty/iguana/yaml_writer.hpp @@ -5,28 +5,34 @@ namespace iguana { -template , int> = 0> IGUANA_INLINE void to_yaml(T &&t, Stream &s, size_t min_spaces = 0); -template , int> = 0> IGUANA_INLINE void render_yaml_value(Stream &ss, T &&t, size_t min_spaces) { ss.push_back('\n'); - to_yaml(std::forward(t), ss, min_spaces); + to_yaml(std::forward(t), ss, min_spaces); } -// TODO: support more string style, support escape -template , int> = 0> +template , int> = 0> IGUANA_INLINE void render_yaml_value(Stream &ss, T &&t, size_t min_spaces) { - ss.append(t.data(), t.size()); + if constexpr (Is_writing_escape) { + ss.push_back('"'); + write_string_with_escape(t.data(), t.size(), ss); + ss.push_back('"'); + } + else { + ss.append(t.data(), t.size()); + } if constexpr (appendLf) ss.push_back('\n'); } -template , int> = 0> +template , int> = 0> IGUANA_INLINE void render_yaml_value(Stream &ss, T value, size_t min_spaces) { char temp[65]; auto p = detail::to_chars(temp, value); @@ -35,7 +41,7 @@ IGUANA_INLINE void render_yaml_value(Stream &ss, T value, size_t min_spaces) { ss.push_back('\n'); } -template +template IGUANA_INLINE void render_yaml_value(Stream &ss, char value, size_t min_spaces) { ss.push_back(value); @@ -43,7 +49,7 @@ IGUANA_INLINE void render_yaml_value(Stream &ss, char value, ss.push_back('\n'); } -template +template IGUANA_INLINE void render_yaml_value(Stream &ss, bool value, size_t min_spaces) { ss.append(value ? "true" : "false"); @@ -51,21 +57,21 @@ IGUANA_INLINE void render_yaml_value(Stream &ss, bool value, ss.push_back('\n'); } -template , int> = 0> +template , int> = 0> IGUANA_INLINE void render_yaml_value(Stream &ss, T value, size_t min_spaces) { static constexpr auto enum_to_str = get_enum_map>(); if constexpr (bool_v) { - render_yaml_value(ss, static_cast>(value), - min_spaces); + render_yaml_value( + ss, static_cast>(value), min_spaces); } else { auto it = enum_to_str.find(value); if (it != enum_to_str.end()) IGUANA_LIKELY { auto str = it->second; - render_yaml_value(ss, std::string_view(str.data(), str.size()), - min_spaces); + render_yaml_value( + ss, std::string_view(str.data(), str.size()), min_spaces); } else { throw std::runtime_error( @@ -75,16 +81,17 @@ IGUANA_INLINE void render_yaml_value(Stream &ss, T value, size_t min_spaces) { } } -template , int> = 0> +template , int> = 0> IGUANA_INLINE void render_yaml_value(Stream &ss, const T &val, size_t min_spaces); -template , int> = 0> IGUANA_INLINE void render_yaml_value(Stream &ss, const T &val, size_t min_spaces); -template , int> = 0> IGUANA_INLINE void render_yaml_value(Stream &ss, const T &t, size_t min_spaces) { @@ -92,35 +99,37 @@ IGUANA_INLINE void render_yaml_value(Stream &ss, const T &t, for (const auto &v : t) { ss.append(min_spaces, ' '); ss.append("- "); - render_yaml_value(ss, v, min_spaces + 1); + render_yaml_value(ss, v, min_spaces + 1); } } -template , int> = 0> +template , int> = 0> IGUANA_INLINE void render_yaml_value(Stream &ss, T &&t, size_t min_spaces) { ss.push_back('\n'); for_each(std::forward(t), [&ss, min_spaces](auto &v, auto i) IGUANA__INLINE_LAMBDA { ss.append(min_spaces, ' '); ss.append("- "); - render_yaml_value(ss, v, min_spaces + 1); + render_yaml_value(ss, v, min_spaces + 1); }); } -template , int> = 0> IGUANA_INLINE void render_yaml_value(Stream &ss, const T &t, size_t min_spaces) { ss.push_back('\n'); for (const auto &[k, v] : t) { ss.append(min_spaces, ' '); - render_yaml_value(ss, k, 0); // key must be plaint type + render_yaml_value(ss, k, 0); // key must be plaint type ss.append(": "); - render_yaml_value(ss, v, min_spaces + 1); + render_yaml_value(ss, v, min_spaces + 1); } } -template , int>> +template , int>> IGUANA_INLINE void render_yaml_value(Stream &ss, const T &val, size_t min_spaces) { if (!val) { @@ -128,18 +137,19 @@ IGUANA_INLINE void render_yaml_value(Stream &ss, const T &val, ss.push_back('\n'); } else { - render_yaml_value(ss, *val, min_spaces); + render_yaml_value(ss, *val, min_spaces); } } -template , int>> +template , int>> IGUANA_INLINE void render_yaml_value(Stream &ss, const T &val, size_t min_spaces) { if (!val) { ss.push_back('\n'); } else { - render_yaml_value(ss, *val, min_spaces); + render_yaml_value(ss, *val, min_spaces); } } @@ -149,7 +159,8 @@ constexpr auto write_yaml_key = [](auto &s, auto i, s.append(name.data(), name.size()); }; -template , int>> +template , int>> IGUANA_INLINE void to_yaml(T &&t, Stream &s, size_t min_spaces) { for_each(std::forward(t), [&t, &s, min_spaces](const auto &v, auto i) IGUANA__INLINE_LAMBDA { @@ -161,21 +172,21 @@ IGUANA_INLINE void to_yaml(T &&t, Stream &s, size_t min_spaces) { write_yaml_key(s, i, t); s.append(": "); if constexpr (!is_reflection>::value) { - render_yaml_value(s, t.*v, min_spaces + 1); + render_yaml_value(s, t.*v, min_spaces + 1); } else { s.push_back('\n'); - to_yaml(t.*v, s, min_spaces + 1); + to_yaml(t.*v, s, min_spaces + 1); } }); } -template , int> = 0> IGUANA_INLINE void to_yaml(T &&t, Stream &s) { if constexpr (tuple_v || map_container_v || sequence_container_v || optional_v || smart_ptr_v) - render_yaml_value(s, std::forward(t), 0); + render_yaml_value(s, std::forward(t), 0); else static_assert(!sizeof(T), "don't suppport this type"); } diff --git a/src/struct_json/examples/main.cpp b/src/struct_json/examples/main.cpp index e946da810..9b7c8bae3 100644 --- a/src/struct_json/examples/main.cpp +++ b/src/struct_json/examples/main.cpp @@ -58,6 +58,16 @@ void use_smart_pointer() { assert(*p1.age == 42); } +void test_escape_serialize() { + person p{"老\t人", 20}; + std::string ss; + struct_json::to_json(p, ss); + std::cout << ss << std::endl; + person p1; + struct_json::from_json(p1, ss); + assert(p1.name == p.name); +} + int main() { person p{"tom", 20}; std::string str; @@ -79,4 +89,5 @@ int main() { test_inner_object(); use_smart_pointer(); + test_escape_serialize(); } \ No newline at end of file