From 15b15183aac52833f91edff2e4b3162c9b6f5a2b Mon Sep 17 00:00:00 2001 From: shihengzhen Date: Mon, 30 Jun 2025 15:26:47 +0800 Subject: [PATCH 1/3] fix surrogate pair encoding for serialization --- ajson.hpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/ajson.hpp b/ajson.hpp index 036f6ba..dfe65d6 100644 --- a/ajson.hpp +++ b/ajson.hpp @@ -1037,23 +1037,23 @@ namespace ajson } else { - unsigned char c1 = (uint8_t)(codepoint >> 24); - unsigned char c2 = (uint8_t)(codepoint >> 16); - unsigned char c3 = (uint8_t)(codepoint >> 8); - unsigned char c4 = (uint8_t)codepoint; + // utf-16 surrogate pair encoding (\uXXXX\uYYYY) + uint32_t cp_prime = codepoint - 0x10000; + uint16_t high = 0xD800 + (cp_prime >> 10); + uint16_t low = 0xDC00 + (cp_prime & 0x3FF); put('\\'); put('u'); - put(hex_table[(c1) >> 4]); - put(hex_table[(c1)& 0xF]); - put(hex_table[(c2) >> 4]); - put(hex_table[(c2)& 0xF]); + put(hex_table[(high >> 12) & 0xF]); + put(hex_table[(high >> 8) & 0xF]); + put(hex_table[(high >> 4) & 0xF]); + put(hex_table[high & 0xF]); put('\\'); put('u'); - put(hex_table[(c3) >> 4]); - put(hex_table[(c3)& 0xF]); - put(hex_table[(c4) >> 4]); - put(hex_table[(c4)& 0xF]); + put(hex_table[(low >> 12) & 0xF]); + put(hex_table[(low >> 8) & 0xF]); + put(hex_table[(low >> 4) & 0xF]); + put(hex_table[low & 0xF]); } } else @@ -1555,7 +1555,7 @@ namespace ajson } while (len > 0); return true; } - + template struct json_impl < ty, typename std::enable_if ::value>::type > From 565343e798f09e57f9f11d52ce7d8e1bf74c836f Mon Sep 17 00:00:00 2001 From: shihengzhen Date: Mon, 30 Jun 2025 15:30:02 +0800 Subject: [PATCH 2/3] fix surrogate pair encoding for deserialization --- ajson.hpp | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ajson.hpp b/ajson.hpp index dfe65d6..60b4aa9 100644 --- a/ajson.hpp +++ b/ajson.hpp @@ -1475,6 +1475,7 @@ namespace ajson template bool escape_string(string_ty& str , const char * data , size_t 
len) { + uint64_t high = 0; str.clear(); str.reserve(len); if (len == 0) @@ -1539,6 +1540,23 @@ namespace ajson len -= 4; if (uft1 == 0) return false; + if (uft1 >= 0xD800 && uft1 <= 0xDBFF) // high surrogate + { + if (high) + return false; // already have a high surrogate, error + high = uft1; + continue; + } + else if (uft1 >= 0xDC00 && uft1 <= 0xDFFF) // low surrogate + { + if (!high) + return false; // no preceding high surrogate, error + else + { + uft1 = 0x10000 + ((high - 0xD800) << 10) + (uft1 - 0xDC00); // calculate code point + high = 0; + } + } if (!esacpe_utf8(str, uft1)) return false; continue; @@ -1553,6 +1571,8 @@ namespace ajson } str.append(1, c); } while (len > 0); + if (high) + return false; // high surrogate without valid low surrogate return true; } From c0050691186e4fb93cbc3b26480b3b9f2c4028db Mon Sep 17 00:00:00 2001 From: shihengzhen Date: Mon, 30 Jun 2025 15:32:44 +0800 Subject: [PATCH 3/3] fix escape utf8 error --- ajson.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ajson.hpp b/ajson.hpp index 60b4aa9..02bc55b 100644 --- a/ajson.hpp +++ b/ajson.hpp @@ -1452,7 +1452,7 @@ namespace ajson str.append(1, (char)(0xC0 | ((utf1 >> 6) & 0xFF))); str.append(1, (char)(0x80 | ((utf1 & 0x3F)))); } - else if (utf1 < 0x80000) + else if (utf1 < 0x10000) { str.append(1, (char)(0xE0 | ((utf1 >> 12) & 0xFF))); str.append(1, (char)(0x80 | ((utf1 >> 6) & 0x3F))); @@ -1460,7 +1460,7 @@ namespace ajson } else { - if (utf1 < 0x110000) + if (utf1 >= 0x110000) { return false; }