diff --git a/ajson.hpp b/ajson.hpp index 036f6ba..02bc55b 100644 --- a/ajson.hpp +++ b/ajson.hpp @@ -1037,23 +1037,23 @@ namespace ajson } else { - unsigned char c1 = (uint8_t)(codepoint >> 24); - unsigned char c2 = (uint8_t)(codepoint >> 16); - unsigned char c3 = (uint8_t)(codepoint >> 8); - unsigned char c4 = (uint8_t)codepoint; + // utf-16 surrogate pair encoding (\uXXXX\uYYYY) + uint32_t cp_prime = codepoint - 0x10000; + uint16_t high = 0xD800 + (cp_prime >> 10); + uint16_t low = 0xDC00 + (cp_prime & 0x3FF); put('\\'); put('u'); - put(hex_table[(c1) >> 4]); - put(hex_table[(c1)& 0xF]); - put(hex_table[(c2) >> 4]); - put(hex_table[(c2)& 0xF]); + put(hex_table[(high >> 12) & 0xF]); + put(hex_table[(high >> 8) & 0xF]); + put(hex_table[(high >> 4) & 0xF]); + put(hex_table[high & 0xF]); put('\\'); put('u'); - put(hex_table[(c3) >> 4]); - put(hex_table[(c3)& 0xF]); - put(hex_table[(c4) >> 4]); - put(hex_table[(c4)& 0xF]); + put(hex_table[(low >> 12) & 0xF]); + put(hex_table[(low >> 8) & 0xF]); + put(hex_table[(low >> 4) & 0xF]); + put(hex_table[low & 0xF]); } } else @@ -1452,7 +1452,7 @@ namespace ajson str.append(1, (char)(0xC0 | ((utf1 >> 6) & 0xFF))); str.append(1, (char)(0x80 | ((utf1 & 0x3F)))); } - else if (utf1 < 0x80000) + else if (utf1 < 0x10000) { str.append(1, (char)(0xE0 | ((utf1 >> 12) & 0xFF))); str.append(1, (char)(0x80 | ((utf1 >> 6) & 0x3F))); @@ -1460,7 +1460,7 @@ namespace ajson } else { - if (utf1 < 0x110000) + if (utf1 >= 0x110000) { return false; } @@ -1475,6 +1475,7 @@ namespace ajson template bool escape_string(string_ty& str , const char * data , size_t len) { + uint64_t high = 0; str.clear(); str.reserve(len); if (len == 0) @@ -1539,6 +1540,23 @@ namespace ajson len -= 4; if (uft1 == 0) return false; + if (uft1 >= 0xD800 && uft1 <= 0xDBFF) // high surrogate + { + if (high) + return false; // already have a high surrogate, error + high = uft1; + continue; + } + else if (uft1 >= 0xDC00 && uft1 <= 0xDFFF) // low surrogate + { + if (!high) + 
return false; // no preceding high surrogate, error + else + { + uft1 = 0x10000 + ((high - 0xD800) << 10) + (uft1 - 0xDC00); // calculate code point + high = 0; + } + } if (!esacpe_utf8(str, uft1)) return false; continue; @@ -1553,9 +1571,11 @@ namespace ajson } str.append(1, c); } while (len > 0); + if (high) + return false; // high surrogate without valid low surrogate return true; } - + template struct json_impl < ty, typename std::enable_if ::value>::type >