diff --git a/luasystem-scm-0.rockspec b/luasystem-scm-0.rockspec index 00a442c..ab83080 100644 --- a/luasystem-scm-0.rockspec +++ b/luasystem-scm-0.rockspec @@ -61,6 +61,7 @@ local function make_platform(plat) 'src/term.c', 'src/bitflags.c', 'src/wcwidth.c', + 'src/wcwidtha.c', }, defines = defines[plat], libraries = libraries[plat], diff --git a/spec/04-term_spec.lua b/spec/04-term_spec.lua index 813947a..711059b 100644 --- a/spec/04-term_spec.lua +++ b/spec/04-term_spec.lua @@ -539,6 +539,12 @@ describe("Terminal:", function() assert.same({2}, {system.utf8cwidth(ch2 .. ch3 .. ch4)}) end) + it("returns 2nd ambiguous boolean value only if requested", function() + assert.same({1}, {system.utf8cwidth("¡", false)}) + assert.same({1, true}, {system.utf8cwidth("¡", true)}) + assert.same({1, false}, {system.utf8cwidth("a", true)}) + end) + end) diff --git a/src/term.c b/src/term.c index 4deb5f1..650b838 100644 --- a/src/term.c +++ b/src/term.c @@ -36,6 +36,7 @@ // Windows does not have a wcwidth function, so we use compatibilty code from // http://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c by Markus Kuhn #include "wcwidth.h" +#include "wcwidtha.h" // ambiguous width checks for East Asian characters #ifdef _WIN32 @@ -950,14 +951,16 @@ int utf8_to_wchar(const char *utf8, size_t len, mk_wchar_t *codepoint) { Get the width of a utf8 character for terminal display. 
@function utf8cwidth @tparam string utf8_char the utf8 character to check, only the width of the first character will be returned +@tparam[opt=false] bool ambiguous if `true`, a second value is returned: a boolean indicating whether the character's width is ambiguous @treturn[1] int the display width in columns of the first character in the string (0 for an empty string) -@treturn[2] nil +@treturn[2] nil|bool if `ambiguous` is `true`, a boolean indicating if the character is ambiguous @treturn[2] string error message */ int lst_utf8cwidth(lua_State *L) { const char *utf8_char; size_t utf8_len; utf8_char = luaL_checklstring(L, 1, &utf8_len); + int ambiguous = lua_toboolean(L, 2); int width = 0; mk_wchar_t wc; @@ -984,6 +987,12 @@ int lst_utf8cwidth(lua_State *L) { } lua_pushinteger(L, width); + + if (ambiguous) { + // also check if the width is ambiguous + lua_pushboolean(L, mk_wcwidth_a(wc)); + return 2; + } return 1; } diff --git a/src/wcwidtha.c b/src/wcwidtha.c new file mode 100644 index 0000000..2936ee0 --- /dev/null +++ b/src/wcwidtha.c @@ -0,0 +1,259 @@ +// To update this file to the latest version of the Unicode standard +// save the Lua script below to a file named 'getranges.lua' +// execute as: +// curl -s https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt | lua getranges.lua + +/* the script: +local function singleline(line) + if line:find("; A", 1, true) then -- handle ambiguous characters only + local s,e = line:match("^([0-9a-fA-F]+)%.?%.?([0-9a-fA-F]*)") + e = (e == "" and s) or e -- single char, so end-range == start-range + local cmmnt = "// "..line:match("(; A.*)$") + local range = " {0x"..s..", 0x"..e.."}," + print(range..(" "):rep(30-#range)..cmmnt) -- print formatted output line + end +end + +-- read all lines from stdin and iterate over them +local t = {} +for line in io.lines() do + line = line:match("^%s*(.-)%s*$") -- strip whitespace + if line ~= "" and line:sub(1,1) ~= "#" then -- skip comments and empty lines + singleline(line) + end +end 
+*/ + + +#include "wcwidtha.h" + +struct interval { + mk_wchar_t start; + mk_wchar_t end; +}; + + +// Takes a Unicode code point and returns 1 if it is in the list of East Asian +// ambiguous width characters, 0 otherwise. +int mk_wcwidth_a(mk_wchar_t ucs) +{ + /* sorted list of ambiguous width characters in East Asian displays */ + /* generated by script in the comments above */ + static const struct interval ranges[] = { + {0x00A1, 0x00A1}, // ; A # Po INVERTED EXCLAMATION MARK + {0x00A4, 0x00A4}, // ; A # Sc CURRENCY SIGN + {0x00A7, 0x00A7}, // ; A # Po SECTION SIGN + {0x00A8, 0x00A8}, // ; A # Sk DIAERESIS + {0x00AA, 0x00AA}, // ; A # Lo FEMININE ORDINAL INDICATOR + {0x00AD, 0x00AD}, // ; A # Cf SOFT HYPHEN + {0x00AE, 0x00AE}, // ; A # So REGISTERED SIGN + {0x00B0, 0x00B0}, // ; A # So DEGREE SIGN + {0x00B1, 0x00B1}, // ; A # Sm PLUS-MINUS SIGN + {0x00B2, 0x00B3}, // ; A # No [2] SUPERSCRIPT TWO..SUPERSCRIPT THREE + {0x00B4, 0x00B4}, // ; A # Sk ACUTE ACCENT + {0x00B6, 0x00B7}, // ; A # Po [2] PILCROW SIGN..MIDDLE DOT + {0x00B8, 0x00B8}, // ; A # Sk CEDILLA + {0x00B9, 0x00B9}, // ; A # No SUPERSCRIPT ONE + {0x00BA, 0x00BA}, // ; A # Lo MASCULINE ORDINAL INDICATOR + {0x00BC, 0x00BE}, // ; A # No [3] VULGAR FRACTION ONE QUARTER..VULGAR FRACTION THREE QUARTERS + {0x00BF, 0x00BF}, // ; A # Po INVERTED QUESTION MARK + {0x00C6, 0x00C6}, // ; A # Lu LATIN CAPITAL LETTER AE + {0x00D0, 0x00D0}, // ; A # Lu LATIN CAPITAL LETTER ETH + {0x00D7, 0x00D7}, // ; A # Sm MULTIPLICATION SIGN + {0x00D8, 0x00D8}, // ; A # Lu LATIN CAPITAL LETTER O WITH STROKE + {0x00DE, 0x00E1}, // ; A # L& [4] LATIN CAPITAL LETTER THORN..LATIN SMALL LETTER A WITH ACUTE + {0x00E6, 0x00E6}, // ; A # Ll LATIN SMALL LETTER AE + {0x00E8, 0x00EA}, // ; A # Ll [3] LATIN SMALL LETTER E WITH GRAVE..LATIN SMALL LETTER E WITH CIRCUMFLEX + {0x00EC, 0x00ED}, // ; A # Ll [2] LATIN SMALL LETTER I WITH GRAVE..LATIN SMALL LETTER I WITH ACUTE + {0x00F0, 0x00F0}, // ; A # Ll LATIN SMALL LETTER ETH + {0x00F2, 0x00F3}, // ; A # 
Ll [2] LATIN SMALL LETTER O WITH GRAVE..LATIN SMALL LETTER O WITH ACUTE + {0x00F7, 0x00F7}, // ; A # Sm DIVISION SIGN + {0x00F8, 0x00FA}, // ; A # Ll [3] LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER U WITH ACUTE + {0x00FC, 0x00FC}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS + {0x00FE, 0x00FE}, // ; A # Ll LATIN SMALL LETTER THORN + {0x0101, 0x0101}, // ; A # Ll LATIN SMALL LETTER A WITH MACRON + {0x0111, 0x0111}, // ; A # Ll LATIN SMALL LETTER D WITH STROKE + {0x0113, 0x0113}, // ; A # Ll LATIN SMALL LETTER E WITH MACRON + {0x011B, 0x011B}, // ; A # Ll LATIN SMALL LETTER E WITH CARON + {0x0126, 0x0127}, // ; A # L& [2] LATIN CAPITAL LETTER H WITH STROKE..LATIN SMALL LETTER H WITH STROKE + {0x012B, 0x012B}, // ; A # Ll LATIN SMALL LETTER I WITH MACRON + {0x0131, 0x0133}, // ; A # L& [3] LATIN SMALL LETTER DOTLESS I..LATIN SMALL LIGATURE IJ + {0x0138, 0x0138}, // ; A # Ll LATIN SMALL LETTER KRA + {0x013F, 0x0142}, // ; A # L& [4] LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN SMALL LETTER L WITH STROKE + {0x0144, 0x0144}, // ; A # Ll LATIN SMALL LETTER N WITH ACUTE + {0x0148, 0x014B}, // ; A # L& [4] LATIN SMALL LETTER N WITH CARON..LATIN SMALL LETTER ENG + {0x014D, 0x014D}, // ; A # Ll LATIN SMALL LETTER O WITH MACRON + {0x0152, 0x0153}, // ; A # L& [2] LATIN CAPITAL LIGATURE OE..LATIN SMALL LIGATURE OE + {0x0166, 0x0167}, // ; A # L& [2] LATIN CAPITAL LETTER T WITH STROKE..LATIN SMALL LETTER T WITH STROKE + {0x016B, 0x016B}, // ; A # Ll LATIN SMALL LETTER U WITH MACRON + {0x01CE, 0x01CE}, // ; A # Ll LATIN SMALL LETTER A WITH CARON + {0x01D0, 0x01D0}, // ; A # Ll LATIN SMALL LETTER I WITH CARON + {0x01D2, 0x01D2}, // ; A # Ll LATIN SMALL LETTER O WITH CARON + {0x01D4, 0x01D4}, // ; A # Ll LATIN SMALL LETTER U WITH CARON + {0x01D6, 0x01D6}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND MACRON + {0x01D8, 0x01D8}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE + {0x01DA, 0x01DA}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND CARON + 
{0x01DC, 0x01DC}, // ; A # Ll LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE + {0x0251, 0x0251}, // ; A # Ll LATIN SMALL LETTER ALPHA + {0x0261, 0x0261}, // ; A # Ll LATIN SMALL LETTER SCRIPT G + {0x02C4, 0x02C4}, // ; A # Sk MODIFIER LETTER UP ARROWHEAD + {0x02C7, 0x02C7}, // ; A # Lm CARON + {0x02C9, 0x02CB}, // ; A # Lm [3] MODIFIER LETTER MACRON..MODIFIER LETTER GRAVE ACCENT + {0x02CD, 0x02CD}, // ; A # Lm MODIFIER LETTER LOW MACRON + {0x02D0, 0x02D0}, // ; A # Lm MODIFIER LETTER TRIANGULAR COLON + {0x02D8, 0x02DB}, // ; A # Sk [4] BREVE..OGONEK + {0x02DD, 0x02DD}, // ; A # Sk DOUBLE ACUTE ACCENT + {0x02DF, 0x02DF}, // ; A # Sk MODIFIER LETTER CROSS ACCENT + {0x0300, 0x036F}, // ; A # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X + {0x0391, 0x03A1}, // ; A # Lu [17] GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO + {0x03A3, 0x03A9}, // ; A # Lu [7] GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER OMEGA + {0x03B1, 0x03C1}, // ; A # Ll [17] GREEK SMALL LETTER ALPHA..GREEK SMALL LETTER RHO + {0x03C3, 0x03C9}, // ; A # Ll [7] GREEK SMALL LETTER SIGMA..GREEK SMALL LETTER OMEGA + {0x0401, 0x0401}, // ; A # Lu CYRILLIC CAPITAL LETTER IO + {0x0410, 0x044F}, // ; A # L& [64] CYRILLIC CAPITAL LETTER A..CYRILLIC SMALL LETTER YA + {0x0451, 0x0451}, // ; A # Ll CYRILLIC SMALL LETTER IO + {0x2010, 0x2010}, // ; A # Pd HYPHEN + {0x2013, 0x2015}, // ; A # Pd [3] EN DASH..HORIZONTAL BAR + {0x2016, 0x2016}, // ; A # Po DOUBLE VERTICAL LINE + {0x2018, 0x2018}, // ; A # Pi LEFT SINGLE QUOTATION MARK + {0x2019, 0x2019}, // ; A # Pf RIGHT SINGLE QUOTATION MARK + {0x201C, 0x201C}, // ; A # Pi LEFT DOUBLE QUOTATION MARK + {0x201D, 0x201D}, // ; A # Pf RIGHT DOUBLE QUOTATION MARK + {0x2020, 0x2022}, // ; A # Po [3] DAGGER..BULLET + {0x2024, 0x2027}, // ; A # Po [4] ONE DOT LEADER..HYPHENATION POINT + {0x2030, 0x2030}, // ; A # Po PER MILLE SIGN + {0x2032, 0x2033}, // ; A # Po [2] PRIME..DOUBLE PRIME + {0x2035, 0x2035}, // ; A # Po REVERSED PRIME + {0x203B, 
0x203B}, // ; A # Po REFERENCE MARK + {0x203E, 0x203E}, // ; A # Po OVERLINE + {0x2074, 0x2074}, // ; A # No SUPERSCRIPT FOUR + {0x207F, 0x207F}, // ; A # Lm SUPERSCRIPT LATIN SMALL LETTER N + {0x2081, 0x2084}, // ; A # No [4] SUBSCRIPT ONE..SUBSCRIPT FOUR + {0x20AC, 0x20AC}, // ; A # Sc EURO SIGN + {0x2103, 0x2103}, // ; A # So DEGREE CELSIUS + {0x2105, 0x2105}, // ; A # So CARE OF + {0x2109, 0x2109}, // ; A # So DEGREE FAHRENHEIT + {0x2113, 0x2113}, // ; A # Ll SCRIPT SMALL L + {0x2116, 0x2116}, // ; A # So NUMERO SIGN + {0x2121, 0x2122}, // ; A # So [2] TELEPHONE SIGN..TRADE MARK SIGN + {0x2126, 0x2126}, // ; A # Lu OHM SIGN + {0x212B, 0x212B}, // ; A # Lu ANGSTROM SIGN + {0x2153, 0x2154}, // ; A # No [2] VULGAR FRACTION ONE THIRD..VULGAR FRACTION TWO THIRDS + {0x215B, 0x215E}, // ; A # No [4] VULGAR FRACTION ONE EIGHTH..VULGAR FRACTION SEVEN EIGHTHS + {0x2160, 0x216B}, // ; A # Nl [12] ROMAN NUMERAL ONE..ROMAN NUMERAL TWELVE + {0x2170, 0x2179}, // ; A # Nl [10] SMALL ROMAN NUMERAL ONE..SMALL ROMAN NUMERAL TEN + {0x2189, 0x2189}, // ; A # No VULGAR FRACTION ZERO THIRDS + {0x2190, 0x2194}, // ; A # Sm [5] LEFTWARDS ARROW..LEFT RIGHT ARROW + {0x2195, 0x2199}, // ; A # So [5] UP DOWN ARROW..SOUTH WEST ARROW + {0x21B8, 0x21B9}, // ; A # So [2] NORTH WEST ARROW TO LONG BAR..LEFTWARDS ARROW TO BAR OVER RIGHTWARDS ARROW TO BAR + {0x21D2, 0x21D2}, // ; A # Sm RIGHTWARDS DOUBLE ARROW + {0x21D4, 0x21D4}, // ; A # Sm LEFT RIGHT DOUBLE ARROW + {0x21E7, 0x21E7}, // ; A # So UPWARDS WHITE ARROW + {0x2200, 0x2200}, // ; A # Sm FOR ALL + {0x2202, 0x2203}, // ; A # Sm [2] PARTIAL DIFFERENTIAL..THERE EXISTS + {0x2207, 0x2208}, // ; A # Sm [2] NABLA..ELEMENT OF + {0x220B, 0x220B}, // ; A # Sm CONTAINS AS MEMBER + {0x220F, 0x220F}, // ; A # Sm N-ARY PRODUCT + {0x2211, 0x2211}, // ; A # Sm N-ARY SUMMATION + {0x2215, 0x2215}, // ; A # Sm DIVISION SLASH + {0x221A, 0x221A}, // ; A # Sm SQUARE ROOT + {0x221D, 0x2220}, // ; A # Sm [4] PROPORTIONAL TO..ANGLE + {0x2223, 0x2223}, // ; A # 
Sm DIVIDES + {0x2225, 0x2225}, // ; A # Sm PARALLEL TO + {0x2227, 0x222C}, // ; A # Sm [6] LOGICAL AND..DOUBLE INTEGRAL + {0x222E, 0x222E}, // ; A # Sm CONTOUR INTEGRAL + {0x2234, 0x2237}, // ; A # Sm [4] THEREFORE..PROPORTION + {0x223C, 0x223D}, // ; A # Sm [2] TILDE OPERATOR..REVERSED TILDE + {0x2248, 0x2248}, // ; A # Sm ALMOST EQUAL TO + {0x224C, 0x224C}, // ; A # Sm ALL EQUAL TO + {0x2252, 0x2252}, // ; A # Sm APPROXIMATELY EQUAL TO OR THE IMAGE OF + {0x2260, 0x2261}, // ; A # Sm [2] NOT EQUAL TO..IDENTICAL TO + {0x2264, 0x2267}, // ; A # Sm [4] LESS-THAN OR EQUAL TO..GREATER-THAN OVER EQUAL TO + {0x226A, 0x226B}, // ; A # Sm [2] MUCH LESS-THAN..MUCH GREATER-THAN + {0x226E, 0x226F}, // ; A # Sm [2] NOT LESS-THAN..NOT GREATER-THAN + {0x2282, 0x2283}, // ; A # Sm [2] SUBSET OF..SUPERSET OF + {0x2286, 0x2287}, // ; A # Sm [2] SUBSET OF OR EQUAL TO..SUPERSET OF OR EQUAL TO + {0x2295, 0x2295}, // ; A # Sm CIRCLED PLUS + {0x2299, 0x2299}, // ; A # Sm CIRCLED DOT OPERATOR + {0x22A5, 0x22A5}, // ; A # Sm UP TACK + {0x22BF, 0x22BF}, // ; A # Sm RIGHT TRIANGLE + {0x2312, 0x2312}, // ; A # So ARC + {0x2460, 0x249B}, // ; A # No [60] CIRCLED DIGIT ONE..NUMBER TWENTY FULL STOP + {0x249C, 0x24E9}, // ; A # So [78] PARENTHESIZED LATIN SMALL LETTER A..CIRCLED LATIN SMALL LETTER Z + {0x24EB, 0x24FF}, // ; A # No [21] NEGATIVE CIRCLED NUMBER ELEVEN..NEGATIVE CIRCLED DIGIT ZERO + {0x2500, 0x254B}, // ; A # So [76] BOX DRAWINGS LIGHT HORIZONTAL..BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL + {0x2550, 0x2573}, // ; A # So [36] BOX DRAWINGS DOUBLE HORIZONTAL..BOX DRAWINGS LIGHT DIAGONAL CROSS + {0x2580, 0x258F}, // ; A # So [16] UPPER HALF BLOCK..LEFT ONE EIGHTH BLOCK + {0x2592, 0x2595}, // ; A # So [4] MEDIUM SHADE..RIGHT ONE EIGHTH BLOCK + {0x25A0, 0x25A1}, // ; A # So [2] BLACK SQUARE..WHITE SQUARE + {0x25A3, 0x25A9}, // ; A # So [7] WHITE SQUARE CONTAINING BLACK SMALL SQUARE..SQUARE WITH DIAGONAL CROSSHATCH FILL + {0x25B2, 0x25B3}, // ; A # So [2] BLACK UP-POINTING 
TRIANGLE..WHITE UP-POINTING TRIANGLE + {0x25B6, 0x25B6}, // ; A # So BLACK RIGHT-POINTING TRIANGLE + {0x25B7, 0x25B7}, // ; A # Sm WHITE RIGHT-POINTING TRIANGLE + {0x25BC, 0x25BD}, // ; A # So [2] BLACK DOWN-POINTING TRIANGLE..WHITE DOWN-POINTING TRIANGLE + {0x25C0, 0x25C0}, // ; A # So BLACK LEFT-POINTING TRIANGLE + {0x25C1, 0x25C1}, // ; A # Sm WHITE LEFT-POINTING TRIANGLE + {0x25C6, 0x25C8}, // ; A # So [3] BLACK DIAMOND..WHITE DIAMOND CONTAINING BLACK SMALL DIAMOND + {0x25CB, 0x25CB}, // ; A # So WHITE CIRCLE + {0x25CE, 0x25D1}, // ; A # So [4] BULLSEYE..CIRCLE WITH RIGHT HALF BLACK + {0x25E2, 0x25E5}, // ; A # So [4] BLACK LOWER RIGHT TRIANGLE..BLACK UPPER RIGHT TRIANGLE + {0x25EF, 0x25EF}, // ; A # So LARGE CIRCLE + {0x2605, 0x2606}, // ; A # So [2] BLACK STAR..WHITE STAR + {0x2609, 0x2609}, // ; A # So SUN + {0x260E, 0x260F}, // ; A # So [2] BLACK TELEPHONE..WHITE TELEPHONE + {0x261C, 0x261C}, // ; A # So WHITE LEFT POINTING INDEX + {0x261E, 0x261E}, // ; A # So WHITE RIGHT POINTING INDEX + {0x2640, 0x2640}, // ; A # So FEMALE SIGN + {0x2642, 0x2642}, // ; A # So MALE SIGN + {0x2660, 0x2661}, // ; A # So [2] BLACK SPADE SUIT..WHITE HEART SUIT + {0x2663, 0x2665}, // ; A # So [3] BLACK CLUB SUIT..BLACK HEART SUIT + {0x2667, 0x266A}, // ; A # So [4] WHITE CLUB SUIT..EIGHTH NOTE + {0x266C, 0x266D}, // ; A # So [2] BEAMED SIXTEENTH NOTES..MUSIC FLAT SIGN + {0x266F, 0x266F}, // ; A # Sm MUSIC SHARP SIGN + {0x269E, 0x269F}, // ; A # So [2] THREE LINES CONVERGING RIGHT..THREE LINES CONVERGING LEFT + {0x26BF, 0x26BF}, // ; A # So SQUARED KEY + {0x26C6, 0x26CD}, // ; A # So [8] RAIN..DISABLED CAR + {0x26CF, 0x26D3}, // ; A # So [5] PICK..CHAINS + {0x26D5, 0x26E1}, // ; A # So [13] ALTERNATE ONE-WAY LEFT WAY TRAFFIC..RESTRICTED LEFT ENTRY-2 + {0x26E3, 0x26E3}, // ; A # So HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE + {0x26E8, 0x26E9}, // ; A # So [2] BLACK CROSS ON SHIELD..SHINTO SHRINE + {0x26EB, 0x26F1}, // ; A # So [7] CASTLE..UMBRELLA ON GROUND + {0x26F4, 0x26F4}, 
// ; A # So FERRY + {0x26F6, 0x26F9}, // ; A # So [4] SQUARE FOUR CORNERS..PERSON WITH BALL + {0x26FB, 0x26FC}, // ; A # So [2] JAPANESE BANK SYMBOL..HEADSTONE GRAVEYARD SYMBOL + {0x26FE, 0x26FF}, // ; A # So [2] CUP ON BLACK SQUARE..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE + {0x273D, 0x273D}, // ; A # So HEAVY TEARDROP-SPOKED ASTERISK + {0x2776, 0x277F}, // ; A # No [10] DINGBAT NEGATIVE CIRCLED DIGIT ONE..DINGBAT NEGATIVE CIRCLED NUMBER TEN + {0x2B56, 0x2B59}, // ; A # So [4] HEAVY OVAL WITH OVAL INSIDE..HEAVY CIRCLED SALTIRE + {0x3248, 0x324F}, // ; A # No [8] CIRCLED NUMBER TEN ON BLACK SQUARE..CIRCLED NUMBER EIGHTY ON BLACK SQUARE + {0xE000, 0xF8FF}, // ; A # Co [6400] .. + {0xFE00, 0xFE0F}, // ; A # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 + {0xFFFD, 0xFFFD}, // ; A # So REPLACEMENT CHARACTER + {0x1F100, 0x1F10A}, // ; A # No [11] DIGIT ZERO FULL STOP..DIGIT NINE COMMA + {0x1F110, 0x1F12D}, // ; A # So [30] PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED CD + {0x1F130, 0x1F169}, // ; A # So [58] SQUARED LATIN CAPITAL LETTER A..NEGATIVE CIRCLED LATIN CAPITAL LETTER Z + {0x1F170, 0x1F18D}, // ; A # So [30] NEGATIVE SQUARED LATIN CAPITAL LETTER A..NEGATIVE SQUARED SA + {0x1F18F, 0x1F190}, // ; A # So [2] NEGATIVE SQUARED WC..SQUARE DJ + {0x1F19B, 0x1F1AC}, // ; A # So [18] SQUARED THREE D..SQUARED VOD + {0xE0100, 0xE01EF}, // ; A # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 + {0xF0000, 0xFFFFD}, // ; A # Co [65534] .. + {0x100000, 0x10FFFD} // ; A # Co [65534] .. 
+ }; + const size_t num_ranges = sizeof(ranges) / sizeof(ranges[0]); + + int left = 0, right = num_ranges - 1; + + while (left <= right) { + int mid = left + (right - left) / 2; + + if (ucs >= ranges[mid].start && ucs <= ranges[mid].end) { + return 1; // Character is in the range + } else if (ucs < ranges[mid].start) { + right = mid - 1; + } else { + left = mid + 1; + } + } + return 0; // Character is not in any of the ranges +} + diff --git a/src/wcwidtha.h b/src/wcwidtha.h new file mode 100644 index 0000000..9931b01 --- /dev/null +++ b/src/wcwidtha.h @@ -0,0 +1,12 @@ +// wcwidtha.h + +#ifndef MK_WCWIDTHA_H +#define MK_WCWIDTHA_H + + +#include "wcwidth.h" + +// Is a character in the list of ambiguous width characters (for east asian display) +int mk_wcwidth_a(mk_wchar_t ucs); + +#endif // MK_WCWIDTHA_H