Skip to content

Commit

Permalink
compare characters in brace expressions using proper case folding
Browse files Browse the repository at this point in the history
Signed-off-by: Christian Brabandt <[email protected]>
  • Loading branch information
chrisbra committed Jul 24, 2024
1 parent 52c1a90 commit a11df33
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 11 deletions.
31 changes: 20 additions & 11 deletions src/regexp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1812,7 +1812,7 @@ cstrncmp(char_u *s1, char_u *s2, int *n)
cstrchr(char_u *s, int c)
{
char_u *p;
int cc;
int cc, lc;

if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
return vim_strchr(s, c);
Expand All @@ -1821,26 +1821,35 @@ cstrchr(char_u *s, int c)
// faster (esp. when using MS Visual C++!).
// For UTF-8 need to use folded case.
if (enc_utf8 && c > 0x80)
{
cc = utf_fold(c);
lc = cc;
}
else
if (MB_ISUPPER(c))
cc = MB_TOLOWER(c);
else if (MB_ISLOWER(c))
cc = MB_TOUPPER(c);
else
return vim_strchr(s, c);
if (MB_ISUPPER(c))
{
cc = MB_TOLOWER(c);
lc = cc;
}
else if (MB_ISLOWER(c))
{
cc = MB_TOUPPER(c);
lc = c;
}
else
return vim_strchr(s, c);

if (has_mbyte)
{
for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
{
if (enc_utf8 && c > 0x80)
int uc = utf_ptr2char(p);
if (enc_utf8 && (c > 0x80 || uc > 0x80))
{
int uc = utf_ptr2char(p);

// Do not match an illegal byte. E.g. 0xff matches 0xc3 0xbf,
// not 0xff.
if ((uc < 0x80 || uc != *p) && utf_fold(uc) == cc)
// compare with lower case of the character
if ((uc < 0x80 || uc != *p) && utf_fold(uc) == lc)
return p;
}
else if (*p == c || *p == cc)
Expand Down
4 changes: 4 additions & 0 deletions src/testdir/test_regexp_utf8.vim
Original file line number Diff line number Diff line change
Expand Up @@ -606,11 +606,15 @@ func Test_search_multibyte_match_ascii()
let noic_match = matchbufline('%', '\C\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
let ic_match2 = matchbufline('%', '\c\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
let noic_match2 = matchbufline('%', '\C\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
let ic_match3 = matchbufline('%', '\c[\u17f]\+', 1, '$')->mapnew({idx, val -> val.text})
let noic_match3 = matchbufline('%', '\C[\u17f]\+', 1, '$')->mapnew({idx, val -> val.text})

call assert_equal(['ss', 'ſſ'], ic_match, "Ignorecase Regex-engine: " .. &re)
call assert_equal(['ſſ'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
call assert_equal(['s', 'ss', 'ſſ', 'ſ'], ic_match2, "Ignorecase Regex-engine: " .. &re)
call assert_equal(['ſſ','ſ'], noic_match2, "No-Ignorecase Regex-engine: " .. &re)
call assert_equal(['s', 'ss', 'ſſ', 'ſ'], ic_match3, "Ignorecase Collection Regex-engine: " .. &re)
call assert_equal(['ſſ','ſ'], noic_match3, "No-Ignorecase Collection Regex-engine: " .. &re)
endfor
bw!
endfunc
Expand Down

0 comments on commit a11df33

Please sign in to comment.