compare characters in bracket expressions (collections) using proper case folding

Signed-off-by: Christian Brabandt <[email protected]>
chrisbra · Jul 24, 2024 · a11df33 · a11df33
1 parent 52c1a90
commit a11df33
Show file tree

Hide file tree

Showing 2 changed files with 24 additions and 11 deletions.
diff --git a/src/regexp.c b/src/regexp.c
@@ -1812,7 +1812,7 @@ cstrncmp(char_u *s1, char_u *s2, int *n)
 cstrchr(char_u *s, int c)
 {
     char_u	*p;
-    int		cc;
+    int		cc, lc;
 
     if (!rex.reg_ic || (!enc_utf8 && mb_char2len(c) > 1))
 	return vim_strchr(s, c);
@@ -1821,26 +1821,35 @@ cstrchr(char_u *s, int c)
     // faster (esp. when using MS Visual C++!).
     // For UTF-8 need to use folded case.
     if (enc_utf8 && c > 0x80)
+    {
 	cc = utf_fold(c);
+	lc = cc;
+    }
     else
-	 if (MB_ISUPPER(c))
-	cc = MB_TOLOWER(c);
-    else if (MB_ISLOWER(c))
-	cc = MB_TOUPPER(c);
-    else
-	return vim_strchr(s, c);
+	if (MB_ISUPPER(c))
+	{
+	    cc = MB_TOLOWER(c);
+	    lc = cc;
+	}
+	else if (MB_ISLOWER(c))
+	{
+	    cc = MB_TOUPPER(c);
+	    lc = c;
+	}
+	else
+	    return vim_strchr(s, c);
 
     if (has_mbyte)
     {
 	for (p = s; *p != NUL; p += (*mb_ptr2len)(p))
 	{
-	    if (enc_utf8 && c > 0x80)
+	    int uc = utf_ptr2char(p);
+	    if (enc_utf8 && (c > 0x80 || uc > 0x80))
 	    {
-		int uc = utf_ptr2char(p);
-
 		// Do not match an illegal byte.  E.g. 0xff matches 0xc3 0xbf,
 		// not 0xff.
-		if ((uc < 0x80 || uc != *p) && utf_fold(uc) == cc)
+		// compare with lower case of the character
+		if ((uc < 0x80 || uc != *p) && utf_fold(uc) == lc)
 		    return p;
 	    }
 	    else if (*p == c || *p == cc)

diff --git a/src/testdir/test_regexp_utf8.vim b/src/testdir/test_regexp_utf8.vim
@@ -606,11 +606,15 @@ func Test_search_multibyte_match_ascii()
     let noic_match = matchbufline('%', '\C\%u17f\%u17f', 1, '$')->mapnew({idx, val -> val.text})
     let ic_match2 = matchbufline('%', '\c\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
     let noic_match2 = matchbufline('%', '\C\%u17f\+', 1, '$')->mapnew({idx, val -> val.text})
+    let ic_match3 = matchbufline('%', '\c[\u17f]\+', 1, '$')->mapnew({idx, val -> val.text})
+    let noic_match3 = matchbufline('%', '\C[\u17f]\+', 1, '$')->mapnew({idx, val -> val.text})
 
     call assert_equal(['ss', '鰱鰱'], ic_match, "Ignorecase Regex-engine: " .. &re)
     call assert_equal(['鰱鰱'], noic_match, "No-Ignorecase Regex-engine: " .. &re)
     call assert_equal(['s', 'ss', '鰱鰱', '鰱'], ic_match2, "Ignorecase Regex-engine: " .. &re)
     call assert_equal(['鰱鰱','鰱'], noic_match2, "No-Ignorecase Regex-engine: " .. &re)
+    call assert_equal(['s', 'ss', '鰱鰱', '鰱'], ic_match3, "Ignorecase Collection Regex-engine: " .. &re)
+    call assert_equal(['鰱鰱','鰱'], noic_match3, "No-Ignorecase Collection Regex-engine: " .. &re)
   endfor
   bw!
 endfunc