Skip to content

Commit

Permalink
Add support for Unicode digits
Browse files Browse the repository at this point in the history
  • Loading branch information
tompazourek committed Apr 14, 2024
1 parent 6ec645d commit c303896
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 18 deletions.
88 changes: 70 additions & 18 deletions src/NaturalSort.Extension/NaturalSortComparer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -125,9 +125,26 @@ public int Compare(string str1, string str2)
var digit1 = i < paddingLength1 ? paddingChar : str1[startIndex1 + i - paddingLength1];
var digit2 = i < paddingLength2 ? paddingChar : str2[startIndex2 + i - paddingLength2];

var digitCompare = digit1.CompareTo(digit2);
if (digitCompare != 0)
return digitCompare;
if (digit1 is >= '0' and <= '9' && digit2 is >= '0' and <= '9')
{
// both digits are ordinary 0 to 9
var digitCompare = digit1.CompareTo(digit2);
if (digitCompare != 0)
return digitCompare;
}
else
{
// at least one of the digits is a non-ASCII Unicode digit: compare their numeric values first, and only if those are equal, fall back to comparing the chars themselves
var digitNumeric1 = char.GetNumericValue(digit1);
var digitNumeric2 = char.GetNumericValue(digit2);
var digitNumericCompare = digitNumeric1.CompareTo(digitNumeric2);
if (digitNumericCompare != 0)
return digitNumericCompare;

var digitCompare = digit1.CompareTo(digit2);
if (digitCompare != 0)
return digitCompare;
}
}

// if the numbers are equal, we compare how much we padded the strings
Expand Down Expand Up @@ -164,19 +181,54 @@ public int Compare(string str1, string str2)
}

/// <summary>
/// Classifies a single character into one of the token kinds used when splitting strings.
/// </summary>
/// <param name="c">The character to classify.</param>
/// <returns>
/// <c>TokenLetters</c> for ASCII letters and for non-ASCII characters accepted by <see cref="char.IsLetter(char)"/>;
/// <c>TokenDigits</c> for ASCII <c>0</c>-<c>9</c> and for non-ASCII characters accepted by <see cref="char.IsDigit(char)"/>;
/// <c>TokenOther</c> for everything else.
/// </returns>
private static byte GetTokenFromChar(char c)
{
    // ASCII range: classify with plain range checks, no Unicode category lookup needed.
    if (c < 128)
    {
        if (c is (>= 'a' and <= 'z') or (>= 'A' and <= 'Z'))
        {
            return TokenLetters;
        }

        if (c is >= '0' and <= '9')
        {
            return TokenDigits;
        }

        return TokenOther;
    }

    // Non-ASCII range: defer to the framework's Unicode classification.
    if (char.IsLetter(c))
    {
        return TokenLetters;
    }

    // Unicode decimal digits (e.g. Gurmukhi digits) are tokenized as digits as well.
    if (char.IsDigit(c))
    {
        return TokenDigits;
    }

    return TokenOther;
}
}
8 changes: 8 additions & 0 deletions tests/NaturalSort.Extension.Tests/NaturalSortComparerTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,14 @@ private static void RunTests_StringComparisonOrdinalIgnoreCase(string[] input, s
new[] { "x", "x x", "x!x", "x#x", "x%x", "x&x", "x(x", "x)x", "x,x", "x.x", "x;x", "x@x", "x[x", "x]x", "x^x", "x_x", "x{x", "x}x", "x~x", "x0x", "x1x", "x2x", "x3x", "x4x", "x5x", "x6x", "x7x", "x8x", "x9x", "xAx", "xBx", "xCx", "xDx", "xEx", "xFx", "xGx", "xHx", "xIx", "xJx", "xKx", "xLx", "xMx", "xNx", "xOx", "xPx", "xQx", "xRx", "xSx", "xTx", "xUx", "xVx", "xWx", "xx", "xXx", "xYx", "xZx" },
new[] { "x", "x x", "x!x", "x#x", "x%x", "x&x", "x(x", "x)x", "x,x", "x.x", "x;x", "x@x", "x[x", "x]x", "x^x", "x_x", "x{x", "x}x", "x~x", "x0x", "x1x", "x2x", "x3x", "x4x", "x5x", "x6x", "x7x", "x8x", "x9x", "xAx", "xBx", "xCx", "xDx", "xEx", "xFx", "xGx", "xHx", "xIx", "xJx", "xKx", "xLx", "xMx", "xNx", "xOx", "xPx", "xQx", "xRx", "xSx", "xTx", "xUx", "xVx", "xWx", "xx", "xXx", "xYx", "xZx" }
)]
[InlineData(
new[] { "A33", "A3", "A11", "A10", "Z", "A2", "A", "A੨", "A22", "A੨੨" },
new[] { "A", "A2", "A੨", "A3", "A10", "A11", "A22", "A੨੨", "A33", "Z" }
)]
[InlineData(
new[] { "A", "A10", "A11", "Z", "A੨", "A੨੨" },
new[] { "A", "A੨", "A10", "A11", "A੨੨", "Z" }
)]
public void WindowsExplorer(string[] input, string[] expected) => RunTests(input, expected);

/// <remarks>
Expand Down

0 comments on commit c303896

Please sign in to comment.