Skip to content

Commit 38ff1db

Browse files
authored
Merge pull request #62 from TheSchemm/master
Fix for Directory Ordering compare function.
2 parents 2d772e7 + 79ce085 commit 38ff1db

File tree

2 files changed

+34
-17
lines changed

2 files changed

+34
-17
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ edition = "2018"
1111

1212
[dependencies]
1313
fnv = "1.0"
14+
icu_casemap = "1.5"
1415
uuid = "1"
1516

1617
[dev-dependencies]

src/internal/path.rs

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,24 @@
1+
use icu_casemap::CaseMapper;
12
use std::cmp::Ordering;
23
use std::io;
34
use std::path::{Component, Path, PathBuf};
45

56
// ========================================================================= //
67

78
const MAX_NAME_LEN: usize = 31;
9+
const CASE_MAPPER: CaseMapper = CaseMapper::new();
810

911
// ========================================================================= //
1012

11-
// according to the spec, "For each UTF-16 code point, convert to uppercase by
12-
// using the Unicode Default Case Conversion Algorithm, simple case conversion
13-
// variant (simple case foldings)"
14-
// it's not clear what that means, since neither case folding nor strict upper
15-
// case conversion yields convert('Ö') < convert('ß'), see the test case
16-
fn uppercase(s: &str) -> String {
17-
let mut upper = String::new();
18-
for c in s.chars() {
19-
match c {
20-
'ß' => upper.push('ẞ'),
21-
c => upper.extend(c.to_uppercase()),
22-
}
23-
}
24-
upper
13+
/// Converts a char to uppercase as defined in MS-CFB,
14+
/// using simple (one-to-one) case mapping, with room to add exceptions.
15+
/// Used when two directory entry names need to be compared.
16+
fn cfb_uppercase_char(c: char) -> char {
17+
// TODO: Add the edge-case mappings listed in the table in
18+
// Appendix A, note <3> (Section 2.6.4) of MS-CFB.
19+
20+
// Base case, just do a simple uppercase
21+
CASE_MAPPER.simple_uppercase(c)
2522
}
2623

2724
/// Compares two directory entry names according to CFB ordering, which is
@@ -35,7 +32,11 @@ pub fn compare_names(name1: &str, name2: &str) -> Ordering {
3532
// particular way of doing the uppercasing on individual UTF-16 code
3633
// units, along with a list of weird exceptions and corner cases. But
3734
// hopefully this is good enough for 99+% of the time.
38-
Ordering::Equal => uppercase(name1).cmp(&uppercase(name2)),
35+
Ordering::Equal => {
36+
let n1 = name1.chars().map(cfb_uppercase_char);
37+
let n2 = name2.chars().map(cfb_uppercase_char);
38+
n1.cmp(n2)
39+
}
3940
other => other,
4041
}
4142
}
@@ -100,8 +101,8 @@ pub fn path_from_name_chain(names: &[&str]) -> PathBuf {
100101
#[cfg(test)]
101102
mod tests {
102103
use super::{
103-
compare_names, name_chain_from_path, path_from_name_chain,
104-
validate_name,
104+
cfb_uppercase_char, compare_names, name_chain_from_path,
105+
path_from_name_chain, validate_name,
105106
};
106107
use std::cmp::Ordering;
107108
use std::path::{Path, PathBuf};
@@ -126,6 +127,21 @@ mod tests {
126127
),
127128
Ordering::Less
128129
);
130+
131+
let uppercase = "ßQÑ52Ç4ÅÁÔÂFÛCWCÙÂNË5Q=="
132+
.chars()
133+
.map(cfb_uppercase_char)
134+
.collect::<String>();
135+
136+
assert_eq!("ßQÑ52Ç4ÅÁÔÂFÛCWCÙÂNË5Q==", uppercase);
137+
138+
assert_eq!(
139+
compare_names(
140+
"ÜL43ÁMÆÛÏEKZÅYWÚÓVDÙÄÀ==",
141+
"ßQÑ52Ç4ÅÁÔÂFÛCWCÙÂNË5Q=="
142+
),
143+
Ordering::Less
144+
);
129145
}
130146

131147
#[test]

0 commit comments

Comments
 (0)