Skip to content

Commit

Permalink
move non-allocating to_lowercase_into() to util.rs
Browse files Browse the repository at this point in the history
and use it project-wide
  • Loading branch information
jqnatividad committed Nov 18, 2023
1 parent cb765e0 commit 04c6e74
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 15 deletions.
16 changes: 1 addition & 15 deletions src/cmd/frequency.rs
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ impl Args {
for (i, field) in nsel.select(row_work.into_iter()).enumerate() {
field_work = {
if let Ok(s) = simdutf8::basic::from_utf8(field) {
to_lowercase_into(s.trim(), &mut buf);
util::to_lowercase_into(s.trim(), &mut buf);
buf.as_bytes().to_vec()
} else {
field.to_vec()
Expand Down Expand Up @@ -252,17 +252,3 @@ impl Args {
Ok((sel.select(headers).map(<[u8]>::to_vec).collect(), sel))
}
}

/// Lowercases `s` into the caller-supplied `buf`, reusing that buffer
/// instead of returning a freshly allocated `String` the way the stdlib
/// version does. Whatever `buf` held before the call is discarded.
/// TODO: if the benchmarks confirm this is faster, adopt it project-wide in
/// place of the allocating stdlib version, and move it to utils.rs
#[inline]
fn to_lowercase_into(s: &str, buf: &mut String) {
    // start from an empty buffer; `clear` keeps the existing capacity
    buf.clear();
    // one char can lowercase to several chars, hence the flat_map
    s.chars()
        .flat_map(char::to_lowercase)
        .for_each(|lowered| buf.push(lowered));
}
12 changes: 12 additions & 0 deletions src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1407,6 +1407,18 @@ pub async fn download_file(
Ok(())
}

/// Lowercases `s` into the caller-supplied buffer `buf`.
///
/// `buf` is cleared first, so its previous contents are discarded, but its
/// capacity is retained. Unlike `str::to_lowercase`, no new `String` is
/// created per call; the only allocation that can happen is growing `buf`
/// when its capacity is too small for the result, which makes this suitable
/// for reusing one buffer across many calls.
///
/// Each char is mapped independently with `char::to_lowercase`, so the
/// result can differ from `str::to_lowercase` for context-sensitive mappings
/// (a word-final Greek capital sigma becomes `σ` here, not `ς`).
#[inline]
pub fn to_lowercase_into(s: &str, buf: &mut String) {
    buf.clear();
    // Grow at most once up front instead of repeatedly while pushing. The
    // input's byte length is only an estimate: a few chars lowercase to a
    // longer or shorter UTF-8 sequence.
    buf.reserve(s.len());
    // one char can lowercase to several chars, hence the flat_map
    buf.extend(s.chars().flat_map(char::to_lowercase));
}

/// load the first BUFFER*4 (128k) bytes of the file and check if it is utf8
pub fn isutf8_file(path: &Path) -> Result<bool, CliError> {
let metadata = std::fs::metadata(path)?;
Expand Down

0 comments on commit 04c6e74

Please sign in to comment.