diff --git a/src/cmd/frequency.rs b/src/cmd/frequency.rs
index 2cef402db..66cf2f4a4 100644
--- a/src/cmd/frequency.rs
+++ b/src/cmd/frequency.rs
@@ -208,7 +208,7 @@ impl Args {
                 for (i, field) in nsel.select(row_work.into_iter()).enumerate() {
                     field_work = {
                         if let Ok(s) = simdutf8::basic::from_utf8(field) {
-                            to_lowercase_into(s.trim(), &mut buf);
+                            util::to_lowercase_into(s.trim(), &mut buf);
                             buf.as_bytes().to_vec()
                         } else {
                             field.to_vec()
@@ -252,17 +252,3 @@ impl Args {
         Ok((sel.select(headers).map(<[u8]>::to_vec).collect(), sel))
     }
 }
-
-/// this is a non-allocating to_lowercase that uses an existing buffer
-/// and should be faster than the stdlib version
-/// TODO: if this proves to be faster per the benchmarks, we should use
-/// this project-wide over the allocating stdlib version, and move it to utils.rs
-#[inline]
-fn to_lowercase_into(s: &str, buf: &mut String) {
-    buf.clear();
-    for c in s.chars() {
-        for lc in c.to_lowercase() {
-            buf.push(lc);
-        }
-    }
-}
diff --git a/src/util.rs b/src/util.rs
index dc5cee522..5a819c491 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -1407,6 +1407,18 @@ pub async fn download_file(
     Ok(())
 }
 
+/// this is a non-allocating to_lowercase that uses an existing buffer
+/// and should be faster than the allocating std::to_lowercase
+#[inline]
+pub fn to_lowercase_into(s: &str, buf: &mut String) {
+    buf.clear();
+    for c in s.chars() {
+        for lc in c.to_lowercase() {
+            buf.push(lc);
+        }
+    }
+}
+
 /// load the first BUFFER*4 (128k) bytes of the file and check if it is utf8
 pub fn isutf8_file(path: &Path) -> Result {
     let metadata = std::fs::metadata(path)?;