From 6d9c52d7701a509abd76c36a113b30b3e25a4557 Mon Sep 17 00:00:00 2001 From: rhysd Date: Sat, 20 Apr 2024 22:16:37 +0900 Subject: [PATCH] reimplement `str::Lines` with memchr crate for optimizing chunk calculation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ``` chunk::large_file_per_10_lines time: [126.23 µs 126.82 µs 127.48 µs] change: [-33.516% -33.149% -32.796%] (p = 0.00 < 0.05) Performance has improved. Found 6 outliers among 100 measurements (6.00%) 6 (6.00%) high mild chunk::large_file_per_100_lines time: [84.162 µs 84.404 µs 84.710 µs] change: [-41.216% -40.959% -40.715%] (p = 0.00 < 0.05) Performance has improved. Found 4 outliers among 100 measurements (4.00%) 3 (3.00%) high mild 1 (1.00%) high severe chunk::large_file_per_1000_lines time: [76.495 µs 76.687 µs 76.870 µs] change: [-40.212% -40.011% -39.812%] (p = 0.00 < 0.05) Performance has improved. Found 4 outliers among 100 measurements (4.00%) 3 (3.00%) high mild 1 (1.00%) high severe chunk::large_file_per_3000_lines time: [64.525 µs 64.757 µs 64.997 µs] change: [-37.579% -37.230% -36.869%] (p = 0.00 < 0.05) Performance has improved. Found 11 outliers among 100 measurements (11.00%) 8 (8.00%) high mild 3 (3.00%) high severe Benchmarking syntect::ripgrep-small: Warming up for 3.0000 s syntect::ripgrep-small time: [73.191 ms 73.384 ms 73.614 ms] change: [-2.3840% -2.0182% -1.6277%] (p = 0.00 < 0.05) Performance has improved. Found 7 outliers among 100 measurements (7.00%) 4 (4.00%) high mild 3 (3.00%) high severe Benchmarking syntect::ripgrep-no-wrap: Warming up for 3.0000 s syntect::ripgrep-no-wrap time: [73.091 ms 73.244 ms 73.423 ms] change: [-2.8202% -2.4655% -2.1307%] (p = 0.00 < 0.05) Performance has improved. 
Found 6 outliers among 100 measurements (6.00%) 5 (5.00%) high mild 1 (1.00%) high severe Benchmarking syntect::ripgrep-background: Warming up for 3.0000 s syntect::ripgrep-background time: [73.195 ms 73.388 ms 73.615 ms] change: [-2.8219% -2.4260% -2.0027%] (p = 0.00 < 0.05) Performance has improved. Found 7 outliers among 100 measurements (7.00%) 5 (5.00%) high mild 2 (2.00%) high severe ``` --- src/chunk.rs | 60 ++++++++++++++++++++++++++++++++++++++++++++++++-- src/syntect.rs | 40 +-------------------------------- 2 files changed, 59 insertions(+), 41 deletions(-) diff --git a/src/chunk.rs b/src/chunk.rs index d23b709..69e9d14 100644 --- a/src/chunk.rs +++ b/src/chunk.rs @@ -1,7 +1,7 @@ use crate::grep::GrepMatch; use anyhow::Result; use encoding_rs::{Encoding, UTF_8}; -use memchr::memchr2; +use memchr::{memchr2, memchr_iter, Memchr}; use pathdiff::diff_paths; use std::cmp; use std::env; @@ -144,6 +144,62 @@ impl Files { } } +pub struct LinesInclusive<'a> { + lnum: usize, + prev: usize, + buf: &'a str, + iter: Memchr<'a>, +} + +impl<'a> LinesInclusive<'a> { + pub fn new(buf: &'a str) -> Self { + Self { + lnum: 1, + prev: 0, + buf, + iter: memchr_iter(b'\n', buf.as_bytes()), + } + } +} + +impl<'a> Iterator for LinesInclusive<'a> { + type Item = (&'a str, u64); + fn next(&mut self) -> Option<Self::Item> { + if let Some(idx) = self.iter.next() { + let lnum = self.lnum; + let end = idx + 1; + let line = &self.buf[self.prev..end]; + self.prev = end; + self.lnum += 1; + Some((line, lnum as u64)) + } else if self.prev == self.buf.len() { + None + } else { + let line = &self.buf[self.prev..]; + self.prev = self.buf.len(); + Some((line, self.lnum as u64)) + } + } +} + +struct Lines<'a>(LinesInclusive<'a>); + +impl<'a> Lines<'a> { + pub fn new(buf: &'a str) -> Self { + Self(LinesInclusive::new(buf)) + } +} + +impl<'a> Iterator for Lines<'a> { + type Item = (&'a str, u64); + fn next(&mut self) -> Option<Self::Item> { + let (line, lnum) = self.0.next()?; + let line = 
line.strip_suffix('\n').unwrap_or(line); + let line = line.strip_suffix('\r').unwrap_or(line); + Some((line, lnum)) + } +} + impl<I: Iterator<Item = Result<GrepMatch>>> Files<I> { fn calculate_chunk_range<'contents>( &self, @@ -225,7 +281,7 @@ impl<I: Iterator<Item = Result<GrepMatch>>> Iterator for Files<I> { Err(err) => return self.error_item(err.into()), }; // Assumes that matched lines are sorted by source location - let mut lines = contents.lines().enumerate().map(|(i, l)| (l, i as u64 + 1)); + let mut lines = Lines::new(&contents); let mut lmats = vec![LineMatch { line_number, ranges, diff --git a/src/syntect.rs b/src/syntect.rs index 382d10f..ecbe949 100644 --- a/src/syntect.rs +++ b/src/syntect.rs @@ -1,10 +1,9 @@ use crate::broken_pipe::IgnoreBrokenPipe as _; -use crate::chunk::File; +use crate::chunk::{File, LinesInclusive}; use crate::printer::{Printer, PrinterOptions, TermColorSupport, TextWrapMode}; use ansi_colours::ansi256_from_rgb; use anyhow::Result; use flate2::read::ZlibDecoder; -use memchr::{memchr_iter, Memchr}; use std::cmp; use std::io::{self, Stdout, StdoutLock, Write}; use std::ops::{Deref, DerefMut}; @@ -607,43 +606,6 @@ impl<'a> LineHighlighter<'a> { } } -// Like chunk::Lines, but includes newlines -struct LinesInclusive<'a> { - lnum: usize, - prev: usize, - buf: &'a str, - iter: Memchr<'a>, -} -impl<'a> LinesInclusive<'a> { - pub fn new(buf: &'a str) -> Self { - Self { - lnum: 1, - prev: 0, - buf, - iter: memchr_iter(b'\n', buf.as_bytes()), - } - } -} -impl<'a> Iterator for LinesInclusive<'a> { - type Item = (&'a str, u64); - fn next(&mut self) -> Option<Self::Item> { - if let Some(idx) = self.iter.next() { - let lnum = self.lnum; - let end = idx + 1; - let line = &self.buf[self.prev..end]; - self.prev = end; - self.lnum += 1; - Some((line, lnum as u64)) - } else if self.prev == self.buf.len() { - None - } else { - let line = &self.buf[self.prev..]; - self.prev = self.buf.len(); - Some((line, self.lnum as u64)) - } - } -} - // Drawer is responsible for one-time screen drawing struct Drawer<'file, W: Write> { grid: bool,