From 70b8fbe30ef285fc06b88e326d4f1b3924d29359 Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Fri, 3 Oct 2025 15:42:59 +0200 Subject: [PATCH 1/2] fix is_odd condition to ensure optimal number of iterations are used closes #32 should slightly reduce the number of iterations during diffing and therefore slightly speed up the diff. May change the results slightly in edge cases (but overall the old results were still correct, this would only affect edge cases with multiple correct results, very unlikely to be noticed in practice) --- Cargo.lock | 7 +++++ Cargo.toml | 1 + src/myers.rs | 6 +++- src/myers/middle_snake.rs | 4 +++ src/tests.rs | 65 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 82 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 2f90261..f580fb6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,12 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "cov-mark" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f1d92727879fb4f24cec33a35e3bff74035541326cbc12ad44ba8886d1927b0" + [[package]] name = "dissimilar" version = "1.0.10" @@ -37,6 +43,7 @@ dependencies = [ name = "imara-diff" version = "0.2.0" dependencies = [ + "cov-mark", "expect-test", "hashbrown", "memchr", diff --git a/Cargo.toml b/Cargo.toml index bccb729..ef0c6da 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ unified_diff = [] [dev-dependencies] # criterion = "0.4.0" +cov-mark = "2.1.0" expect-test = "1.4.0" # git-repository = "0.25.0" # similar = { version = "2.2.0", features = ["bytes"] } diff --git a/src/myers.rs b/src/myers.rs index 63ac540..32c6ae6 100644 --- a/src/myers.rs +++ b/src/myers.rs @@ -99,7 +99,7 @@ impl Myers { unsafe { MiddleSnakeSearch::<false>::new(self.kforward, file1, file2) }; let mut backwards_search = unsafe { MiddleSnakeSearch::<true>::new(self.kbackward, file1, file2) }; - let is_odd = (file2.len() - file2.len()) & 1 != 0; + let is_odd = 
file2.len().wrapping_sub(file1.len()) & 1 != 0; let mut ec = 0; @@ -111,6 +111,8 @@ impl Myers { backwards_search.contains(k) && backwards_search.x_pos_at_diagonal(k) <= token_idx1 }) { + #[cfg(test)] + cov_mark::hit!(ODD_SPLIT); match res { SearchResult::Snake => found_snake = true, SearchResult::Found { @@ -135,6 +137,8 @@ impl Myers { if let Some(res) = backwards_search.run(file1, file2, |k, token_idx1| { forward_search.contains(k) && token_idx1 <= forward_search.x_pos_at_diagonal(k) }) { + #[cfg(test)] + cov_mark::hit!(EVEN_SPLIT); match res { SearchResult::Snake => found_snake = true, SearchResult::Found { diff --git a/src/myers/middle_snake.rs b/src/myers/middle_snake.rs index 4b2ba1d..fb2e71b 100644 --- a/src/myers/middle_snake.rs +++ b/src/myers/middle_snake.rs @@ -6,6 +6,7 @@ use crate::util::{common_postfix, common_prefix}; const SNAKE_CNT: u32 = 20; const K_HEUR: u32 = 4; +#[derive(Debug)] pub struct MiddleSnakeSearch<const BACK: bool> { kvec: NonNull<i32>, kmin: i32, @@ -98,6 +99,8 @@ impl<const BACK: bool> MiddleSnakeSearch<BACK> { let mut res = None; let mut k = self.kmax; while k >= self.kmin { + #[cfg(test)] + cov_mark::hit!(SPLIT_SEARCH_ITER); let mut token_idx1 = if BACK { if self.x_pos_at_diagonal(k - 1) < self.x_pos_at_diagonal(k + 1) { self.x_pos_at_diagonal(k - 1) @@ -249,6 +252,7 @@ impl<const BACK: bool> MiddleSnakeSearch<BACK> { } } +#[derive(Debug)] pub enum SearchResult { Snake, Found { token_idx1: i32, token_idx2: i32 }, diff --git a/src/tests.rs b/src/tests.rs index fc0b4c2..6880783 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -118,6 +118,71 @@ fn foo() -> Bar{ } } +#[test] +fn myers_is_odd() { + let before = "a\nb\nx\ny\nx\n"; + let after = "b\na\nx\ny\n"; + + cov_mark::check!(ODD_SPLIT); + // if the check for odd doesn't work then + // we still find the correct result but the number of search + // iterations increases + cov_mark::check_count!(SPLIT_SEARCH_ITER, 9); + let input = InternedInput::new(before, after); + let diff = Diff::compute(Algorithm::Myers, &input); + expect![[r#" + @@ -1,5 +1,4 @@ + 
-a + b + +a + x + y + -x + "#]] + .assert_eq( + &diff + .unified_diff( + &BasicLineDiffPrinter(&input.interner), + UnifiedDiffConfig::default(), + &input, + ) + .to_string(), + ); +} +#[test] +fn myers_is_even() { + let before = "a\nb\nx\nx\ny\n"; + let after = "b\na\nx\ny\nx\n"; + + cov_mark::check!(EVEN_SPLIT); + // if the check for is_odd is incorrectly always true then we take a fastpath + // when we shouldn't which always leads to infinite iterations/recursion + // still we check the number of iterations here in case the search + // is buggy in more subtle ways + cov_mark::check_count!(SPLIT_SEARCH_ITER, 15); + let input = InternedInput::new(before, after); + let diff = Diff::compute(Algorithm::Myers, &input); + expect![[r#" + @@ -1,5 +1,5 @@ + -a + b + -x + +a + x + y + +x + "#]] + .assert_eq( + &diff + .unified_diff( + &BasicLineDiffPrinter(&input.interner), + UnifiedDiffConfig::default(), + &input, + ) + .to_string(), + ); +} + #[test] fn identical_files() { let file = r#"fn foo() -> Bar{ From a27bd4e01346238e3591d98f5c4e3865958a690b Mon Sep 17 00:00:00 2001 From: Pascal Kuthe Date: Fri, 3 Oct 2025 15:43:37 +0200 Subject: [PATCH 2/2] bump hashbrown --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f580fb6..59140cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,15 +26,15 @@ dependencies = [ [[package]] name = "foldhash" -version = "0.1.5" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" [[package]] name = "hashbrown" -version = "0.15.5" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" 
dependencies = [ "foldhash", ] diff --git a/Cargo.toml b/Cargo.toml index ef0c6da..af703a2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -17,7 +17,7 @@ exclude = [ ] [dependencies] -hashbrown = { version = "0.15", default-features = false, features = ["default-hasher", "inline-more"] } +hashbrown = { version = ">=0.15,<=0.16", default-features = false, features = ["default-hasher", "inline-more"] } memchr = "2.7.4" [features]