From 1f80d9bf1b17fd056dda79f5ba1219bfa0665899 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Wed, 19 Oct 2022 03:14:03 +0800 Subject: [PATCH 1/6] Created WebAssembly bindings to a subset of similar's API --- Cargo.toml | 3 + src/text/abstraction.rs | 10 +++- wasm/Cargo.toml | 12 ++++ wasm/similar.wit | 116 +++++++++++++++++++++++++++++++++++++ wasm/src/lib.rs | 125 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 265 insertions(+), 1 deletion(-) create mode 100644 wasm/Cargo.toml create mode 100644 wasm/similar.wit create mode 100644 wasm/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index 276f96a..25630a6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,9 @@ exclude = [ [package.metadata.docs.rs] all-features = true +[workspace] +members = ["wasm"] + [features] default = ["text"] text = [] diff --git a/src/text/abstraction.rs b/src/text/abstraction.rs index 99678ff..daf22ee 100644 --- a/src/text/abstraction.rs +++ b/src/text/abstraction.rs @@ -1,6 +1,6 @@ -use std::borrow::Cow; use std::hash::Hash; use std::ops::Range; +use std::{borrow::Cow, sync::Arc}; /// Reference to a [`DiffableStr`]. /// @@ -44,6 +44,14 @@ impl<'a, T: DiffableStr + ?Sized> DiffableStrRef for Cow<'a, T> { } } +impl DiffableStrRef for Arc { + type Output = T; + + fn as_diffable_str(&self) -> &T { + self + } +} + /// All supported diffable strings. /// /// The text module can work with different types of strings depending diff --git a/wasm/Cargo.toml b/wasm/Cargo.toml new file mode 100644 index 0000000..71974ee --- /dev/null +++ b/wasm/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "wasm" +version = "0.1.0" +edition = "2021" +publish = false + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +ouroboros = "0.15.5" +upstream = { version = "2.2.0", path = "..", package = "similar" } +wit-bindgen-rust = { git = "https://github.com/wasmerio/wit-bindgen", branch = "wasmer", version = "0.1.0" } diff --git a/wasm/similar.wit b/wasm/similar.wit new file mode 100644 index 0000000..7446f97 --- /dev/null +++ b/wasm/similar.wit @@ -0,0 +1,116 @@ +/// A quick way to get a unified diff as a string. +unified-diff: func( + alg: algorithm, + old: string, + new: string, + context-radius: u32, + header: option>, +) -> string + +resource config { + static default: func() -> config + /// Changes the algorithm. + /// + /// The default algorithm is myers. + algorithm: func(alg: algorithm) + /// Changes the newline termination flag. + /// + /// The default is automatic based on input. This flag controls the + /// behavior of [`TextDiff::iter_changes`] and unified diff generation + /// with regards to newlines. When the flag is set to `false` (which + /// is the default) then newlines are added. Otherwise the newlines + /// from the source sequences are reused. + newline-terminated: func(yes: bool) + /// Creates a diff of words. + /// + /// This splits the text into words and whitespace. + /// + /// Note on word diffs: because the text differ will tokenize the strings + /// into small segments it can be inconvenient to work with the results + /// depending on the use case. + diff-words: func(old: string, new: string) -> text-diff + /// Creates a diff of characters. + /// + /// Note on character diffs: because the text differ will tokenize the strings + /// into small segments it can be inconvenient to work with the results + /// depending on the use case. + diff-chars: func(old: string, new: string) -> text-diff +} + +/// Captures diff op codes for textual diffs. +resource text-diff { + /// The name of the algorithm that created the diff. + algorithm: func() -> algorithm + /// Returns true if items in the slice are newline terminated. + /// + /// This flag is used by the unified diff writer to determine if extra + /// newlines have to be added. + newline-terminated: func() -> bool + /// Return a measure of the sequences’ similarity in the range `0..=1`. + /// + /// A ratio of `1.0` means the two sequences are a complete match, a ratio + /// of `0.0` would indicate completely distinct sequences. + ratio: func() -> float32 + changes: func() -> list +} + +/// The different algorithms that can be used when diffing. +enum algorithm { + /// Myers' diff algorithm. + /// + /// * time: `O((N+M)D)` + /// * space `O(N+M)` + /// + /// See [the original article by Eugene W. Myers](http://www.xmailserver.org/diff2.pdf) + /// describing it. + /// + /// The implementation of this algorithm is based on the implementation by + /// Brandon Williams. + /// + /// # Heuristics + /// + /// At present this implementation of Myers' does not implement any more + /// advanced heuristics that would solve some pathological cases. For + /// instance passing two large and completely distinct sequences to the + /// algorithm will make it spin without making reasonable progress. + /// Currently, the only protection in the library against this is to pass a + /// deadline to the diffing algorithm. + /// + /// For potential improvements here see + /// [similar#15](https://github.com/mitsuhiko/similar/issues/15). + myers, + /// Patience diff algorithm. + /// + /// * time: `O(N log N + M log M + (N+M)D)` + /// * space: `O(N+M)` + /// + /// Tends to give more human-readable outputs. See [Bram Cohen's blog + /// post](https://bramcohen.livejournal.com/73318.html) describing it. + /// + /// This is based on the patience implementation of + /// [pijul](https://pijul.org/) by Pierre-Étienne Meunier. + patience, + /// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm. + /// + /// time: O((NM)D log (M)D) + /// space O(MN) + lcs, +} + +/// The tag of a change. +enum change-tag { + /// The change indicates equality (not a change). + equal, + /// The change indicates deleted text. + delete, + /// The change indicates inserted text. + insert, +} + +record change { + tag: change-tag, + old-index: option, + new-index: option, + value: string, + missing-newline: bool, +} diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs new file mode 100644 index 0000000..07f87fc --- /dev/null +++ b/wasm/src/lib.rs @@ -0,0 +1,125 @@ +use std::cell::RefCell; + +use upstream::TextDiffConfig; +use wit_bindgen_rust::Handle; + +use crate::similar::{Algorithm, Change}; + +wit_bindgen_rust::export!("similar.wit"); + +pub struct Similar; + +impl crate::similar::Similar for Similar { + fn unified_diff( + alg: Algorithm, + old: String, + new: String, + context_radius: u32, + header: Option<(String, String)>, + ) -> String { + upstream::udiff::unified_diff( + alg.into(), + &old, + &new, + context_radius as usize, + header.as_ref().map(|(l, r)| (l.as_str(), r.as_str())), + ) + } +} + +pub struct Config(RefCell); + +impl crate::similar::Config for Config { + fn default() -> Handle { + Handle::new(Config(RefCell::new(TextDiffConfig::default()))) + } + + fn algorithm(&self, alg: Algorithm) { + self.0.borrow_mut().algorithm(alg.into()); + } + + fn newline_terminated(&self, yes: bool) { + self.0.borrow_mut().newline_terminated(yes); + } + + fn diff_words(&self, old: String, new: String) -> Handle { + Handle::new(TextDiff::new(old, new, |old, new| { + self.0.borrow().diff_words(old, new) + })) + } + + fn diff_chars(&self, old: String, new: String) -> Handle { + Handle::new(TextDiff::new(old, new, |old, new| { + self.0.borrow().diff_chars(old, new) + })) + } +} + +#[ouroboros::self_referencing] +pub struct TextDiff { + old: String, + new: String, + #[borrows(old, new)] + #[not_covariant] + diff: upstream::TextDiff<'this, 'this, 'this, str>, +} + +impl crate::similar::TextDiff for TextDiff { + fn algorithm(&self) -> Algorithm { + self.with_diff(|d| d.algorithm()).into() + } + + fn newline_terminated(&self) -> bool { + self.with_diff(|d| d.newline_terminated()) + } + + fn ratio(&self) -> f32 { + self.with_diff(|d| d.ratio()) + } + + fn changes(&self) -> Vec { + self.with_diff(|d| d.iter_all_changes().map(|c| c.into()).collect()) + } +} + +impl From for similar::Algorithm { + fn from(value: upstream::Algorithm) -> Self { + match value { + upstream::Algorithm::Myers => similar::Algorithm::Myers, + upstream::Algorithm::Patience => similar::Algorithm::Patience, + upstream::Algorithm::Lcs => similar::Algorithm::Lcs, + } + } +} + +impl From for upstream::Algorithm { + fn from(value: similar::Algorithm) -> Self { + match value { + similar::Algorithm::Myers => upstream::Algorithm::Myers, + similar::Algorithm::Patience => upstream::Algorithm::Patience, + similar::Algorithm::Lcs => upstream::Algorithm::Lcs, + } + } +} + +impl From> for similar::Change { + fn from(value: upstream::Change<&'_ str>) -> Self { + similar::Change { + tag: value.tag().into(), + old_index: value.old_index().map(|ix| ix.try_into().unwrap()), + new_index: value.new_index().map(|ix| ix.try_into().unwrap()), + value: value.value().to_string(), + missing_newline: value.missing_newline(), + } + } +} + +impl From for similar::ChangeTag { + fn from(value: upstream::ChangeTag) -> Self { + match value { + upstream::ChangeTag::Equal => similar::ChangeTag::Equal, + upstream::ChangeTag::Delete => similar::ChangeTag::Delete, + upstream::ChangeTag::Insert => similar::ChangeTag::Insert, + } + } +} From 1de7a5d9e571632d0594d771e0d2c70465b51f75 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Wed, 19 Oct 2022 03:31:00 +0800 Subject: [PATCH 2/6] Added functions from the utils module --- wasm/similar.wit | 20 ++++++++++++++++++++ wasm/src/lib.rs | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+) diff --git a/wasm/similar.wit b/wasm/similar.wit index 7446f97..273ffc1 100644 --- a/wasm/similar.wit +++ b/wasm/similar.wit @@ -1,3 +1,15 @@ +/// Shortcut for making a line level diff. +diff-lines: func(alg: algorithm, old: string, new: string) -> list> + +/// Shortcut for making a word level diff. +diff-words: func(alg: algorithm, old: string, new: string) -> list> + +/// Shortcut for making a character level diff. +diff-chars: func(alg: algorithm, old: string, new: string) -> list> + +/// Shortcut for diffing two lists of strings. +diff-lists: func(alg: algorithm, old: list, new: list) -> list>> + /// A quick way to get a unified diff as a string. unified-diff: func( alg: algorithm, @@ -21,6 +33,14 @@ resource config { /// is the default) then newlines are added. Otherwise the newlines /// from the source sequences are reused. newline-terminated: func(yes: bool) + /// Creates a diff of lines. + /// + /// This splits the text `old` and `new` into lines preserving newlines + /// in the input. Line diffs are very common and because of that enjoy + /// special handling in similar. When a line diff is created with this + /// method the `newline_terminated` flag is flipped to `true` and will + /// influence the behavior of unified diff generation. + diff-lines: func(old: string, new: string) -> text-diff /// Creates a diff of words. /// /// This splits the text into words and whitespace. diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 07f87fc..5e99dee 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -10,6 +10,38 @@ wit_bindgen_rust::export!("similar.wit"); pub struct Similar; impl crate::similar::Similar for Similar { + fn diff_lines(alg: Algorithm, old: String, new: String) -> Vec<(similar::ChangeTag, String)> { + upstream::utils::diff_lines(alg.into(), &old, &new) + .into_iter() + .map(|(tag, s)| (tag.into(), s.to_string())) + .collect() + } + + fn diff_words(alg: Algorithm, old: String, new: String) -> Vec<(similar::ChangeTag, String)> { + upstream::utils::diff_words(alg.into(), &old, &new) + .into_iter() + .map(|(tag, s)| (tag.into(), s.to_string())) + .collect() + } + + fn diff_chars(alg: Algorithm, old: String, new: String) -> Vec<(similar::ChangeTag, String)> { + upstream::utils::diff_chars(alg.into(), &old, &new) + .into_iter() + .map(|(tag, s)| (tag.into(), s.to_string())) + .collect() + } + + fn diff_lists( + alg: Algorithm, + old: Vec, + new: Vec, + ) -> Vec<(similar::ChangeTag, Vec)> { + upstream::utils::diff_slices(alg.into(), &old, &new) + .into_iter() + .map(|(tag, items)| (tag.into(), items.to_vec())) + .collect() + } + fn unified_diff( alg: Algorithm, old: String, @@ -42,6 +74,12 @@ impl crate::similar::Config for Config { self.0.borrow_mut().newline_terminated(yes); } + fn diff_lines(&self, old: String, new: String) -> Handle { + Handle::new(TextDiff::new(old, new, |old, new| { + self.0.borrow().diff_lines(old, new) + })) + } + fn diff_words(&self, old: String, new: String) -> Handle { Handle::new(TextDiff::new(old, new, |old, new| { self.0.borrow().diff_words(old, new) From 4bb0e38e064f8c52c1740341b0d41ae2caaabb67 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Wed, 19 Oct 2022 03:34:23 +0800 Subject: [PATCH 3/6] Added wapm-specific metadata to Cargo.toml --- wasm/Cargo.toml | 22 ++++++++++++++++++++-- wasm/src/lib.rs | 2 +- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/wasm/Cargo.toml b/wasm/Cargo.toml index 71974ee..03bd859 100644 --- a/wasm/Cargo.toml +++ b/wasm/Cargo.toml @@ -1,12 +1,30 @@ [package] name = "wasm" -version = "0.1.0" -edition = "2021" +version = "2.2.0" +authors = [ + "Armin Ronacher ", + "Pierre-Étienne Meunier ", + "Brandon Williams ", +] +rust-version = "1.41" +license = "Apache-2.0" +description = "A diff library for Rust" +repository = "https://github.com/mitsuhiko/similar" +edition = "2018" publish = false +[lib] +crate-type = ["rlib", "cdylib"] + # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] ouroboros = "0.15.5" upstream = { version = "2.2.0", path = "..", package = "similar" } wit-bindgen-rust = { git = "https://github.com/wasmerio/wit-bindgen", branch = "wasmer", version = "0.1.0" } + +[package.metadata.wapm] +namespace = "Michael-F-Bryan" +package = "similar" +abi = "none" +bindings = { wit-exports = "similar.wit", wit-bindgen = "0.1.0" } diff --git a/wasm/src/lib.rs b/wasm/src/lib.rs index 5e99dee..100429d 100644 --- a/wasm/src/lib.rs +++ b/wasm/src/lib.rs @@ -1,4 +1,4 @@ -use std::cell::RefCell; +use std::{cell::RefCell, convert::TryInto}; use upstream::TextDiffConfig; use wit_bindgen_rust::Handle; From 1242344f8bbe35ae8c1e4c59bdfd6cfa2c4493a6 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Wed, 19 Oct 2022 03:45:22 +0800 Subject: [PATCH 4/6] Added the project's README to Cargo.toml --- wasm/Cargo.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/wasm/Cargo.toml b/wasm/Cargo.toml index 03bd859..5dd0563 100644 --- a/wasm/Cargo.toml +++ b/wasm/Cargo.toml @@ -12,6 +12,7 @@ description = "A diff library for Rust" repository = "https://github.com/mitsuhiko/similar" edition = "2018" publish = false +readme = "../README.md" [lib] crate-type = ["rlib", "cdylib"] From 123ac03c1e2c7a3640a677cd91406996795b3519 Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Wed, 19 Oct 2022 03:50:08 +0800 Subject: [PATCH 5/6] Automatically published tagged versions to WAPM --- .github/workflows/releases.yml | 40 ++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 .github/workflows/releases.yml diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml new file mode 100644 index 0000000..3339ca5 --- /dev/null +++ b/.github/workflows/releases.yml @@ -0,0 +1,40 @@ +name: Releases + +on: + push: + tags: + - "v*" + workflow_dispatch: + +env: + RUST_BACKTRACE: 1 + CARGO_NET_GIT_FETCH_WITH_CLI: true + +jobs: + publish-to-wapm: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + - uses: actions-rs/toolchain@v1 + with: + toolchain: 1.41.0 + profile: minimal + override: true + - name: Rust Cache + uses: Swatinem/rust-cache@v2 + - name: Install WebAssembly targets + run: rustup target add wasm32-unknown-unknown + - name: Setup Wasmer + uses: wasmerio/setup-wasmer@v1 + - name: Install cargo-wapm + uses: actions-rs/cargo@v1 + with: + command: install + args: cargo-wapm --verbose --debug + - name: Login to wapm.io + run: | + wapm config set registry.url https://registry.wapm.io + wapm login ${{ secrets.WAPM_IO_TOKEN }} + - name: Publish to wapm.io + run: cargo wapm + working-directory: wasm From 3a392b9490dd8a049b3411fda44d3c876ca1d07b Mon Sep 17 00:00:00 2001 From: Michael-F-Bryan Date: Wed, 19 Oct 2022 03:51:46 +0800 Subject: [PATCH 6/6] Revert the DiffableStrRef impl for Arc --- src/text/abstraction.rs | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/text/abstraction.rs b/src/text/abstraction.rs index daf22ee..99678ff 100644 --- a/src/text/abstraction.rs +++ b/src/text/abstraction.rs @@ -1,6 +1,6 @@ +use std::borrow::Cow; use std::hash::Hash; use std::ops::Range; -use std::{borrow::Cow, sync::Arc}; /// Reference to a [`DiffableStr`]. /// @@ -44,14 +44,6 @@ impl<'a, T: DiffableStr + ?Sized> DiffableStrRef for Cow<'a, T> { } } -impl DiffableStrRef for Arc { - type Output = T; - - fn as_diffable_str(&self) -> &T { - self - } -} - /// All supported diffable strings. /// /// The text module can work with different types of strings depending