From 47a460d01658aa990f6425a54131517491dd8e5e Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Sat, 26 Apr 2025 23:13:33 +0200 Subject: [PATCH 1/8] feat: new pattern syntax --- harper-core/src/linting/despite_of.rs | 8 +- harper-core/src/linting/modal_of.rs | 46 ++--- harper-core/src/patterns/any_pattern.rs | 1 + harper-core/src/patterns/mod.rs | 1 + .../src/patterns/new_syntax_experiment.rs | 170 ++++++++++++++++++ .../src/patterns/whitespace_pattern.rs | 1 + 6 files changed, 185 insertions(+), 42 deletions(-) create mode 100644 harper-core/src/patterns/new_syntax_experiment.rs diff --git a/harper-core/src/linting/despite_of.rs b/harper-core/src/linting/despite_of.rs index 90ed3d9f8..eaa9cfded 100644 --- a/harper-core/src/linting/despite_of.rs +++ b/harper-core/src/linting/despite_of.rs @@ -1,6 +1,6 @@ use crate::{ Token, TokenStringExt, - patterns::{Pattern, SequencePattern}, + patterns::{Pattern, new_syntax_experiment::prelude::*}, }; use super::{Lint, LintKind, PatternLinter, Suggestion}; @@ -11,12 +11,8 @@ pub struct DespiteOf { impl Default for DespiteOf { fn default() -> Self { - let pattern = SequencePattern::aco("despite") - .then_whitespace() - .then_exact_word("of"); - Self { - pattern: Box::new(pattern), + pattern: Box::new(seq!["despite", WS, exact("of")]), } } } diff --git a/harper-core/src/linting/modal_of.rs b/harper-core/src/linting/modal_of.rs index e5b1291e6..b03ce7797 100644 --- a/harper-core/src/linting/modal_of.rs +++ b/harper-core/src/linting/modal_of.rs @@ -1,6 +1,6 @@ use crate::{ Lrc, Token, TokenStringExt, - patterns::{EitherPattern, Pattern, SequencePattern, WordSet}, + patterns::{Pattern, new_syntax_experiment::prelude::*}, }; use super::{Lint, LintKind, PatternLinter, Suggestion}; @@ -21,43 +21,17 @@ impl Default for ModalOf { words.add(&format!("{}n't", word)); }); - let modal_of = Lrc::new( - SequencePattern::default() - .then(words) - .then_whitespace() - .t_aco("of"), - ); - - let ws_course = Lrc::new(SequencePattern::default().then_whitespace().t_aco("course")); - - let modal_of_course = Lrc::new( - SequencePattern::default() - .then(modal_of.clone()) - .then(ws_course.clone()), - ); - - let anyword_might_of = Lrc::new( - SequencePattern::default() - .then_any_word() - .then_whitespace() - .t_aco("might") - .then_whitespace() - .t_aco("of"), - ); - - let anyword_might_of_course = Lrc::new( - SequencePattern::default() - .then(anyword_might_of.clone()) - .then(ws_course.clone()), - ); + let modal_of = Lrc::new(seq![words, WS, "of"]); + let anyword_might_of = Lrc::new(seq![WORD, WS, "might", WS, "of"]); Self { - pattern: Box::new(EitherPattern::new(vec![ - Box::new(anyword_might_of_course), - Box::new(modal_of_course), - Box::new(anyword_might_of), - Box::new(modal_of), - ])), + pattern: Box::new(Choice::new(( + // TODO: Use optional for + seq![anyword_might_of.clone(), WS, "course"], + seq![modal_of.clone(), WS, "course"], + anyword_might_of, + modal_of, + ))), } } } diff --git a/harper-core/src/patterns/any_pattern.rs b/harper-core/src/patterns/any_pattern.rs index 2d68b4516..9521a1f49 100644 --- a/harper-core/src/patterns/any_pattern.rs +++ b/harper-core/src/patterns/any_pattern.rs @@ -5,6 +5,7 @@ use crate::Token; use super::Pattern; /// A [`Pattern`] that will match any single token. +#[derive(Clone, Copy)] pub struct AnyPattern; impl Pattern for AnyPattern { diff --git a/harper-core/src/patterns/mod.rs b/harper-core/src/patterns/mod.rs index 2750c6c36..594a1bc9c 100644 --- a/harper-core/src/patterns/mod.rs +++ b/harper-core/src/patterns/mod.rs @@ -18,6 +18,7 @@ mod implies_quantity; mod indefinite_article; mod invert; mod naive_pattern_group; +pub mod new_syntax_experiment; mod nominal_phrase; mod pattern_map; mod repeating_pattern; diff --git a/harper-core/src/patterns/new_syntax_experiment.rs b/harper-core/src/patterns/new_syntax_experiment.rs new file mode 100644 index 000000000..23c60ca80 --- /dev/null +++ b/harper-core/src/patterns/new_syntax_experiment.rs @@ -0,0 +1,170 @@ +use std::num::NonZeroUsize; + +use crate::{CharString, Token}; + +use super::{AnyCapitalization, AnyPattern, Pattern, WhitespacePattern}; + +pub trait IntoPattern { + type Output: Pattern + 'static; + fn into_pattern(self) -> Self::Output; +} +impl IntoPattern for T { + type Output = T; + fn into_pattern(self) -> Self::Output { + self + } +} +impl IntoPattern for &str { + type Output = AnyCapitalization; + fn into_pattern(self) -> Self::Output { + AnyCapitalization::of(self) + } +} + +pub trait IntoPatternList { + fn into_pattern_list(self) -> Vec>; +} +impl IntoPatternList for Vec> { + #[inline] + fn into_pattern_list(self) -> Vec> { + self + } +} +impl IntoPatternList for () { + #[inline] + fn into_pattern_list(self) -> Vec> { + Vec::new() + } +} +impl IntoPatternList for P { + #[inline] + fn into_pattern_list(self) -> Vec> { + vec![Box::new(self.into_pattern())] + } +} + +macro_rules! impl_into_pattern_list { + ($($name:ident = $index:tt),+) => { + impl<$($name: IntoPattern),+> IntoPatternList for ($($name),+) { + #[inline] + fn into_pattern_list(self) -> Vec> { + vec![$(Box::new(self.$index.into_pattern())),+] + } + } + }; +} +impl_into_pattern_list!(A = 0, B = 1); +impl_into_pattern_list!(A = 0, B = 1, C = 2); +impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3); +impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4); +impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5); +impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6); +impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6, H = 7); +#[rustfmt::skip] +impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6, H = 7, I = 8); +#[rustfmt::skip] +impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6, H = 7, I = 8, J = 9); +#[rustfmt::skip] +impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6, H = 7, I = 8, J = 9, K = 10); +#[rustfmt::skip] +impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6, H = 7, I = 8, J = 9, K = 10, L = 11); + +pub struct Sequence { + pub patterns: Vec>, +} +impl Sequence { + pub fn new(patterns: impl IntoPatternList) -> Self { + Self { + patterns: patterns.into_pattern_list(), + } + } +} +impl Pattern for Sequence { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + let mut tok_cursor = 0; + + for pat in &self.patterns { + let match_length = pat.matches(&tokens[tok_cursor..], source)?; + tok_cursor += match_length.get(); + } + + NonZeroUsize::new(tok_cursor) + } +} + +pub struct Choice { + pub patterns: Vec>, +} +impl Choice { + pub fn new(patterns: impl IntoPatternList) -> Self { + Self { + patterns: patterns.into_pattern_list(), + } + } +} +impl Pattern for Choice { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + let mut longest = 0; + + for pattern in self.patterns.iter() { + let match_len = pattern.matches(tokens, source).map_or(0, NonZeroUsize::get); + longest = longest.max(match_len); + } + + NonZeroUsize::new(longest) + } +} + +struct ExactWord { + word: CharString, +} +impl Pattern for ExactWord { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + let tok = tokens.first()?; + if !tok.kind.is_word() { + return None; + } + if tok.span.len() != self.word.len() { + return None; + } + + let chars = tok.span.get_content(source); + let eq = chars == self.word.as_slice(); + + NonZeroUsize::new(if eq { 1 } else { 0 }) + } +} + +pub fn exact(word: &str) -> impl Pattern { + ExactWord { + word: word.chars().collect(), + } +} + +#[derive(Clone, Copy)] +pub struct WordTokenPattern; +impl Pattern for WordTokenPattern { + fn matches(&self, tokens: &[Token], _source: &[char]) -> Option { + let tok = tokens.first()?; + if !tok.kind.is_word() { + return None; + } + NonZeroUsize::new(1) + } +} + +pub const ANY: AnyPattern = AnyPattern; +pub const WORD: WordTokenPattern = WordTokenPattern; +pub const WS: WhitespacePattern = WhitespacePattern; + +pub mod prelude { + pub use super::super::{Pattern, WordSet}; + pub use super::{ANY, Choice, Sequence, WORD, WS, exact}; + + macro_rules! seq { + ($($item:expr),* $(,)?) => { + Sequence::new(($($item),*)) + }; + } + pub(crate) use seq; +} diff --git a/harper-core/src/patterns/whitespace_pattern.rs b/harper-core/src/patterns/whitespace_pattern.rs index 2ead95e6f..d540b2de5 100644 --- a/harper-core/src/patterns/whitespace_pattern.rs +++ b/harper-core/src/patterns/whitespace_pattern.rs @@ -2,6 +2,7 @@ use std::num::NonZeroUsize; use super::Pattern; +#[derive(Clone, Copy)] pub struct WhitespacePattern; impl Pattern for WhitespacePattern { From fcf0d67a9020ece694099676095251f3078e3885 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Sun, 27 Apr 2025 02:28:18 +0200 Subject: [PATCH 2/8] feat: zero-width patterns --- harper-core/src/linting/no_oxford_comma.rs | 4 ++-- harper-core/src/linting/oxford_comma.rs | 4 ++-- harper-core/src/linting/pattern_linter.rs | 5 ++--- harper-core/src/patterns/all.rs | 8 +++---- .../src/patterns/any_capitalization.rs | 6 ++--- harper-core/src/patterns/any_pattern.rs | 6 ++--- harper-core/src/patterns/either_pattern.rs | 22 +++++++++++-------- harper-core/src/patterns/exact_phrase.rs | 4 +--- harper-core/src/patterns/implies_quantity.rs | 12 +++++----- .../src/patterns/indefinite_article.rs | 4 +--- harper-core/src/patterns/invert.rs | 12 +++++----- harper-core/src/patterns/mod.rs | 16 +++++++++----- .../src/patterns/naive_pattern_group.rs | 4 +--- harper-core/src/patterns/nominal_phrase.rs | 6 ++--- harper-core/src/patterns/pattern_map.rs | 4 +--- harper-core/src/patterns/repeating_pattern.rs | 18 +++++++++------ harper-core/src/patterns/sequence_pattern.rs | 18 ++++++--------- harper-core/src/patterns/similar_to_phrase.rs | 4 +--- .../src/patterns/split_compound_word.rs | 6 ++--- .../src/patterns/whitespace_pattern.rs | 16 ++++++-------- .../src/patterns/within_edit_distance.rs | 9 +++++--- .../src/patterns/word_pattern_group.rs | 4 +--- harper-core/src/patterns/word_set.rs | 6 ++--- 23 files changed, 90 insertions(+), 108 deletions(-) diff --git a/harper-core/src/linting/no_oxford_comma.rs b/harper-core/src/linting/no_oxford_comma.rs index 143ef96bb..03528f7a8 100644 --- a/harper-core/src/linting/no_oxford_comma.rs +++ b/harper-core/src/linting/no_oxford_comma.rs @@ -65,12 +65,12 @@ impl Linter for NoOxfordComma { if let Some(match_len) = match_len { let lint = self.match_to_lint( - &sentence[tok_cursor..tok_cursor + match_len.get()], + &sentence[tok_cursor..tok_cursor + match_len], document.get_source(), ); lints.extend(lint); - tok_cursor += match_len.get(); + tok_cursor += match_len; } else { tok_cursor += 1; } diff --git a/harper-core/src/linting/oxford_comma.rs b/harper-core/src/linting/oxford_comma.rs index a825c597c..b253baf47 100644 --- a/harper-core/src/linting/oxford_comma.rs +++ b/harper-core/src/linting/oxford_comma.rs @@ -85,12 +85,12 @@ impl Linter for OxfordComma { if let Some(match_len) = match_len { let lint = self.match_to_lint( - &sentence[tok_cursor..tok_cursor + match_len.get()], + &sentence[tok_cursor..tok_cursor + match_len], document.get_source(), ); lints.extend(lint); - tok_cursor += match_len.get(); + tok_cursor += match_len; } else { tok_cursor += 1; } diff --git a/harper-core/src/linting/pattern_linter.rs b/harper-core/src/linting/pattern_linter.rs index c34296760..e5bb59533 100644 --- a/harper-core/src/linting/pattern_linter.rs +++ b/harper-core/src/linting/pattern_linter.rs @@ -54,11 +54,10 @@ pub fn run_on_chunk(linter: &impl PatternLinter, chunk: &[Token], source: &[char let match_len = linter.pattern().matches(&chunk[tok_cursor..], source); if let Some(match_len) = match_len { - let lint = - linter.match_to_lint(&chunk[tok_cursor..tok_cursor + match_len.get()], source); + let lint = linter.match_to_lint(&chunk[tok_cursor..tok_cursor + match_len], source); lints.extend(lint); - tok_cursor += match_len.get(); + tok_cursor += match_len; } else { tok_cursor += 1; } diff --git a/harper-core/src/patterns/all.rs b/harper-core/src/patterns/all.rs index 7e5122581..648833aee 100644 --- a/harper-core/src/patterns/all.rs +++ b/harper-core/src/patterns/all.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use crate::Token; use super::Pattern; @@ -24,14 +22,14 @@ impl All { } impl Pattern for All { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { let mut max = 0; for pattern in &self.children { let len = pattern.matches(tokens, source)?; - max = max.max(len.get()); + max = max.max(len); } - NonZeroUsize::new(max) + Some(max) } } diff --git a/harper-core/src/patterns/any_capitalization.rs b/harper-core/src/patterns/any_capitalization.rs index 67cecab25..0a6f94ed6 100644 --- a/harper-core/src/patterns/any_capitalization.rs +++ b/harper-core/src/patterns/any_capitalization.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use crate::{CharString, Token}; use super::Pattern; @@ -23,7 +21,7 @@ impl AnyCapitalization { } impl Pattern for AnyCapitalization { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { let tok = tokens.first()?; if !tok.kind.is_word() { @@ -41,6 +39,6 @@ impl Pattern for AnyCapitalization { .zip(&self.word) .all(|(a, b)| a.eq_ignore_ascii_case(b)); - NonZeroUsize::new(if partial_match { 1 } else { 0 }) + if partial_match { Some(1) } else { None } } } diff --git a/harper-core/src/patterns/any_pattern.rs b/harper-core/src/patterns/any_pattern.rs index 2d68b4516..44063572a 100644 --- a/harper-core/src/patterns/any_pattern.rs +++ b/harper-core/src/patterns/any_pattern.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use crate::Token; use super::Pattern; @@ -8,7 +6,7 @@ use super::Pattern; pub struct AnyPattern; impl Pattern for AnyPattern { - fn matches(&self, tokens: &[Token], _source: &[char]) -> Option { - NonZeroUsize::new(if tokens.is_empty() { 0 } else { 1 }) + fn matches(&self, tokens: &[Token], _source: &[char]) -> Option { + if tokens.is_empty() { None } else { Some(1) } } } diff --git a/harper-core/src/patterns/either_pattern.rs b/harper-core/src/patterns/either_pattern.rs index 512818828..d4ad6ec02 100644 --- a/harper-core/src/patterns/either_pattern.rs +++ b/harper-core/src/patterns/either_pattern.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use crate::Token; use super::Pattern; @@ -21,17 +19,23 @@ impl EitherPattern { } impl Pattern for EitherPattern { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - let mut longest = 0; + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + let mut longest = None; for pattern in self.patterns.iter() { - let match_len = pattern.matches(tokens, source).map_or(0, NonZeroUsize::get); - - if match_len > longest { - longest = match_len + let Some(match_len) = pattern.matches(tokens, source) else { + continue; + }; + + if let Some(longest_len) = longest { + if match_len > longest_len { + longest = Some(match_len); + } + } else { + longest = Some(match_len); } } - NonZeroUsize::new(longest) + longest } } diff --git a/harper-core/src/patterns/exact_phrase.rs b/harper-core/src/patterns/exact_phrase.rs index 9eb543cd2..a306e7565 100644 --- a/harper-core/src/patterns/exact_phrase.rs +++ b/harper-core/src/patterns/exact_phrase.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use crate::{Document, Token, TokenKind}; use super::{AnyCapitalization, Pattern, SequencePattern}; @@ -46,7 +44,7 @@ impl ExactPhrase { } impl Pattern for ExactPhrase { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { self.inner.matches(tokens, source) } } diff --git a/harper-core/src/patterns/implies_quantity.rs b/harper-core/src/patterns/implies_quantity.rs index 06a96cf27..2ba59f550 100644 --- a/harper-core/src/patterns/implies_quantity.rs +++ b/harper-core/src/patterns/implies_quantity.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use crate::{Token, TokenKind}; use super::Pattern; @@ -38,12 +36,12 @@ impl ImpliesQuantity { } impl Pattern for ImpliesQuantity { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - NonZeroUsize::new(if Self::implies_plurality(tokens, source).is_some() { - 1 + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + if Self::implies_plurality(tokens, source).is_some() { + Some(1) } else { - 0 - }) + None + } } } diff --git a/harper-core/src/patterns/indefinite_article.rs b/harper-core/src/patterns/indefinite_article.rs index 6ef2bb597..498311d30 100644 --- a/harper-core/src/patterns/indefinite_article.rs +++ b/harper-core/src/patterns/indefinite_article.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use crate::Token; use super::{Pattern, WordSet}; @@ -17,7 +15,7 @@ impl Default for IndefiniteArticle { } impl Pattern for IndefiniteArticle { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { self.inner.matches(tokens, source) } } diff --git a/harper-core/src/patterns/invert.rs b/harper-core/src/patterns/invert.rs index b5d4febde..f1d9d80fc 100644 --- a/harper-core/src/patterns/invert.rs +++ b/harper-core/src/patterns/invert.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use crate::Token; use super::Pattern; @@ -18,11 +16,11 @@ impl Invert { } impl Pattern for Invert { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - NonZeroUsize::new(if self.inner.matches(tokens, source).is_some() { - 0 + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + if self.inner.matches(tokens, source).is_some() { + None } else { - 1 - }) + Some(1) + } } } diff --git a/harper-core/src/patterns/mod.rs b/harper-core/src/patterns/mod.rs index 2750c6c36..e1ff11241 100644 --- a/harper-core/src/patterns/mod.rs +++ b/harper-core/src/patterns/mod.rs @@ -5,7 +5,7 @@ //! //! See the page about [`SequencePattern`] for a concrete example of their use. -use std::{collections::VecDeque, num::NonZeroUsize}; +use std::collections::VecDeque; use crate::{Document, Span, Token, VecExt}; @@ -55,7 +55,7 @@ pub trait Pattern { /// Check if the pattern matches at the start of the given token slice. /// /// Returns the length of the match if successful, or `None` if not. - fn matches(&self, tokens: &[Token], source: &[char]) -> Option; + fn matches(&self, tokens: &[Token], source: &[char]) -> Option; } #[cfg(feature = "concurrent")] #[blanket(derive(Arc))] @@ -63,7 +63,7 @@ pub trait Pattern: Send + Sync { /// Check if the pattern matches at the start of the given token slice. /// /// Returns the length of the match if successful, or `None` if not. - fn matches(&self, tokens: &[Token], source: &[char]) -> Option; + fn matches(&self, tokens: &[Token], source: &[char]) -> Option; } pub trait PatternExt { @@ -82,7 +82,7 @@ where let len = self.matches(&tokens[i..], source); if let Some(len) = len { - found.push(Span::new_with_len(i, len.get())); + found.push(Span::new_with_len(i, len)); } } @@ -126,8 +126,12 @@ where F: Fn(&Token, &[char]) -> bool, F: Send + Sync, { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - NonZeroUsize::new(if self(tokens.first()?, source) { 1 } else { 0 }) + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + if self(tokens.first()?, source) { + Some(1) + } else { + None + } } } diff --git a/harper-core/src/patterns/naive_pattern_group.rs b/harper-core/src/patterns/naive_pattern_group.rs index 0c08564e6..4520834bb 100644 --- a/harper-core/src/patterns/naive_pattern_group.rs +++ b/harper-core/src/patterns/naive_pattern_group.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use super::Pattern; use crate::Token; @@ -17,7 +15,7 @@ impl NaivePatternGroup { } impl Pattern for NaivePatternGroup { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { self.patterns .iter() .filter_map(|p| p.matches(tokens, source)) diff --git a/harper-core/src/patterns/nominal_phrase.rs b/harper-core/src/patterns/nominal_phrase.rs index a655f5bef..555c1beee 100644 --- a/harper-core/src/patterns/nominal_phrase.rs +++ b/harper-core/src/patterns/nominal_phrase.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use crate::Token; use super::Pattern; @@ -8,7 +6,7 @@ use super::Pattern; pub struct NominalPhrase; impl Pattern for NominalPhrase { - fn matches(&self, tokens: &[Token], _source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], _source: &[char]) -> Option { let mut cursor = 0; loop { @@ -26,7 +24,7 @@ impl Pattern for NominalPhrase { } if tok.kind.is_nominal() { - return NonZeroUsize::new(cursor + 1); + return Some(cursor + 1); } return None; diff --git a/harper-core/src/patterns/pattern_map.rs b/harper-core/src/patterns/pattern_map.rs index 3a6b19ad1..b4a654896 100644 --- a/harper-core/src/patterns/pattern_map.rs +++ b/harper-core/src/patterns/pattern_map.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use crate::LSend; use crate::Token; @@ -69,7 +67,7 @@ impl Pattern for PatternMap where T: LSend, { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { self.rows .iter() .filter_map(|row| row.key.matches(tokens, source)) diff --git a/harper-core/src/patterns/repeating_pattern.rs b/harper-core/src/patterns/repeating_pattern.rs index 514551c4c..b18aa2eef 100644 --- a/harper-core/src/patterns/repeating_pattern.rs +++ b/harper-core/src/patterns/repeating_pattern.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use super::Pattern; use crate::Token; @@ -21,7 +19,7 @@ impl RepeatingPattern { } impl Pattern for RepeatingPattern { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { let mut tok_cursor = 0; let mut repetition = 0; @@ -29,10 +27,17 @@ impl Pattern for RepeatingPattern { let match_len = self.inner.matches(&tokens[tok_cursor..], source); if let Some(match_len) = match_len { - tok_cursor += match_len.get(); + if match_len == 0 { + // If match_len == 0, we won't move forward ever again. + // This means that we can get infinitely many repetitions, + // so repetition >= self.required_repetitions is guaranteed. + return Some(tok_cursor); + } + + tok_cursor += match_len; repetition += 1; } else if repetition >= self.required_repetitions { - return NonZeroUsize::new(tok_cursor); + return Some(tok_cursor); } else { return None; } @@ -42,7 +47,6 @@ impl Pattern for RepeatingPattern { #[cfg(test)] mod tests { - use std::num::NonZeroUsize; use super::RepeatingPattern; use crate::Document; @@ -57,7 +61,7 @@ mod tests { assert_eq!( pat.matches(doc.get_tokens(), doc.get_source()), - NonZeroUsize::new(doc.get_tokens().len()) + Some(doc.get_tokens().len()) ) } diff --git a/harper-core/src/patterns/sequence_pattern.rs b/harper-core/src/patterns/sequence_pattern.rs index bb08b7c1b..5ad3c9a63 100644 --- a/harper-core/src/patterns/sequence_pattern.rs +++ b/harper-core/src/patterns/sequence_pattern.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use paste::paste; use super::whitespace_pattern::WhitespacePattern; @@ -165,7 +163,7 @@ impl SequencePattern { pub fn then_one_or_more(mut self, pat: impl Pattern + 'static) -> Self { self.token_patterns - .push(Box::new(RepeatingPattern::new(Box::new(pat), 0))); + .push(Box::new(RepeatingPattern::new(Box::new(pat), 1))); self } @@ -183,23 +181,21 @@ impl SequencePattern { } impl Pattern for SequencePattern { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { let mut tok_cursor = 0; for pat in self.token_patterns.iter() { let match_length = pat.matches(&tokens[tok_cursor..], source)?; - tok_cursor += match_length.get(); + tok_cursor += match_length; } - NonZeroUsize::new(tok_cursor) + Some(tok_cursor) } } #[cfg(test)] mod tests { - use std::num::NonZeroUsize; - use super::SequencePattern; use crate::Document; use crate::patterns::Pattern; @@ -214,7 +210,7 @@ mod tests { assert_eq!( pat.matches(doc.get_tokens(), doc.get_source()), - NonZeroUsize::new(doc.get_tokens().len()) + Some(doc.get_tokens().len()) ); } @@ -228,7 +224,7 @@ mod tests { assert_eq!( pat.matches(doc.get_tokens(), doc.get_source()), - NonZeroUsize::new(doc.get_tokens().len()) + Some(doc.get_tokens().len()) ); } @@ -239,7 +235,7 @@ mod tests { assert_eq!( pat.matches(doc.get_tokens(), doc.get_source()), - NonZeroUsize::new(doc.get_tokens().len()) + Some(doc.get_tokens().len()) ); } } diff --git a/harper-core/src/patterns/similar_to_phrase.rs b/harper-core/src/patterns/similar_to_phrase.rs index 10fdd1c31..932f25871 100644 --- a/harper-core/src/patterns/similar_to_phrase.rs +++ b/harper-core/src/patterns/similar_to_phrase.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use crate::{Document, Token, TokenKind}; use super::{ @@ -61,7 +59,7 @@ impl SimilarToPhrase { } impl Pattern for SimilarToPhrase { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { if self.phrase.matches(tokens, source).is_some() { return None; } diff --git a/harper-core/src/patterns/split_compound_word.rs b/harper-core/src/patterns/split_compound_word.rs index 836d788ec..445c9896c 100644 --- a/harper-core/src/patterns/split_compound_word.rs +++ b/harper-core/src/patterns/split_compound_word.rs @@ -1,4 +1,4 @@ -use std::{num::NonZeroUsize, sync::Arc}; +use std::sync::Arc; use crate::{CharString, Dictionary, FstDictionary, Token, WordMetadata}; @@ -57,10 +57,10 @@ impl SplitCompoundWord { } impl Pattern for SplitCompoundWord { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { let inner_match = self.inner.matches(tokens, source)?; - if inner_match.get() != 3 { + if inner_match != 3 { return None; } diff --git a/harper-core/src/patterns/whitespace_pattern.rs b/harper-core/src/patterns/whitespace_pattern.rs index 2ead95e6f..69cb92113 100644 --- a/harper-core/src/patterns/whitespace_pattern.rs +++ b/harper-core/src/patterns/whitespace_pattern.rs @@ -1,16 +1,14 @@ -use std::num::NonZeroUsize; - use super::Pattern; pub struct WhitespacePattern; impl Pattern for WhitespacePattern { - fn matches(&self, tokens: &[crate::Token], _source: &[char]) -> Option { - NonZeroUsize::new( - tokens - .iter() - .position(|t| !t.kind.is_whitespace()) - .unwrap_or(tokens.len()), - ) + fn matches(&self, tokens: &[crate::Token], _source: &[char]) -> Option { + let count = tokens + .iter() + .position(|t| !t.kind.is_whitespace()) + .unwrap_or(tokens.len()); + + if count == 0 { None } else { Some(count) } } } diff --git a/harper-core/src/patterns/within_edit_distance.rs b/harper-core/src/patterns/within_edit_distance.rs index 0b5fb6b5b..897d7fe6e 100644 --- a/harper-core/src/patterns/within_edit_distance.rs +++ b/harper-core/src/patterns/within_edit_distance.rs @@ -1,5 +1,4 @@ use std::cell::RefCell; -use std::num::NonZeroUsize; use super::Pattern; use crate::{CharString, CharStringExt, Token}; @@ -33,7 +32,7 @@ thread_local! { } impl Pattern for WithinEditDistance { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { let first = tokens.first()?; if !first.kind.is_word() { return None; @@ -48,7 +47,11 @@ impl Pattern for WithinEditDistance { buffer_a, buffer_b, ); - NonZeroUsize::new(if distance <= self.max_edit_dist { 1 } else { 0 }) + if distance <= self.max_edit_dist { + Some(1) + } else { + None + } }) } } diff --git a/harper-core/src/patterns/word_pattern_group.rs b/harper-core/src/patterns/word_pattern_group.rs index 1c3f5399d..5028b592e 100644 --- a/harper-core/src/patterns/word_pattern_group.rs +++ b/harper-core/src/patterns/word_pattern_group.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use hashbrown::HashMap; use super::naive_pattern_group::NaivePatternGroup; @@ -42,7 +40,7 @@ impl

Pattern for WordPatternGroup

where P: Pattern, { - fn matches(&self, tokens: &[crate::Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[crate::Token], source: &[char]) -> Option { let first = tokens.first()?; if !first.kind.is_word() { return None; diff --git a/harper-core/src/patterns/word_set.rs b/harper-core/src/patterns/word_set.rs index cbb892004..c325a62b4 100644 --- a/harper-core/src/patterns/word_set.rs +++ b/harper-core/src/patterns/word_set.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use super::Pattern; use smallvec::SmallVec; @@ -40,7 +38,7 @@ impl WordSet { } impl Pattern for WordSet { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { let tok = tokens.first()?; if !tok.kind.is_word() { return None; @@ -59,7 +57,7 @@ impl Pattern for WordSet { .all(|(a, b)| a.eq_ignore_ascii_case(b)); if partial_match { - return NonZeroUsize::new(1); + return Some(1); } } From 0e8b028ed23f60c6a11836b17579d86c2b2cf293 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Sun, 27 Apr 2025 14:53:48 +0200 Subject: [PATCH 3/8] refactor: remove `IntoPatternList` and use macros instead --- harper-core/src/linting/modal_of.rs | 4 +- .../src/patterns/new_syntax_experiment.rs | 89 ++++++------------- 2 files changed, 30 insertions(+), 63 deletions(-) diff --git a/harper-core/src/linting/modal_of.rs b/harper-core/src/linting/modal_of.rs index b03ce7797..efc2e3212 100644 --- a/harper-core/src/linting/modal_of.rs +++ b/harper-core/src/linting/modal_of.rs @@ -25,13 +25,13 @@ impl Default for ModalOf { let anyword_might_of = Lrc::new(seq![WORD, WS, "might", WS, "of"]); Self { - pattern: Box::new(Choice::new(( + pattern: Box::new(choice![ // TODO: Use optional for seq![anyword_might_of.clone(), WS, "course"], seq![modal_of.clone(), WS, "course"], anyword_might_of, modal_of, - ))), + ]), } } } diff --git a/harper-core/src/patterns/new_syntax_experiment.rs b/harper-core/src/patterns/new_syntax_experiment.rs index 23c60ca80..81ffcc24d 100644 --- a/harper-core/src/patterns/new_syntax_experiment.rs +++ b/harper-core/src/patterns/new_syntax_experiment.rs @@ -7,6 +7,13 @@ use super::{AnyCapitalization, AnyPattern, Pattern, WhitespacePattern}; pub trait IntoPattern { type Output: Pattern + 'static; fn into_pattern(self) -> Self::Output; + + fn into_pattern_boxed(self) -> Box + where + Self: Sized, + { + Box::new(self.into_pattern()) + } } impl IntoPattern for T { type Output = T; @@ -21,62 +28,12 @@ impl IntoPattern for &str { } } -pub trait IntoPatternList { - fn into_pattern_list(self) -> Vec>; -} -impl IntoPatternList for Vec> { - #[inline] - fn into_pattern_list(self) -> Vec> { - self - } -} -impl IntoPatternList for () { - #[inline] - fn into_pattern_list(self) -> Vec> { - Vec::new() - } -} -impl IntoPatternList for P { - #[inline] - fn into_pattern_list(self) -> Vec> { - vec![Box::new(self.into_pattern())] - } -} - -macro_rules! impl_into_pattern_list { - ($($name:ident = $index:tt),+) => { - impl<$($name: IntoPattern),+> IntoPatternList for ($($name),+) { - #[inline] - fn into_pattern_list(self) -> Vec> { - vec![$(Box::new(self.$index.into_pattern())),+] - } - } - }; -} -impl_into_pattern_list!(A = 0, B = 1); -impl_into_pattern_list!(A = 0, B = 1, C = 2); -impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3); -impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4); -impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5); -impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6); -impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6, H = 7); -#[rustfmt::skip] -impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6, H = 7, I = 8); -#[rustfmt::skip] -impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6, H = 7, I = 8, J = 9); -#[rustfmt::skip] -impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6, H = 7, I = 8, J = 9, K = 10); -#[rustfmt::skip] -impl_into_pattern_list!(A = 0, B = 1, C = 2, D = 3, E = 4, F = 5, G = 6, H = 7, I = 8, J = 9, K = 10, L = 11); - pub struct Sequence { - pub patterns: Vec>, + patterns: Vec>, } impl Sequence { - pub fn new(patterns: impl IntoPatternList) -> Self { - Self { - patterns: patterns.into_pattern_list(), - } + pub fn new(patterns: Vec>) -> Self { + Self { patterns } } } impl Pattern for Sequence { @@ -93,13 +50,11 @@ impl Pattern for Sequence { } pub struct Choice { - pub patterns: Vec>, + patterns: Vec>, } impl Choice { - pub fn new(patterns: impl IntoPatternList) -> Self { - Self { - patterns: patterns.into_pattern_list(), - } + pub fn new(patterns: Vec>) -> Self { + Self { patterns } } } impl Pattern for Choice { @@ -159,12 +114,24 @@ pub const WS: WhitespacePattern = WhitespacePattern; pub mod prelude { pub use super::super::{Pattern, WordSet}; - pub use super::{ANY, Choice, Sequence, WORD, WS, exact}; + pub use super::{ANY, Choice, IntoPattern, Sequence, WORD, WS, exact}; macro_rules! seq { + ($item:expr $(,)?) => { + IntoPattern::into_pattern($item) + }; ($($item:expr),* $(,)?) => { - Sequence::new(($($item),*)) + Sequence::new(vec![$(IntoPattern::into_pattern_boxed($item)),*]) }; } - pub(crate) use seq; + macro_rules! choice { + ($($item:literal),+ $(,)?) => { + WordSet::new(&[$($item),*]) + }; + ($($item:expr),* $(,)?) => { + Choice::new(vec![$(IntoPattern::into_pattern_boxed($item)),*]) + }; + } + + pub(crate) use {choice, seq}; } From 0be666a81450d079e7a69ba527905cf07664fd70 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Mon, 28 Apr 2025 13:52:43 +0200 Subject: [PATCH 4/8] fix: fix merge conflict --- .../src/patterns/new_syntax_experiment.rs | 29 ++++++++++--------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/harper-core/src/patterns/new_syntax_experiment.rs b/harper-core/src/patterns/new_syntax_experiment.rs index 81ffcc24d..49f902af1 100644 --- a/harper-core/src/patterns/new_syntax_experiment.rs +++ b/harper-core/src/patterns/new_syntax_experiment.rs @@ -1,5 +1,3 @@ -use std::num::NonZeroUsize; - use crate::{CharString, Token}; use super::{AnyCapitalization, AnyPattern, Pattern, WhitespacePattern}; @@ -37,15 +35,15 @@ impl Sequence { } } impl Pattern for Sequence { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { let mut tok_cursor = 0; for pat in &self.patterns { let match_length = pat.matches(&tokens[tok_cursor..], source)?; - tok_cursor += match_length.get(); + tok_cursor += match_length; } - NonZeroUsize::new(tok_cursor) + Some(tok_cursor) } } @@ -58,15 +56,18 @@ impl Choice { } } impl Pattern for Choice { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - let mut longest = 0; + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + let mut longest: Option = None; for pattern in self.patterns.iter() { - let match_len = pattern.matches(tokens, source).map_or(0, NonZeroUsize::get); - longest = longest.max(match_len); + let Some(match_len) = pattern.matches(tokens, source) else { + continue; + }; + + longest = Some(longest.unwrap_or(0).max(match_len)); } - NonZeroUsize::new(longest) + longest } } @@ -74,7 +75,7 @@ struct ExactWord { word: CharString, } impl Pattern for ExactWord { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { let tok = tokens.first()?; if !tok.kind.is_word() { return None; @@ -86,7 +87,7 @@ impl Pattern for ExactWord { let chars = tok.span.get_content(source); let eq = chars == self.word.as_slice(); - NonZeroUsize::new(if eq { 1 } else { 0 }) + if eq { Some(1) } else { None } } } @@ -99,12 +100,12 @@ pub fn exact(word: &str) -> impl Pattern { #[derive(Clone, Copy)] pub struct WordTokenPattern; impl Pattern for WordTokenPattern { - fn matches(&self, tokens: &[Token], _source: &[char]) -> Option { + fn matches(&self, tokens: &[Token], _source: &[char]) -> Option { let tok = tokens.first()?; if !tok.kind.is_word() { return None; } - NonZeroUsize::new(1) + Some(1) } } From 9a3096ef4e891c50080c25da71567a42ef3cf774 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Mon, 28 Apr 2025 17:51:12 +0200 Subject: [PATCH 5/8] feat: add lookahead assertions --- harper-core/src/linting/modal_of.rs | 30 ++----- harper-core/src/patterns/mod.rs | 4 +- .../src/patterns/new_syntax_experiment.rs | 82 ++++++++++++++++++- 3 files changed, 87 insertions(+), 29 deletions(-) diff --git a/harper-core/src/linting/modal_of.rs b/harper-core/src/linting/modal_of.rs index efc2e3212..eaa2d7e9b 100644 --- a/harper-core/src/linting/modal_of.rs +++ b/harper-core/src/linting/modal_of.rs @@ -1,5 +1,5 @@ use crate::{ - Lrc, Token, TokenStringExt, + Token, TokenStringExt, patterns::{Pattern, new_syntax_experiment::prelude::*}, }; @@ -21,16 +21,10 @@ impl Default for ModalOf { words.add(&format!("{}n't", word)); }); - let modal_of = Lrc::new(seq![words, WS, "of"]); - let anyword_might_of = Lrc::new(seq![WORD, WS, "might", WS, "of"]); - Self { pattern: Box::new(choice![ - // TODO: Use optional for - seq![anyword_might_of.clone(), WS, "course"], - seq![modal_of.clone(), WS, "course"], - anyword_might_of, - modal_of, + seq![WORD, WS, "might", WS, "of", not_ahead![WS, "course"]], + seq![words, WS, "of", not_ahead![WS, "course"]], ]), } } @@ -43,20 +37,7 @@ impl PatternLinter for ModalOf { fn match_to_lint(&self, matched_toks: &[Token], source_chars: &[char]) -> Option { let modal_index = match matched_toks.len() { - // Without context, always an error from the start - 3 => 0, 5 => { - // False positives: modal _ of _ course / adj. _ might _ of / art. _ might _ of - let w3_text = matched_toks - .last() - .unwrap() - .span - .get_content(source_chars) - .iter() - .collect::(); - if w3_text.as_str() != "of" { - return None; - } let w1_kind = &matched_toks.first().unwrap().kind; // the might of something, great might of something if w1_kind.is_adjective() || w1_kind.is_determiner() { @@ -65,9 +46,8 @@ impl PatternLinter for ModalOf { // not a false positive, skip context before 2 } - // False positive: _ might _ of _ course - 7 => return None, - _ => unreachable!(), + // Without context, always an error from the start + _ => 0, }; let span_modal_of = matched_toks[modal_index..modal_index + 3].span().unwrap(); diff --git a/harper-core/src/patterns/mod.rs b/harper-core/src/patterns/mod.rs index 34de9f7f8..ddc1d0a32 100644 --- a/harper-core/src/patterns/mod.rs +++ b/harper-core/src/patterns/mod.rs @@ -141,8 +141,8 @@ impl Pattern for F where F: Fn(&Token, &[char]) -> bool, { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - NonZeroUsize::new(if self(tokens.first()?, source) { 1 } else { 0 }) + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + if self(tokens.first()?, source) { Some(1) } else { None } } } diff --git a/harper-core/src/patterns/new_syntax_experiment.rs b/harper-core/src/patterns/new_syntax_experiment.rs index 49f902af1..1ecbbc91d 100644 --- a/harper-core/src/patterns/new_syntax_experiment.rs +++ b/harper-core/src/patterns/new_syntax_experiment.rs @@ -90,13 +90,40 @@ impl Pattern for ExactWord { if eq { Some(1) } else { None } } } - pub fn exact(word: &str) -> impl Pattern { ExactWord { word: word.chars().collect(), } } +struct Not

(P); +impl Pattern for Not

{ + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + if self.0.matches(tokens, source).is_some() { + None + } else { + Some(0) + } + } +} +pub fn not(pattern: impl Pattern) -> impl Pattern { + Not(pattern) +} + +struct Ahead

(P); +impl Pattern for Ahead

{ + fn matches(&self, tokens: &[Token], source: &[char]) -> Option { + if self.0.matches(tokens, source).is_some() { + Some(0) + } else { + None + } + } +} +pub fn ahead(pattern: impl Pattern) -> impl Pattern { + Ahead(pattern) +} + #[derive(Clone, Copy)] pub struct WordTokenPattern; impl Pattern for WordTokenPattern { @@ -117,6 +144,16 @@ pub mod prelude { pub use super::super::{Pattern, WordSet}; pub use super::{ANY, Choice, IntoPattern, Sequence, WORD, WS, exact}; + /// Matches a sequence of patterns. + /// + /// This is the same as concatenating the patterns together. + /// + /// ## Examples + /// + /// ```rust + /// use crate::patterns::new_syntax_experiment::preluse::*; + /// let confession = seq!["I", WS, "love", WS, "you"]; + /// ``` macro_rules! seq { ($item:expr $(,)?) => { IntoPattern::into_pattern($item) @@ -125,6 +162,14 @@ pub mod prelude { Sequence::new(vec![$(IntoPattern::into_pattern_boxed($item)),*]) }; } + /// Matches any of the given patterns. + /// + /// ## Examples + /// + /// ```rust + /// use crate::patterns::new_syntax_experiment::preluse::*; + /// let fav_animal = choice!["dog", "cat", seq!["black", WS, "bear"]]; + /// ``` macro_rules! choice { ($($item:literal),+ $(,)?) => { WordSet::new(&[$($item),*]) @@ -134,5 +179,38 @@ pub mod prelude { }; } - pub(crate) use {choice, seq}; + /// An assertion that matches the given sequence of patterns, but does NOT + /// consume any tokens. + /// + /// ## Examples + /// + /// ```rust + /// use crate::patterns::new_syntax_experiment::preluse::*; + /// let love = seq!["I", WS, "love", ahead![WS, "you"]]; + /// ``` + macro_rules! ahead { + ($($item:expr),* $(,)?) => { + crate::patterns::new_syntax_experiment::ahead(seq![$($item),*]) + }; + } + /// An assertion that matches anything but the given sequence of patterns. + /// No tokens are consumed. + /// + /// ## Examples + /// + /// ```rust + /// use crate::patterns::new_syntax_experiment::preluse::*; + /// let love_no_ego = seq!["I", WS, "love", not_ahead![WS, "myself"]]; + /// ``` + macro_rules! not_ahead { + ($($item:expr),* $(,)?) => { + crate::patterns::new_syntax_experiment::not( + crate::patterns::new_syntax_experiment::ahead( + seq![$($item),*] + ) + ) + }; + } + + pub(crate) use {ahead, choice, not_ahead, seq}; } From d2897882deddd0fbea3a746f285cd7fdf15e9da1 Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Mon, 28 Apr 2025 20:57:59 +0200 Subject: [PATCH 6/8] feat: better names for any token/word patterns --- harper-core/src/linting/modal_of.rs | 2 +- .../src/patterns/new_syntax_experiment.rs | 19 ++++++++++++++----- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/harper-core/src/linting/modal_of.rs b/harper-core/src/linting/modal_of.rs index eaa2d7e9b..dc390e34c 100644 --- a/harper-core/src/linting/modal_of.rs +++ b/harper-core/src/linting/modal_of.rs @@ -23,7 +23,7 @@ impl Default for ModalOf { Self { pattern: Box::new(choice![ - seq![WORD, WS, "might", WS, "of", not_ahead![WS, "course"]], + seq![AnyWord, WS, "might", WS, "of", not_ahead![WS, "course"]], seq![words, WS, "of", not_ahead![WS, "course"]], ]), } diff --git a/harper-core/src/patterns/new_syntax_experiment.rs b/harper-core/src/patterns/new_syntax_experiment.rs index 1ecbbc91d..da0dab1c6 100644 --- a/harper-core/src/patterns/new_syntax_experiment.rs +++ b/harper-core/src/patterns/new_syntax_experiment.rs @@ -124,9 +124,19 @@ pub fn ahead(pattern: impl Pattern) -> impl Pattern { Ahead(pattern) } +/// Matches any single token, regardless of its kind. #[derive(Clone, Copy)] -pub struct WordTokenPattern; -impl Pattern for WordTokenPattern { +pub struct AnyToken; +impl Pattern for AnyToken { + fn matches(&self, tokens: &[Token], _source: &[char]) -> Option { + if tokens.is_empty() { None } else { Some(1) } + } +} + +/// Matches any word token. +#[derive(Clone, Copy)] +pub struct AnyWord; +impl Pattern for AnyWord { fn matches(&self, tokens: &[Token], _source: &[char]) -> Option { let tok = tokens.first()?; if !tok.kind.is_word() { @@ -136,13 +146,12 @@ impl Pattern for WordTokenPattern { } } -pub const ANY: AnyPattern = AnyPattern; -pub const WORD: WordTokenPattern = WordTokenPattern; pub const WS: WhitespacePattern = WhitespacePattern; pub mod prelude { + pub use super::super::{Pattern, WordSet}; - pub use super::{ANY, Choice, IntoPattern, Sequence, WORD, WS, exact}; + pub use super::{AnyToken, AnyWord, Choice, IntoPattern, Sequence, WS, exact}; /// Matches a sequence of patterns. /// From 20245ebb398d8c2606a54586ffaa63e0235216fc Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Tue, 29 Apr 2025 20:42:15 +0200 Subject: [PATCH 7/8] feat: add predicates --- harper-core/src/linting/confident.rs | 13 +- .../src/patterns/any_capitalization.rs | 18 +- harper-core/src/patterns/mod.rs | 35 ++- .../src/patterns/new_syntax_experiment.rs | 227 ++++++++++++++++-- 4 files changed, 240 insertions(+), 53 deletions(-) diff --git a/harper-core/src/linting/confident.rs b/harper-core/src/linting/confident.rs index c4cb8aff2..4204c6cd4 100644 --- a/harper-core/src/linting/confident.rs +++ b/harper-core/src/linting/confident.rs @@ -1,7 +1,6 @@ use crate::{ Token, - char_string::char_string, - patterns::{AnyCapitalization, OwnedPatternExt, Pattern, SequencePattern}, + patterns::{Pattern, new_syntax_experiment::prelude::*}, }; use super::{Lint, LintKind, PatternLinter, Suggestion}; @@ -12,16 +11,8 @@ pub struct Confident { impl Default for Confident { fn default() -> Self { - let pattern = SequencePattern::default() - .then( - (|tok: &Token, _source: &[char]| tok.kind.is_verb() || tok.kind.is_determiner()) - .or(Box::new(AnyCapitalization::new(char_string!("very")))), - ) - .then_whitespace() - .t_aco("confidant"); - Self { - pattern: Box::new(pattern), + pattern: Box::new(seq![Verb | Det | "very", WS, "confidant"]), } } } diff --git a/harper-core/src/patterns/any_capitalization.rs b/harper-core/src/patterns/any_capitalization.rs index 0a6f94ed6..967057657 100644 --- a/harper-core/src/patterns/any_capitalization.rs +++ b/harper-core/src/patterns/any_capitalization.rs @@ -1,6 +1,6 @@ use crate::{CharString, Token}; -use super::Pattern; +use super::SinlgeTokenPattern; /// A [`Pattern`] that matches any capitalization of a provided word. #[derive(Clone)] @@ -20,25 +20,21 @@ impl AnyCapitalization { } } -impl Pattern for AnyCapitalization { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - let tok = tokens.first()?; - +impl SinlgeTokenPattern for AnyCapitalization { + fn matches_token(&self, tok: &Token, source: &[char]) -> bool { if !tok.kind.is_word() { - return None; + return false; } if tok.span.len() != self.word.len() { - return None; + return false; } let tok_chars = tok.span.get_content(source); - let partial_match = tok_chars + tok_chars .iter() .zip(&self.word) - .all(|(a, b)| a.eq_ignore_ascii_case(b)); - - if partial_match { Some(1) } else { None } + .all(|(a, b)| a.eq_ignore_ascii_case(b)) } } diff --git a/harper-core/src/patterns/mod.rs b/harper-core/src/patterns/mod.rs index ddc1d0a32..b2f18193a 100644 --- a/harper-core/src/patterns/mod.rs +++ b/harper-core/src/patterns/mod.rs @@ -122,13 +122,18 @@ where } #[cfg(feature = "concurrent")] -impl Pattern for F -where - F: Fn(&Token, &[char]) -> bool, - F: Send + Sync, -{ +pub trait SinlgeTokenPattern: Send + Sync + 'static { + fn matches_token(&self, token: &Token, source: &[char]) -> bool; +} +#[cfg(not(feature = "concurrent"))] +pub trait SinlgeTokenPattern: 'static { + fn matches_token(&self, token: &Token, source: &[char]) -> bool; +} + +impl Pattern for P { fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - if self(tokens.first()?, source) { + let t = tokens.first()?; + if self.matches_token(t, source) { Some(1) } else { None @@ -136,13 +141,25 @@ where } } +#[cfg(feature = "concurrent")] +impl SinlgeTokenPattern for F +where + F: Fn(&Token, &[char]) -> bool, + F: Send + Sync + 'static, +{ + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + self(token, source) + } +} + #[cfg(not(feature = "concurrent"))] -impl Pattern for F +impl SinlgeTokenPattern for F where F: Fn(&Token, &[char]) -> bool, + F: 'static, { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - if self(tokens.first()?, source) { Some(1) } else { None } + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + self(token, source) } } diff --git a/harper-core/src/patterns/new_syntax_experiment.rs b/harper-core/src/patterns/new_syntax_experiment.rs index da0dab1c6..62c12f3a8 100644 --- a/harper-core/src/patterns/new_syntax_experiment.rs +++ b/harper-core/src/patterns/new_syntax_experiment.rs @@ -1,6 +1,6 @@ use crate::{CharString, Token}; -use super::{AnyCapitalization, AnyPattern, Pattern, WhitespacePattern}; +use super::{AnyCapitalization, Pattern, SinlgeTokenPattern, WhitespacePattern}; pub trait IntoPattern { type Output: Pattern + 'static; @@ -74,20 +74,17 @@ impl Pattern for Choice { struct ExactWord { word: CharString, } -impl Pattern for ExactWord { - fn matches(&self, tokens: &[Token], source: &[char]) -> Option { - let tok = tokens.first()?; - if !tok.kind.is_word() { - return None; +impl SinlgeTokenPattern for ExactWord { + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + if !token.kind.is_word() { + return false; } - if tok.span.len() != self.word.len() { - return None; + if token.span.len() != self.word.len() { + return false; } - let chars = tok.span.get_content(source); - let eq = chars == self.word.as_slice(); - - if eq { Some(1) } else { None } + let chars = token.span.get_content(source); + chars == self.word.as_slice() } } pub fn exact(word: &str) -> impl Pattern { @@ -127,30 +124,216 @@ pub fn ahead(pattern: impl Pattern) -> impl Pattern { /// Matches any single token, regardless of its kind. #[derive(Clone, Copy)] pub struct AnyToken; -impl Pattern for AnyToken { - fn matches(&self, tokens: &[Token], _source: &[char]) -> Option { - if tokens.is_empty() { None } else { Some(1) } +impl SinlgeTokenPattern for AnyToken { + fn matches_token(&self, _token: &Token, _source: &[char]) -> bool { + true } } /// Matches any word token. #[derive(Clone, Copy)] pub struct AnyWord; -impl Pattern for AnyWord { - fn matches(&self, tokens: &[Token], _source: &[char]) -> Option { - let tok = tokens.first()?; - if !tok.kind.is_word() { - return None; - } - Some(1) +impl SinlgeTokenPattern for AnyWord { + fn matches_token(&self, token: &Token, _source: &[char]) -> bool { + token.kind.is_word() } } pub const WS: WhitespacePattern = WhitespacePattern; +pub mod predicates { + use crate::{ + Token, TokenKind, + patterns::{AnyCapitalization, SinlgeTokenPattern}, + }; + + pub trait IntoSingleTokenPattern { + type Output: SinlgeTokenPattern; + fn into_single_token_pattern(self) -> Self::Output; + } + impl IntoSingleTokenPattern for T { + type Output = T; + fn into_single_token_pattern(self) -> Self::Output { + self + } + } + impl IntoSingleTokenPattern for &str { + type Output = AnyCapitalization; + fn into_single_token_pattern(self) -> Self::Output { + AnyCapitalization::of(self) + } + } + + #[derive(Clone, Copy)] + pub struct Not(P); + impl SinlgeTokenPattern for Not

{ + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + !self.0.matches_token(token, source) + } + } + impl std::ops::Not for Not

{ + type Output = P; + fn not(self) -> Self::Output { + self.0 + } + } + impl std::ops::BitAnd for Not

{ + type Output = And; + fn bitand(self, rhs: R) -> Self::Output { + And(self, rhs.into_single_token_pattern()) + } + } + impl std::ops::BitOr for Not

{ + type Output = Or; + fn bitor(self, rhs: R) -> Self::Output { + Or(self, rhs.into_single_token_pattern()) + } + } + + #[derive(Clone, Copy)] + pub struct And(P1, P2); + impl SinlgeTokenPattern for And { + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + self.0.matches_token(token, source) && self.1.matches_token(token, source) + } + } + impl std::ops::Not for And { + type Output = Not; + fn not(self) -> Self::Output { + Not(self) + } + } + impl + std::ops::BitAnd for And + { + type Output = And; + fn bitand(self, rhs: R) -> Self::Output { + And(self, rhs.into_single_token_pattern()) + } + } + impl + std::ops::BitOr for And + { + type Output = Or; + fn bitor(self, rhs: R) -> Self::Output { + Or(self, rhs.into_single_token_pattern()) + } + } + + #[derive(Clone, Copy)] + pub struct Or(P1, P2); + impl SinlgeTokenPattern for Or { + fn matches_token(&self, token: &Token, source: &[char]) -> bool { + self.0.matches_token(token, source) || self.1.matches_token(token, source) + } + } + impl + std::ops::BitAnd for Or + { + type Output = And; + fn bitand(self, rhs: R) -> Self::Output { + And(self, rhs.into_single_token_pattern()) + } + } + impl + std::ops::BitOr for Or + { + type Output = Or; + fn bitor(self, rhs: R) -> Self::Output { + Or(self, rhs.into_single_token_pattern()) + } + } + + macro_rules! add_operators { + ($name:ident) => { + impl std::ops::Not for $name { + type Output = Not<$name>; + fn not(self) -> Self::Output { + Not(self) + } + } + impl std::ops::BitAnd for $name { + type Output = And<$name, R::Output>; + fn bitand(self, rhs: R) -> Self::Output { + And(self, rhs.into_single_token_pattern()) + } + } + impl std::ops::BitOr for $name { + type Output = Or<$name, R::Output>; + fn bitor(self, rhs: R) -> Self::Output { + Or(self, rhs.into_single_token_pattern()) + } + } + }; + } + macro_rules! create_predicte { + ($name:ident, $fn:expr) => { + #[derive(Clone, Copy)] + pub struct $name; + impl SinlgeTokenPattern for $name { + fn matches_token(&self, token: &Token, _: &[char]) -> bool { + $fn(&token.kind) + } + } + + add_operators!($name); + }; + } + + create_predicte!(Noun, TokenKind::is_noun); + create_predicte!(NounPl, TokenKind::is_plural_noun); + create_predicte!(Pronoun, TokenKind::is_pronoun); + create_predicte!(PronounPl, TokenKind::is_plural_pronoun); + create_predicte!(Nominal, TokenKind::is_nominal); + create_predicte!(NominalPl, TokenKind::is_plural_nominal); + + create_predicte!(Verb, TokenKind::is_verb); + create_predicte!(AuxVerb, TokenKind::is_auxiliary_verb); + create_predicte!(LinkingVerb, TokenKind::is_linking_verb); + create_predicte!(Adj, TokenKind::is_adjective); + create_predicte!(Adverb, TokenKind::is_adverb); + create_predicte!(Det, TokenKind::is_determiner); + create_predicte!(Prep, TokenKind::is_preposition); + + create_predicte!(Common, TokenKind::is_common_word); + create_predicte!(Homograph, TokenKind::is_likely_homograph); + + #[derive(Clone, Copy)] + pub struct Punct; + + macro_rules! define_punct { + ($name:ident, $char:literal, $fn:expr) => { + impl Punct<$char> { + pub const $name: Punct<$char> = Self; + } + impl SinlgeTokenPattern for Punct<$char> { + fn matches_token(&self, token: &Token, _source: &[char]) -> bool { + $fn(&token.kind) + } + } + }; + } + + define_punct!(HYPHEN, '-', TokenKind::is_hyphen); + define_punct!(COMMA, ',', TokenKind::is_comma); + define_punct!(QUOTE, '"', TokenKind::is_quote); + define_punct!(APOS, '\'', TokenKind::is_apostrophe); + define_punct!(PERIOD, '.', TokenKind::is_period); + define_punct!(AT, '@', TokenKind::is_at); + + fn foo() { + let a = (Adj | Noun | Det) & !Verb & Punct::COMMA; + let b = Verb | Det | "very"; + } +} + pub mod prelude { pub use super::super::{Pattern, WordSet}; + pub use super::predicates::{ + Adj, Adverb, AuxVerb, Common, Det, Homograph, LinkingVerb, Nominal, NominalPl, Noun, + NounPl, Prep, Pronoun, PronounPl, Punct, Verb, + }; pub use super::{AnyToken, AnyWord, Choice, IntoPattern, Sequence, WS, exact}; /// Matches a sequence of patterns. From 86d88b3bb2e46e88fefe5d1f3f6b2001526e1dca Mon Sep 17 00:00:00 2001 From: RunDevelopment Date: Tue, 29 Apr 2025 21:47:22 +0200 Subject: [PATCH 8/8] refactor: rename `ahead` -> `next` --- harper-core/src/linting/modal_of.rs | 4 ++-- harper-core/src/patterns/new_syntax_experiment.rs | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/harper-core/src/linting/modal_of.rs b/harper-core/src/linting/modal_of.rs index dc390e34c..4c0a82a2b 100644 --- a/harper-core/src/linting/modal_of.rs +++ b/harper-core/src/linting/modal_of.rs @@ -23,8 +23,8 @@ impl Default for ModalOf { Self { pattern: Box::new(choice![ - seq![AnyWord, WS, "might", WS, "of", not_ahead![WS, "course"]], - seq![words, WS, "of", not_ahead![WS, "course"]], + seq![AnyWord, WS, "might", WS, "of", not_next![WS, "course"]], + seq![words, WS, "of", not_next![WS, "course"]], ]), } } diff --git a/harper-core/src/patterns/new_syntax_experiment.rs b/harper-core/src/patterns/new_syntax_experiment.rs index 62c12f3a8..d0b24ee64 100644 --- a/harper-core/src/patterns/new_syntax_experiment.rs +++ b/harper-core/src/patterns/new_syntax_experiment.rs @@ -378,9 +378,9 @@ pub mod prelude { /// /// ```rust /// use crate::patterns::new_syntax_experiment::preluse::*; - /// let love = seq!["I", WS, "love", ahead![WS, "you"]]; + /// let love = seq!["I", WS, "love", next![WS, "you"]]; /// ``` - macro_rules! ahead { + macro_rules! next { ($($item:expr),* $(,)?) => { crate::patterns::new_syntax_experiment::ahead(seq![$($item),*]) }; @@ -392,9 +392,9 @@ pub mod prelude { /// /// ```rust /// use crate::patterns::new_syntax_experiment::preluse::*; - /// let love_no_ego = seq!["I", WS, "love", not_ahead![WS, "myself"]]; + /// let love_no_ego = seq!["I", WS, "love", not_next![WS, "myself"]]; /// ``` - macro_rules! not_ahead { + macro_rules! not_next { ($($item:expr),* $(,)?) => { crate::patterns::new_syntax_experiment::not( crate::patterns::new_syntax_experiment::ahead( @@ -404,5 +404,5 @@ pub mod prelude { }; } - pub(crate) use {ahead, choice, not_ahead, seq}; + pub(crate) use {choice, next, not_next, seq}; }