Skip to content

Commit 38f6484

Browse files
committed
feat(gitcommit): use tree-sitter for gitcommit
The previous gitcommit parser naively took the contents until the first comment. If there are inline comments, such as when squashing two commits in an interactive rebase, the text after those comments would not be checked. Instead, we use tree-sitter-gitcommit parsing and mask the nodes that are subject, message lines or breaking change descriptions. As before, we re-parse these parts with the existing markdown parser. A new crate `harper-git-commit` is added.
1 parent 62629d8 commit 38f6484

File tree

12 files changed

+158
-39
lines changed

12 files changed

+158
-39
lines changed

Cargo.lock

Lines changed: 22 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[workspace]
2-
members = ["harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst", "harper-stats", "harper-pos-utils", "harper-brill", "harper-ink", "harper-python"]
2+
members = ["harper-cli", "harper-core", "harper-ls", "harper-comments", "harper-wasm", "harper-tree-sitter", "harper-html", "harper-literate-haskell", "harper-typst", "harper-stats", "harper-pos-utils", "harper-brill", "harper-ink", "harper-python", "harper-git-commit"]
33
resolver = "2"
44

55
# Comment out the below lines if you plan to use a debugger.

harper-git-commit/Cargo.toml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
[package]
2+
name = "harper-git-commit"
3+
version = "0.67.0"
4+
edition = "2024"
5+
description = "The language checker for developers."
6+
license = "Apache-2.0"
7+
repository = "https://github.com/automattic/harper"
8+
9+
[dependencies]
10+
harper-core = { path = "../harper-core", version = "0.67.0" }
11+
harper-tree-sitter = { path = "../harper-tree-sitter", version = "0.67.0" }
12+
tree-sitter-gitcommit = { git = "https://github.com/gbprod/tree-sitter-gitcommit", rev = "a716678c0f00645fed1e6f1d0eb221481dbd6f6d" }
13+
tree-sitter = "0.25.10"
14+
15+
[dev-dependencies]
16+
paste = "1.0.15"

harper-git-commit/src/lib.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
use harper_core::Token;
2+
use harper_core::parsers::{self, Markdown, MarkdownOptions, Parser};
3+
use harper_tree_sitter::TreeSitterMasker;
4+
use tree_sitter::Node;
5+
6+
pub struct GitCommitParser {
7+
/// Used to grab the text nodes, and parse them as markdown.
8+
inner: parsers::Mask<TreeSitterMasker, Markdown>,
9+
}
10+
11+
impl GitCommitParser {
12+
fn node_condition(n: &Node) -> bool {
13+
matches!(n.kind(), "subject" | "message_line" | "breaking_change")
14+
}
15+
16+
pub fn new(markdown_options: MarkdownOptions) -> Self {
17+
Self {
18+
inner: parsers::Mask::new(
19+
TreeSitterMasker::new(tree_sitter_gitcommit::language(), Self::node_condition),
20+
Markdown::new(markdown_options),
21+
),
22+
}
23+
}
24+
}
25+
26+
impl Parser for GitCommitParser {
27+
fn parse(&self, source: &[char]) -> Vec<Token> {
28+
self.inner.parse(source)
29+
}
30+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
use harper_core::linting::{LintGroup, Linter};
2+
use harper_core::parsers::MarkdownOptions;
3+
use harper_core::spell::FstDictionary;
4+
use harper_core::{Dialect, Document};
5+
use harper_git_commit::GitCommitParser;
6+
7+
/// Creates a unit test checking that the linting of a git commit document (in
8+
/// `test_sources`) produces the expected number of lints.
9+
macro_rules! create_test {
10+
($filename:ident.txt, $correct_expected:expr) => {
11+
paste::paste! {
12+
#[test]
13+
fn [<lints_ $filename _correctly>](){
14+
let source = include_str!(
15+
concat!(
16+
"./test_sources/",
17+
concat!(stringify!($filename), ".txt")
18+
)
19+
);
20+
21+
let dict = FstDictionary::curated();
22+
let document = Document::new(source, &GitCommitParser::new(MarkdownOptions::default()), &dict);
23+
24+
let mut linter = LintGroup::new_curated(dict, Dialect::American);
25+
let lints = linter.lint(&document);
26+
27+
dbg!(&lints);
28+
assert_eq!(lints.len(), $correct_expected);
29+
30+
// Make sure that all generated tokens span real characters
31+
for token in document.tokens(){
32+
assert!(token.span.try_get_content(document.get_source()).is_some());
33+
}
34+
}
35+
}
36+
};
37+
}
38+
39+
create_test!(simple_commit.txt, 1);
40+
create_test!(complex_verbose_commit.txt, 2);
41+
create_test!(conventional_commit.txt, 2);
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
This is the the subject
2+
3+
This is a first line without typos
4+
# This is a comment
5+
This is a line below the comment with typooos
6+
7+
# Please enter the commit message for your changes. Lines starting
8+
# with '#' will be ignored, and an empty message aborts the commit.
9+
#
10+
# On branch main
11+
#
12+
# Initial commit
13+
#
14+
# Changes to be committed:
15+
#new file: myfile.txt
16+
#
17+
# ------------------------ >8 ------------------------
18+
# Do not modify or remove the line above.
19+
# Everything below it will be ignored.
20+
diff --git a/myfile.txt b/myfile.txt
21+
new file mode 100644
22+
index 0000000..485c415
23+
--- /dev/null
24+
+++ b/myfile.txt
25+
@@ -0,0 +1 @@
26+
+some textt in the file that was added (the typo should be ignored)
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
feat(stuff): use session-based authentiation
2+
3+
BREAKING CHANGE: JWT authentication removed. API clients mustt now use
4+
session cookies instead of Authorization headers with bearer tokens.
5+
6+
Sessions expire after 24 hours of inactvity.
7+
8+
Closes: #247
9+
Reviewed-by: John Doe <john.doe@example.com>
10+
11+
# Please enter the commit message for your changes. Lines starting
12+
# with '#' will be ignored, and an empty message aborts the commit.
13+
#
14+
# On branch main
15+
# Changes to be committed:
16+
# modified: myfile.txt
17+
#
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
A simple commit with a typo: comit

harper-ls/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ harper-stats = { path = "../harper-stats", version = "0.67.0" }
1212
harper-literate-haskell = { path = "../harper-literate-haskell", version = "0.67.0" }
1313
harper-core = { path = "../harper-core", version = "0.67.0", features = ["concurrent"] }
1414
harper-comments = { path = "../harper-comments", version = "0.67.0" }
15+
harper-git-commit = { path = "../harper-git-commit", version = "0.67.0" }
1516
harper-typst = { path = "../harper-typst", version = "0.67.0" }
1617
harper-html = { path = "../harper-html", version = "0.67.0" }
1718
harper-python = { path = "../harper-python", version = "0.67.0" }

harper-ls/src/backend.rs

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ use std::sync::Arc;
77
use crate::config::Config;
88
use crate::dictionary_io::{load_dict, save_dict};
99
use crate::document_state::DocumentState;
10-
use crate::git_commit_parser::GitCommitParser;
1110
use crate::ignored_lints_io::{load_ignored_lints, save_ignored_lints};
1211
use crate::io_utils::fileify_path;
1312
use anyhow::{Context, Result, anyhow};
@@ -19,6 +18,7 @@ use harper_core::parsers::{
1918
};
2019
use harper_core::spell::{Dictionary, FstDictionary, MergedDictionary, MutableDictionary};
2120
use harper_core::{Dialect, DictWordMetadata, Document, IgnoredLints};
21+
use harper_git_commit::GitCommitParser;
2222
use harper_html::HtmlParser;
2323
use harper_ink::InkParser;
2424
use harper_literate_haskell::LiterateHaskellParser;
@@ -382,9 +382,7 @@ impl Backend {
382382
}
383383
"ink" => Some(Box::new(InkParser::default())),
384384
"markdown" => Some(Box::new(Markdown::new(markdown_options))),
385-
"git-commit" | "gitcommit" => {
386-
Some(Box::new(GitCommitParser::new_markdown(markdown_options)))
387-
}
385+
"git-commit" | "gitcommit" => Some(Box::new(GitCommitParser::new(markdown_options))),
388386
"html" => Some(Box::new(HtmlParser::default())),
389387
"mail" | "plaintext" | "text" => Some(Box::new(PlainEnglish)),
390388
"typst" => Some(Box::new(Typst)),

0 commit comments

Comments
 (0)