diff --git a/src/treesitter-stamp/download-treesitter.cmake b/src/treesitter-stamp/download-treesitter.cmake index 7090edd86..f78e13ed4 100644 --- a/src/treesitter-stamp/download-treesitter.cmake +++ b/src/treesitter-stamp/download-treesitter.cmake @@ -22,16 +22,16 @@ function(check_file_hash has_hash hash_is_good) set("${has_hash}" TRUE PARENT_SCOPE) message(VERBOSE "verifying file... - file='/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/v0.23.0.tar.gz'") + file='/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz'") - file("SHA256" "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/v0.23.0.tar.gz" actual_value) + file("SHA256" "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz" actual_value) - if(NOT "${actual_value}" STREQUAL "6403b361b0014999e96f61b9c84d6950d42f0c7d6e806be79382e0232e48a11b") + if(NOT "${actual_value}" STREQUAL "8779f56822cb24e3696b08eda3e9c66b58765d15e0536c2d031fbea2faed60e7") set("${hash_is_good}" FALSE PARENT_SCOPE) message(VERBOSE "SHA256 hash of - /home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/v0.23.0.tar.gz + /home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz does not match expected value - expected: '6403b361b0014999e96f61b9c84d6950d42f0c7d6e806be79382e0232e48a11b' + expected: '8779f56822cb24e3696b08eda3e9c66b58765d15e0536c2d031fbea2faed60e7' actual: '${actual_value}'") else() set("${hash_is_good}" TRUE PARENT_SCOPE) @@ -71,32 +71,32 @@ function(sleep_before_download attempt) execute_process(COMMAND "${CMAKE_COMMAND}" -E sleep "${sleep_seconds}") endfunction() -if(EXISTS "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/v0.23.0.tar.gz") +if(EXISTS "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz") 
check_file_hash(has_hash hash_is_good) if(has_hash) if(hash_is_good) message(VERBOSE "File already exists and hash match (skip download): - file='/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/v0.23.0.tar.gz' - SHA256='6403b361b0014999e96f61b9c84d6950d42f0c7d6e806be79382e0232e48a11b'" + file='/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz' + SHA256='8779f56822cb24e3696b08eda3e9c66b58765d15e0536c2d031fbea2faed60e7'" ) return() else() message(VERBOSE "File already exists but hash mismatch. Removing...") - file(REMOVE "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/v0.23.0.tar.gz") + file(REMOVE "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz") endif() else() message(VERBOSE "File already exists but no hash specified (use URL_HASH): - file='/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/v0.23.0.tar.gz' + file='/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz' Old file will be removed and new file downloaded from URL." ) - file(REMOVE "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/v0.23.0.tar.gz") + file(REMOVE "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz") endif() endif() set(retry_number 5) message(VERBOSE "Downloading... 
- dst='/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/v0.23.0.tar.gz' + dst='/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz' timeout='none' inactivity timeout='none'" ) @@ -107,7 +107,7 @@ foreach(i RANGE ${retry_number}) if(status_code IN_LIST download_retry_codes) sleep_before_download(${i}) endif() - foreach(url IN ITEMS [====[https://github.com/tree-sitter/tree-sitter/archive/v0.23.0.tar.gz]====]) + foreach(url IN ITEMS [====[https://github.com/tree-sitter/tree-sitter/archive/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz]====]) if(NOT url IN_LIST skip_url_list) message(VERBOSE "Using src='${url}'") @@ -119,7 +119,7 @@ foreach(i RANGE ${retry_number}) file( DOWNLOAD - "${url}" "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/v0.23.0.tar.gz" + "${url}" "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz" # no TIMEOUT # no INACTIVITY_TIMEOUT @@ -136,7 +136,7 @@ foreach(i RANGE ${retry_number}) check_file_hash(has_hash hash_is_good) if(has_hash AND NOT hash_is_good) message(VERBOSE "Hash mismatch, removing...") - file(REMOVE "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/v0.23.0.tar.gz") + file(REMOVE "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz") else() message(VERBOSE "Downloading... 
done") return() diff --git a/src/treesitter-stamp/extract-treesitter.cmake b/src/treesitter-stamp/extract-treesitter.cmake index 80d53df12..d39286920 100644 --- a/src/treesitter-stamp/extract-treesitter.cmake +++ b/src/treesitter-stamp/extract-treesitter.cmake @@ -5,7 +5,7 @@ cmake_minimum_required(VERSION 3.5) # Make file names absolute: # -get_filename_component(filename "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/v0.23.0.tar.gz" ABSOLUTE) +get_filename_component(filename "/home/runner/work/deps/deps/neovim/deps/build/downloads/treesitter/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz" ABSOLUTE) get_filename_component(directory "/home/runner/work/deps/deps/neovim/deps/build/src/treesitter" ABSOLUTE) message(VERBOSE "extracting... diff --git a/src/treesitter-stamp/treesitter-patch-info.txt b/src/treesitter-stamp/treesitter-patch-info.txt index 1fa5e8319..53e1e1e68 100644 --- a/src/treesitter-stamp/treesitter-patch-info.txt +++ b/src/treesitter-stamp/treesitter-patch-info.txt @@ -2,5 +2,5 @@ # The update step will be re-executed if anything in this file changes. # No other meaning or use of this file is supported. 
-command=/usr/local/bin/cmake;-E;copy;/home/runner/work/deps/deps/neovim/cmake.deps/cmake/TreesitterCMakeLists.txt;/home/runner/work/deps/deps/neovim/deps/build/src/treesitter/CMakeLists.txt -work_dir=/home/runner/work/deps/deps/neovim/deps/build/src/treesitter +command= +work_dir= diff --git a/src/treesitter-stamp/treesitter-urlinfo.txt b/src/treesitter-stamp/treesitter-urlinfo.txt index bbe77fb43..adb8f30ba 100644 --- a/src/treesitter-stamp/treesitter-urlinfo.txt +++ b/src/treesitter-stamp/treesitter-urlinfo.txt @@ -6,7 +6,7 @@ method=url command=/usr/local/bin/cmake;-DCMAKE_MESSAGE_LOG_LEVEL=VERBOSE;-P;/home/runner/work/deps/deps/neovim/deps/build/src/treesitter-stamp/download-treesitter.cmake;COMMAND;/usr/local/bin/cmake;-DCMAKE_MESSAGE_LOG_LEVEL=VERBOSE;-P;/home/runner/work/deps/deps/neovim/deps/build/src/treesitter-stamp/verify-treesitter.cmake;COMMAND;/usr/local/bin/cmake;-DCMAKE_MESSAGE_LOG_LEVEL=VERBOSE;-P;/home/runner/work/deps/deps/neovim/deps/build/src/treesitter-stamp/extract-treesitter.cmake source_dir=/home/runner/work/deps/deps/neovim/deps/build/src/treesitter work_dir=/home/runner/work/deps/deps/neovim/deps/build/src -url(s)=https://github.com/tree-sitter/tree-sitter/archive/v0.23.0.tar.gz -hash=SHA256=6403b361b0014999e96f61b9c84d6950d42f0c7d6e806be79382e0232e48a11b +url(s)=https://github.com/tree-sitter/tree-sitter/archive/99dbbbcbe9d7f3c286057e9f3bcc6e0b42690606.tar.gz +hash=SHA256=8779f56822cb24e3696b08eda3e9c66b58765d15e0536c2d031fbea2faed60e7 no_extract= diff --git a/src/treesitter/.github/workflows/backport.yml b/src/treesitter/.github/workflows/backport.yml new file mode 100644 index 000000000..0c3ba6be3 --- /dev/null +++ b/src/treesitter/.github/workflows/backport.yml @@ -0,0 +1,28 @@ +name: backport +on: + pull_request_target: + types: [closed, labeled] +jobs: + backport: + permissions: + contents: write + pull-requests: write + name: Backport Pull Request + if: github.event.pull_request.merged + runs-on: ubuntu-latest + steps: + - uses: 
actions/checkout@v4 + + - uses: actions/create-github-app-token@v1 + id: app-token + with: + app-id: ${{ vars.BACKPORT_APP }} + private-key: ${{ secrets.BACKPORT_KEY }} + + - name: Create backport PR + id: backport + uses: korthout/backport-action@v3 + with: + pull_title: "${pull_title}" + label_pattern: "^ci:backport ([^ ]+)$" + github_token: ${{ steps.app-token.outputs.token }} diff --git a/src/treesitter/.gitignore b/src/treesitter/.gitignore index b94b8bda3..25738984e 100644 --- a/src/treesitter/.gitignore +++ b/src/treesitter/.gitignore @@ -32,4 +32,5 @@ docs/assets/js/tree-sitter.js *.wasm .swiftpm .build +build zig-* diff --git a/src/treesitter/CHANGELOG.md b/src/treesitter/CHANGELOG.md index 62c98baaa..d015e4334 100644 --- a/src/treesitter/CHANGELOG.md +++ b/src/treesitter/CHANGELOG.md @@ -1,5 +1,96 @@ # Changelog +## [0.23.0] - 2024-08-26 + +### Breaking + +- Introduce tree-sitter-language crate for grammar crates to depend on () +- Revert interning of a sequence or choice of a single rule () +- **bindings**: Use capsules in python () +- **dsl**: Support other JS runtimes () + +### Features + +- Add `fuzz` subcommand () +- Allow external scanners to use the logger () +- **bindings**: Add query constants to python +- **bindings**: Add node, python, swift tests () +- **bindings**: Update npm scripts () +- **cli**: Bump unicode data to v15.1.0 +- **cli**: Add debug build flag () +- **cli**: Attach helpful context when `grammar.json` cannot be found () +- **cli**: Add `--show-fields` flag to `test` command () +- **lib**: Add `ts_query_end_byte_for_pattern` () +- **lib**: Support no_std +- **zig**: Update outdated path syntax () + +### Bug Fixes + +- Always reset to the first language when iterating over language attributes () +- Better error when a supertype rule is invalid () +- Intern a sequence or choice of a single element the same as the element itself +- Do not "absorb" rules that consist of a single terminal if the rule is hidden () +- **bindings**: 
Update go bindings () +- **cli**: Installation via authenticated proxy () +- **cli**: Dedup `preceding_auxiliary_symbols` () +- **dsl**: Improve error message when a rule function returns undefined () +- **generate**: Rename `cargo.toml` template () +- **go**: Update parser name in binding files, add to docs () +- **lib**: A null clock must have `tv_nsec` be 0 as well () +- **lib**: Restrict pattern_map optimization when a wildcard step has an immediate first child () +- **lib**: An empty root node should not precede an empty range () +- **lib**: Fix api header C++ interop () +- **make**: Fail properly on Windows () +- **rust**: Fetch `CARGO_MANIFEST_DIR` at runtime in build script () +- **rust**: Fix new clippy warnings () +- **test**: Multi-grammar corpus tests are now in the repo root () +- **wasm**: Update test + +### Performance + +- Hoist out common subexpressions in satisfies_text_predicates () + +### Documentation + +- Update changelog +- Remove duplicate pr # in changelog +- Add note for bullet +- Fix syntax highlighting unit testing example () +- Add tsserver annotation to example () +- Fix tree cursor documentation () +- Document rust library features () +- Clean up binding & parser lists () + +### Refactor + +- Remove ansi_term dependency () +- Remove difference dependency () +- **scripts**: Clean up bash scripts () + +### Testing + +- Modernize scanner files () + +### Build System and CI + +- **deps**: bump wasmtime, cc, and wasmparser ( +- **bindings**: Use language version in soname () +- **lib**: Include the minor in the soname +- **loader**: Make dependencies optional () +- **swift**: Declare header search path () +- **wasm**: Don't minify JS () +- **wasm**: Bump emscripten to 3.1.64 () +- **wasm**: Support big endian machines () +- **zig**: Git ignore updated Zig cache directory () + +### Other + +- Swap `sprintf()` for `snprintf()` () +- Add `.build` to gitignore () +- Reset language when resetting wasm store () +- Clone wasm store engine () +- 
**bindings**: Fix indent & line endings () + ## [0.22.6] — 2024-05-05 ### Features @@ -259,7 +350,7 @@ They don't have any dynamic global data, so all it takes is just declaring them as such - Fix crash when attempting to load ancient languages via wasm () - Use workspace dependencies for internal crates like Tree-sitter () -- Remove vendored wasmtime headers (https://github.com/tree-sitter/tree-sitter/pull/3084) +- Remove vendored wasmtime headers () When building rust binding, use wasmtime headers provided via cargo by the wasmtime-c-api crate. - Fix invalid parse stack recursive merging with mismatched error cost () diff --git a/src/treesitter/CMakeLists.txt b/src/treesitter/CMakeLists.txt deleted file mode 100644 index 3d3705c8a..000000000 --- a/src/treesitter/CMakeLists.txt +++ /dev/null @@ -1,17 +0,0 @@ -cmake_minimum_required(VERSION 3.16) -project(treesitter C) - -add_compile_options(-w) - -add_library(tree-sitter lib/src/lib.c) -target_include_directories(tree-sitter - PRIVATE lib/src lib/src/wasm lib/include) - -install(FILES - lib/include/tree_sitter/api.h - DESTINATION include/tree_sitter) - -include(GNUInstallDirs) -install(TARGETS tree-sitter DESTINATION ${CMAKE_INSTALL_LIBDIR}) - -# vim: set ft=cmake: diff --git a/src/treesitter/Cargo.lock b/src/treesitter/Cargo.lock index 969822824..b1d353409 100644 --- a/src/treesitter/Cargo.lock +++ b/src/treesitter/Cargo.lock @@ -74,9 +74,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.86" +version = "1.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" +checksum = "86fdf8605db99b54d3cd748a44c6d04df638eb5dafb219b135d0149bd0db01f6" [[package]] name = "arbitrary" @@ -92,16 +92,14 @@ checksum = "d92bec98840b8f03a5ff5413de5293bfcd8bf96467cf5452609f939ec6f5de16" [[package]] name = "bindgen" -version = "0.69.4" +version = "0.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum 
= "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" +checksum = "f49d8fed880d473ea71efb9bf597651e77201bdd4893efe54c9e5d65ae04ce6f" dependencies = [ "bitflags", "cexpr", "clang-sys", - "itertools", - "lazy_static", - "lazycell", + "itertools 0.13.0", "log", "prettyplease", "proc-macro2", @@ -110,7 +108,6 @@ dependencies = [ "rustc-hash", "shlex", "syn", - "which", ] [[package]] @@ -128,6 +125,17 @@ dependencies = [ "objc2", ] +[[package]] +name = "bstr" +version = "1.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40723b8fb387abc38f4f4a37c09073622e41dd12327033091ef8950659e6dc0c" +dependencies = [ + "memchr", + "regex-automata", + "serde", +] + [[package]] name = "bumpalo" version = "3.16.0" @@ -148,9 +156,9 @@ checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] name = "cc" -version = "1.1.14" +version = "1.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d2eb3cd3d1bf4529e31c215ee6f93ec5a3d536d9f578f93d9d33ee19562932" +checksum = "45bcde016d64c21da4be18b655631e5ab6d3107607e71a73a9f53eb48aae23fb" dependencies = [ "jobserver", "libc", @@ -203,9 +211,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.16" +version = "4.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed6719fffa43d0d87e5fd8caeab59be1554fb028cd30edc88fc4369b17971019" +checksum = "3e5a21b8495e732f1b3c364c9949b201ca7bae518c502c80256c96ad79eaf6ac" dependencies = [ "clap_builder", "clap_derive", @@ -213,9 +221,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.15" +version = "4.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "216aec2b177652e3846684cbfe25c9964d18ec45234f0f5da5157b207ed1aab6" +checksum = "8cf2dd12af7a047ad9d6da2b6b249759a22a7abc0f474c1dae1777afa4b21a73" dependencies = [ "anstream", "anstyle", @@ -265,9 +273,9 @@ dependencies = [ [[package]] name = "core-foundation" 
-version = "0.9.4" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e195e091a93c46f7102ec7818a2aa394e1e1771c3ab4825963fa03e45afb8f" +checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63" dependencies = [ "core-foundation-sys", "libc", @@ -394,7 +402,7 @@ dependencies = [ "cranelift-codegen", "cranelift-entity", "cranelift-frontend", - "itertools", + "itertools 0.12.1", "log", "smallvec", "wasmparser", @@ -505,9 +513,9 @@ checksum = "e8c02a5121d4ea3eb16a80748c74f5549a5665e4c21333c6098f283870fbdea6" [[package]] name = "filetime" -version = "0.2.24" +version = "0.2.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bf401df4a4e3872c4fe8151134cf483738e74b67fc934d6532c882b3d24a4550" +checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" dependencies = [ "cfg-if", "libc", @@ -650,9 +658,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93ead53efc7ea8ed3cfb0c79fc8023fbb782a5432b52830b6518941cebe6505c" +checksum = "68b900aa2f7301e21c36462b170ee99994de34dff39a4a6a528e80e7376d07e5" dependencies = [ "equivalent", "hashbrown 0.14.5", @@ -680,6 +688,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.11" @@ -732,12 +749,6 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - 
[[package]] name = "leb128" version = "0.2.5" @@ -923,9 +934,9 @@ dependencies = [ [[package]] name = "object" -version = "0.36.3" +version = "0.36.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9" +checksum = "084f1a5821ac4c651660a94a7153d27ac9d8a53736203f58b31945ded098070a" dependencies = [ "crc32fast", "hashbrown 0.14.5", @@ -969,6 +980,12 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" +[[package]] +name = "path-slash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e91099d4268b0e11973f036e885d652fb0b21fedcf69738c627f94db6a44f42" + [[package]] name = "percent-encoding" version = "2.3.1" @@ -1010,9 +1027,9 @@ dependencies = [ [[package]] name = "pretty_assertions" -version = "1.4.0" +version = "1.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af7cee1a6c8a5b9208b3cb1061f10c0cb689087b3d8ce85fb9d2dd7a29b6ba66" +checksum = "3ae130e2f271fbc2ac3a40fb1d07180839cdbbe443c7a27e1e3c13c5cac0116d" dependencies = [ "diff", "yansi", @@ -1039,9 +1056,9 @@ dependencies = [ [[package]] name = "psm" -version = "0.1.21" +version = "0.1.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5787f7cda34e3033a72192c018bc5883100330f362ef279a8cbccfce8bb4e874" +checksum = "aa37f80ca58604976033fae9515a8a2989fc13797d953f7c04fb8fa36a11f205" dependencies = [ "cc", ] @@ -1087,9 +1104,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.3" +version = "0.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a908a6e00f1fdd0dfd9c0eb08ce85126f6d8bbda50017e74bc4a4b7d4a926a4" +checksum = "0884ad60e090bf1345b93da0a5de8923c93884cd03f40dfcfddd3b4bee661853" dependencies = [ "bitflags", ] @@ -1155,9 +1172,9 @@ checksum = 
"08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustix" -version = "0.38.34" +version = "0.38.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70dc5ec042f7a43c4a73241207cecc9873a06d45debb38b329f8541d85c2730f" +checksum = "8acb788b847c24f28525660c4d7758620a7210875711f79e7f663cc152726811" dependencies = [ "bitflags", "errno", @@ -1189,18 +1206,18 @@ checksum = "61697e0a1c7e512e84a621326239844a24d8207b4669b41bc18b32ea5cbf988b" [[package]] name = "serde" -version = "1.0.209" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99fce0ffe7310761ca6bf9faf5115afbc19688edd00171d81b1bb1b116c63e09" +checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.209" +version = "1.0.210" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5831b979fd7b5439637af1752d535ff49f4860c0f341d1baeb6faf0f4242170" +checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f" dependencies = [ "proc-macro2", "quote", @@ -1209,9 +1226,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.127" +version = "1.0.128" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8043c06d9f82bd7271361ed64f415fe5e12a77fdb52e573e7f06a516dea329ad" +checksum = "6ff5456707a1de34e7e37f2a6fd3d3f808c318259cbd01ab6377795054b483d8" dependencies = [ "indexmap", "itoa", @@ -1282,9 +1299,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.76" +version = "2.0.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "578e081a14e0cefc3279b0472138c513f37b41a08d5a3cca9b6e4e8ceb6cd525" +checksum = "9f35bcdf61fd8e7be6caf75f429fdca8beb3ed76584befb503b1569faee373ed" dependencies = [ "proc-macro2", "quote", @@ -1389,9 +1406,9 @@ 
dependencies = [ [[package]] name = "toml_edit" -version = "0.22.20" +version = "0.22.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "583c44c02ad26b0c3f3066fe629275e50627026c51ac2e595cca4c230ce1ce1d" +checksum = "3b072cee73c449a636ffd6f32bd8de3a9f7119139aff882f44943ce2986dc5cf" dependencies = [ "indexmap", "serde", @@ -1449,6 +1466,7 @@ version = "0.23.0" dependencies = [ "anstyle", "anyhow", + "bstr", "clap", "ctor", "ctrlc", @@ -1523,6 +1541,7 @@ dependencies = [ "indoc", "libloading", "once_cell", + "path-slash", "regex", "serde", "serde_json", @@ -1560,9 +1579,9 @@ checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" +checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe" [[package]] name = "unicode-normalization" @@ -1933,9 +1952,9 @@ dependencies = [ [[package]] name = "webbrowser" -version = "1.0.1" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "425ba64c1e13b1c6e8c5d2541c8fac10022ca584f33da781db01b5756aef1f4e" +checksum = "2e5f07fb9bc8de2ddfe6b24a71a75430673fd679e568c48b52716cef1cfae923" dependencies = [ "block2", "core-foundation", @@ -1949,18 +1968,6 @@ dependencies = [ "web-sys", ] -[[package]] -name = "which" -version = "4.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7" -dependencies = [ - "either", - "home", - "once_cell", - "rustix", -] - [[package]] name = "winapi-util" version = "0.1.9" @@ -2225,9 +2232,9 @@ dependencies = [ [[package]] name = "yansi" -version = "0.5.1" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" [[package]] name = "zerocopy" diff --git a/src/treesitter/Cargo.toml b/src/treesitter/Cargo.toml index 6f1f984a4..7594e80b4 100644 --- a/src/treesitter/Cargo.toml +++ b/src/treesitter/Cargo.toml @@ -40,9 +40,10 @@ strip = false [workspace.dependencies] anstyle = "1.0.8" -anyhow = "1.0.86" -cc = "1.1.13" -clap = { version = "4.5.16", features = [ +anyhow = "1.0.89" +bstr = "1.10.0" +cc = "1.1.19" +clap = { version = "4.5.17", features = [ "cargo", "derive", "env", @@ -52,28 +53,29 @@ clap = { version = "4.5.16", features = [ ctor = "0.2.8" ctrlc = { version = "3.4.5", features = ["termination"] } dirs = "5.0.1" -filetime = "0.2.24" +filetime = "0.2.25" fs4 = "0.8.4" git2 = "0.18.3" glob = "0.3.1" heck = "0.5.0" html-escape = "0.2.13" -indexmap = "2.4.0" +indexmap = "2.5.0" indoc = "2.0.5" lazy_static = "1.5.0" libloading = "0.8.5" log = { version = "0.4.22", features = ["std"] } memchr = "2.7.4" once_cell = "1.19.0" -pretty_assertions = "1.4.0" +path-slash = "0.2.1" +pretty_assertions = "1.4.1" rand = "0.8.5" regex = "1.10.6" regex-syntax = "0.8.4" rustc-hash = "1.1.0" semver = "1.0.23" -serde = { version = "1.0.208", features = ["derive"] } +serde = { version = "1.0.210", features = ["derive"] } serde_derive = "1.0.197" -serde_json = { version = "1.0.125", features = ["preserve_order"] } +serde_json = { version = "1.0.128", features = ["preserve_order"] } similar = "2.6.0" smallbitvec = "2.5.3" tempfile = "3.12.0" @@ -83,7 +85,7 @@ toml = "0.8.19" unindent = "0.2.3" walkdir = "2.5.0" wasmparser = "0.215.0" -webbrowser = "1.0.1" +webbrowser = "1.0.2" tree-sitter = { version = "0.23.0", path = "./lib" } tree-sitter-loader = { version = "0.23.0", path = "./cli/loader" } diff --git a/src/treesitter/Makefile b/src/treesitter/Makefile index 465f940df..4127acd34 100644 --- a/src/treesitter/Makefile +++ b/src/treesitter/Makefile 
@@ -3,6 +3,8 @@ $(error Windows is not supported) endif VERSION := 0.23.0 +DESCRIPTION := An incremental parsing system for programming tools +HOMEPAGE_URL := https://tree-sitter.github.io/tree-sitter/ # install directory layout PREFIX ?= /usr/local @@ -32,7 +34,7 @@ SONAME_MAJOR := $(word 1,$(subst ., ,$(VERSION))) SONAME_MINOR := $(word 2,$(subst ., ,$(VERSION))) # OS-specific bits -ifeq ($(shell uname),Darwin) +ifneq ($(findstring darwin,$(shell $(CC) -dumpmachine)),) SOEXT = dylib SOEXTVER_MAJOR = $(SONAME_MAJOR).$(SOEXT) SOEXTVER = $(SONAME_MAJOR).$(SONAME_MINOR).$(SOEXT) @@ -58,12 +60,13 @@ ifneq ($(STRIP),) $(STRIP) $@ endif -tree-sitter.pc: tree-sitter.pc.in - sed -e 's|@VERSION@|$(VERSION)|' \ - -e 's|@LIBDIR@|$(LIBDIR)|' \ - -e 's|@INCLUDEDIR@|$(INCLUDEDIR)|' \ - -e 's|=$(PREFIX)|=$${prefix}|' \ - -e 's|@PREFIX@|$(PREFIX)|' $< > $@ +tree-sitter.pc: lib/tree-sitter.pc.in + sed -e 's|@PROJECT_VERSION@|$(VERSION)|' \ + -e 's|@CMAKE_INSTALL_LIBDIR@|$(LIBDIR)|' \ + -e 's|@CMAKE_INSTALL_INCLUDEDIR@|$(INCLUDEDIR)|' \ + -e 's|@PROJECT_DESCRIPTION@|$(DESCRIPTION)|' \ + -e 's|@PROJECT_HOMEPAGE_URL@|$(HOMEPAGE_URL)|' \ + -e 's|@CMAKE_INSTALL_PREFIX@|$(PREFIX)|' $< > $@ clean: $(RM) $(OBJ) tree-sitter.pc libtree-sitter.a libtree-sitter.$(SOEXT) diff --git a/src/treesitter/Package.swift b/src/treesitter/Package.swift index 6135da0c1..3a4b9744f 100644 --- a/src/treesitter/Package.swift +++ b/src/treesitter/Package.swift @@ -14,27 +14,6 @@ let package = Package( targets: [ .target(name: "TreeSitter", path: "lib", - exclude: [ - "binding_rust", - "binding_web", - "node_modules", - "Cargo.toml", - "README.md", - "src/unicode/README.md", - "src/unicode/LICENSE", - "src/unicode/ICU_SHA", - "src/get_changed_ranges.c", - "src/tree_cursor.c", - "src/stack.c", - "src/node.c", - "src/lexer.c", - "src/parser.c", - "src/language.c", - "src/alloc.c", - "src/subtree.c", - "src/tree.c", - "src/query.c" - ], sources: ["src/lib.c"], cSettings: [.headerSearchPath("src")]), ], diff --git 
a/src/treesitter/cli/Cargo.toml b/src/treesitter/cli/Cargo.toml index 493503ce7..34fa31db9 100644 --- a/src/treesitter/cli/Cargo.toml +++ b/src/treesitter/cli/Cargo.toml @@ -27,6 +27,7 @@ wasm = ["tree-sitter/wasm", "tree-sitter-loader/wasm"] [dependencies] anstyle.workspace = true anyhow.workspace = true +bstr.workspace = true clap.workspace = true ctor.workspace = true ctrlc.workspace = true diff --git a/src/treesitter/cli/loader/Cargo.toml b/src/treesitter/cli/loader/Cargo.toml index 5179b36d6..0c4df0af8 100644 --- a/src/treesitter/cli/loader/Cargo.toml +++ b/src/treesitter/cli/loader/Cargo.toml @@ -26,6 +26,7 @@ fs4.workspace = true indoc.workspace = true libloading.workspace = true once_cell.workspace = true +path-slash.workspace = true regex.workspace = true serde.workspace = true serde_json.workspace = true diff --git a/src/treesitter/cli/loader/src/lib.rs b/src/treesitter/cli/loader/src/lib.rs index 610d0602a..1268e7002 100644 --- a/src/treesitter/cli/loader/src/lib.rs +++ b/src/treesitter/cli/loader/src/lib.rs @@ -23,6 +23,7 @@ use fs4::FileExt; use indoc::indoc; use libloading::{Library, Symbol}; use once_cell::unsync::OnceCell; +use path_slash::PathBufExt as _; use regex::{Regex, RegexBuilder}; use serde::{Deserialize, Deserializer, Serialize}; use tree_sitter::Language; @@ -609,15 +610,10 @@ impl Loader { .host(BUILD_HOST) .debug(self.debug_build) .file(&config.parser_path) - .includes(&config.header_paths); + .includes(&config.header_paths) + .std("c11"); if let Some(scanner_path) = config.scanner_path.as_ref() { - if scanner_path.extension() != Some("c".as_ref()) { - cc_config.cpp(true); - eprintln!("Warning: Using a C++ scanner is now deprecated. 
Please migrate your scanner code to C, as C++ support will be removed in the near future."); - } else { - cc_config.std("c11"); - } cc_config.file(scanner_path); } @@ -823,7 +819,7 @@ impl Loader { path.push(src_path.strip_prefix(root_path).unwrap()); path }; - command.args(["--workdir", &workdir.to_string_lossy()]); + command.args(["--workdir", &workdir.to_slash_lossy()]); // Mount the root directory as a volume, which is the repo root let mut volume_string = OsString::from(&root_path); @@ -882,14 +878,6 @@ impl Loader { ]); if let Some(scanner_filename) = scanner_filename { - if scanner_filename - .extension() - .and_then(|ext| ext.to_str()) - .map_or(false, |ext| ["cc", "cpp"].contains(&ext)) - { - eprintln!("Warning: Using a C++ scanner is now deprecated. Please migrate your scanner code to C, as C++ support will be removed in the near future."); - command.arg("-xc++"); - } command.arg(scanner_filename); } @@ -1204,14 +1192,8 @@ impl Loader { #[must_use] pub fn get_scanner_path(&self, src_path: &Path) -> Option { - let mut path = src_path.join("scanner.c"); - for extension in ["c", "cc", "cpp"] { - path.set_extension(extension); - if path.exists() { - return Some(path); - } - } - None + let path = src_path.join("scanner.c"); + path.exists().then_some(path) } } diff --git a/src/treesitter/cli/src/fuzz/edits.rs b/src/treesitter/cli/src/fuzz/edits.rs index 788eef5bc..ef862d8e7 100644 --- a/src/treesitter/cli/src/fuzz/edits.rs +++ b/src/treesitter/cli/src/fuzz/edits.rs @@ -7,6 +7,7 @@ pub struct Edit { pub inserted_text: Vec, } +#[must_use] pub fn invert_edit(input: &[u8], edit: &Edit) -> Edit { let position = edit.position; let removed_content = &input[position..(position + edit.deleted_length)]; diff --git a/src/treesitter/cli/src/fuzz/mod.rs b/src/treesitter/cli/src/fuzz/mod.rs index 14f50c723..38993e1f0 100644 --- a/src/treesitter/cli/src/fuzz/mod.rs +++ b/src/treesitter/cli/src/fuzz/mod.rs @@ -27,7 +27,8 @@ lazy_static! 
{ pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok(); pub static ref LOG_GRAPH_ENABLED: bool = env::var("TREE_SITTER_LOG_GRAPHS").is_ok(); pub static ref LANGUAGE_FILTER: Option = env::var("TREE_SITTER_LANGUAGE").ok(); - pub static ref EXAMPLE_FILTER: Option = regex_env_var("TREE_SITTER_EXAMPLE"); + pub static ref EXAMPLE_INCLUDE: Option = regex_env_var("TREE_SITTER_EXAMPLE_INCLUDE"); + pub static ref EXAMPLE_EXCLUDE: Option = regex_env_var("TREE_SITTER_EXAMPLE_EXCLUDE"); pub static ref START_SEED: usize = new_seed(); pub static ref EDIT_COUNT: usize = int_env_var("TREE_SITTER_EDITS").unwrap_or(3); pub static ref ITERATION_COUNT: usize = int_env_var("TREE_SITTER_ITERATIONS").unwrap_or(10); @@ -41,6 +42,7 @@ fn regex_env_var(name: &'static str) -> Option { env::var(name).ok().and_then(|e| Regex::new(&e).ok()) } +#[must_use] pub fn new_seed() -> usize { int_env_var("TREE_SITTER_SEED").unwrap_or_else(|| { let mut rng = rand::thread_rng(); @@ -53,7 +55,8 @@ pub struct FuzzOptions { pub subdir: Option, pub edits: usize, pub iterations: usize, - pub filter: Option, + pub include: Option, + pub exclude: Option, pub log_graphs: bool, pub log: bool, } @@ -65,20 +68,6 @@ pub fn fuzz_language_corpus( grammar_dir: &Path, options: &mut FuzzOptions, ) { - let subdir = options.subdir.take().unwrap_or_default(); - - let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus"); - - if !corpus_dir.exists() || !corpus_dir.is_dir() { - eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file."); - return; - } - - if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 { - eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory."); - return; - } - fn retain(entry: &mut TestEntry, language_name: &str) -> bool { match entry { TestEntry::Example { attributes, .. 
} => { @@ -97,6 +86,20 @@ pub fn fuzz_language_corpus( } } + let subdir = options.subdir.take().unwrap_or_default(); + + let corpus_dir = grammar_dir.join(subdir).join("test").join("corpus"); + + if !corpus_dir.exists() || !corpus_dir.is_dir() { + eprintln!("No corpus directory found, ensure that you have a `test/corpus` directory in your grammar directory with at least one test file."); + return; + } + + if std::fs::read_dir(&corpus_dir).unwrap().count() == 0 { + eprintln!("No corpus files found in `test/corpus`, ensure that you have at least one test file in your corpus directory."); + return; + } + let mut main_tests = parse_tests(&corpus_dir).unwrap(); match main_tests { TestEntry::Group { @@ -104,15 +107,24 @@ pub fn fuzz_language_corpus( } => { children.retain_mut(|child| retain(child, language_name)); } - _ => unreachable!(), + TestEntry::Example { .. } => unreachable!(), } - let tests = flatten_tests(main_tests, options.filter.as_ref()); - - let mut skipped = options.skipped.as_ref().map(|x| { - x.iter() - .map(|x| (x.as_str(), 0)) - .collect::>() - }); + let tests = flatten_tests( + main_tests, + options.include.as_ref(), + options.exclude.as_ref(), + ); + + let get_test_name = |test: &FlattenedTest| format!("{language_name} - {}", test.name); + + let mut skipped = options + .skipped + .take() + .unwrap_or_default() + .into_iter() + .chain(tests.iter().filter(|x| x.skip).map(get_test_name)) + .map(|x| (x, 0)) + .collect::>(); let mut failure_count = 0; @@ -125,13 +137,11 @@ pub fn fuzz_language_corpus( println!(); for (test_index, test) in tests.iter().enumerate() { - let test_name = format!("{language_name} - {}", test.name); - if let Some(skipped) = skipped.as_mut() { - if let Some(counter) = skipped.get_mut(test_name.as_str()) { - println!(" {test_index}. {test_name} - SKIPPED"); - *counter += 1; - continue; - } + let test_name = get_test_name(test); + if let Some(counter) = skipped.get_mut(test_name.as_str()) { + println!(" {test_index}. 
{test_name} - SKIPPED"); + *counter += 1; + continue; } println!(" {test_index}. {test_name}"); @@ -143,6 +153,11 @@ pub fn fuzz_language_corpus( set_included_ranges(&mut parser, &test.input, test.template_delimiters); let tree = parser.parse(&test.input, None).unwrap(); + + if test.error { + return true; + } + let mut actual_output = tree.root_node().to_sexp(); if !test.has_fields { actual_output = strip_sexp_fields(&actual_output); @@ -240,7 +255,7 @@ pub fn fuzz_language_corpus( actual_output = strip_sexp_fields(&actual_output); } - if actual_output != test.output { + if actual_output != test.output && !test.error { println!("Incorrect parse for {test_name} - seed {seed}"); print_diff_key(); print_diff(&actual_output, &test.output, true); @@ -272,16 +287,14 @@ pub fn fuzz_language_corpus( eprintln!("{failure_count} {language_name} corpus tests failed fuzzing"); } - if let Some(skipped) = skipped.as_mut() { - skipped.retain(|_, v| *v == 0); + skipped.retain(|_, v| *v == 0); - if !skipped.is_empty() { - println!("Non matchable skip definitions:"); - for k in skipped.keys() { - println!(" {k}"); - } - panic!("Non matchable skip definitions needs to be removed"); + if !skipped.is_empty() { + println!("Non matchable skip definitions:"); + for k in skipped.keys() { + println!(" {k}"); } + panic!("Non matchable skip definitions needs to be removed"); } } @@ -290,14 +303,22 @@ pub struct FlattenedTest { pub input: Vec, pub output: String, pub languages: Vec>, + pub error: bool, + pub skip: bool, pub has_fields: bool, pub template_delimiters: Option<(&'static str, &'static str)>, } -pub fn flatten_tests(test: TestEntry, filter: Option<&Regex>) -> Vec { +#[must_use] +pub fn flatten_tests( + test: TestEntry, + include: Option<&Regex>, + exclude: Option<&Regex>, +) -> Vec { fn helper( test: TestEntry, - filter: Option<&Regex>, + include: Option<&Regex>, + exclude: Option<&Regex>, is_root: bool, prefix: &str, result: &mut Vec, @@ -315,8 +336,13 @@ pub fn flatten_tests(test: 
TestEntry, filter: Option<&Regex>) -> Vec) -> Vec) -> Vec Self { Self(StdRng::seed_from_u64(seed as u64)) } diff --git a/src/treesitter/cli/src/fuzz/scope_sequence.rs b/src/treesitter/cli/src/fuzz/scope_sequence.rs index 436455d4a..686470151 100644 --- a/src/treesitter/cli/src/fuzz/scope_sequence.rs +++ b/src/treesitter/cli/src/fuzz/scope_sequence.rs @@ -6,6 +6,7 @@ pub struct ScopeSequence(Vec); type ScopeStack = Vec<&'static str>; impl ScopeSequence { + #[must_use] pub fn new(tree: &Tree) -> Self { let mut result = Self(Vec::new()); let mut scope_stack = Vec::new(); diff --git a/src/treesitter/cli/src/generate/build_tables/build_lex_table.rs b/src/treesitter/cli/src/generate/build_tables/build_lex_table.rs index f7bff0d93..215e45b1c 100644 --- a/src/treesitter/cli/src/generate/build_tables/build_lex_table.rs +++ b/src/treesitter/cli/src/generate/build_tables/build_lex_table.rs @@ -10,6 +10,7 @@ use crate::generate::{ dedup::split_state_id_groups, grammars::{LexicalGrammar, SyntaxGrammar}, nfa::{CharacterSet, NfaCursor}, + prepare_grammar::symbol_is_used, rules::{Symbol, TokenSet}, tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable}, }; @@ -93,6 +94,9 @@ pub fn build_lex_table( let mut large_character_sets = Vec::new(); for (variable_ix, _variable) in lexical_grammar.variables.iter().enumerate() { let symbol = Symbol::terminal(variable_ix); + if !symbol_is_used(&syntax_grammar.variables, symbol) { + continue; + } builder.reset(); builder.add_state_for_tokens(&TokenSet::from_iter([symbol])); for state in &builder.table.states { diff --git a/src/treesitter/cli/src/generate/build_tables/item.rs b/src/treesitter/cli/src/generate/build_tables/item.rs index da19c4baf..cfc725409 100644 --- a/src/treesitter/cli/src/generate/build_tables/item.rs +++ b/src/treesitter/cli/src/generate/build_tables/item.rs @@ -144,7 +144,7 @@ impl<'a> ParseItem<'a> { /// Create an item identical to this one, but with a different production. 
/// This is used when dynamically "inlining" certain symbols in a production. - pub const fn substitute_production(&self, production: &'a Production) -> ParseItem<'a> { + pub const fn substitute_production(&self, production: &'a Production) -> Self { let mut result = *self; result.production = production; result diff --git a/src/treesitter/cli/src/generate/build_tables/item_set_builder.rs b/src/treesitter/cli/src/generate/build_tables/item_set_builder.rs index ff0323c5d..16305bd33 100644 --- a/src/treesitter/cli/src/generate/build_tables/item_set_builder.rs +++ b/src/treesitter/cli/src/generate/build_tables/item_set_builder.rs @@ -237,7 +237,7 @@ impl<'a> ParseItemSetBuilder<'a> { result } - pub fn transitive_closure(&mut self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> { + pub fn transitive_closure(&self, item_set: &ParseItemSet<'a>) -> ParseItemSet<'a> { let mut result = ParseItemSet::default(); for (item, lookaheads) in &item_set.entries { if let Some(productions) = self diff --git a/src/treesitter/cli/src/generate/grammar_files.rs b/src/treesitter/cli/src/generate/grammar_files.rs index f45cf7f70..42690c36d 100644 --- a/src/treesitter/cli/src/generate/grammar_files.rs +++ b/src/treesitter/cli/src/generate/grammar_files.rs @@ -334,10 +334,12 @@ pub fn generate_grammar_files( .ok_or_else(|| anyhow!("Failed to find the end of the `tree-sitter` version in Cargo.toml"))?; let cargo_toml = format!( - "{}{}{}", + "{}{}{}\n{}\n{}", &cargo_toml[..start_index], "tree-sitter-language = \"0.1.0\"", &cargo_toml[version_end_index + 1..], + "[dev-dependencies]", + "tree-sitter = \"0.23\"", ); write_file(path, cargo_toml)?; diff --git a/src/treesitter/cli/src/generate/mod.rs b/src/treesitter/cli/src/generate/mod.rs index 0ce63d1b9..dc3926345 100644 --- a/src/treesitter/cli/src/generate/mod.rs +++ b/src/treesitter/cli/src/generate/mod.rs @@ -130,7 +130,7 @@ pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String let input_grammar = 
parse_grammar(&grammar_json)?; let parser = generate_parser_for_grammar_with_opts(&input_grammar, tree_sitter::LANGUAGE_VERSION, None)?; - Ok((input_grammar.name.clone(), parser.c_code)) + Ok((input_grammar.name, parser.c_code)) } fn generate_parser_for_grammar_with_opts( diff --git a/src/treesitter/cli/src/generate/node_types.rs b/src/treesitter/cli/src/generate/node_types.rs index 25353e8c7..0ac159cd6 100644 --- a/src/treesitter/cli/src/generate/node_types.rs +++ b/src/treesitter/cli/src/generate/node_types.rs @@ -36,6 +36,8 @@ pub struct NodeInfoJSON { #[serde(rename = "type")] kind: String, named: bool, + #[serde(skip_serializing_if = "std::ops::Not::not")] + root: bool, #[serde(skip_serializing_if = "Option::is_none")] fields: Option>, #[serde(skip_serializing_if = "Option::is_none")] @@ -475,6 +477,7 @@ pub fn generate_node_types_json( .or_insert_with(|| NodeInfoJSON { kind: variable.name.clone(), named: true, + root: false, fields: None, children: None, subtypes: None, @@ -520,6 +523,7 @@ pub fn generate_node_types_json( NodeInfoJSON { kind: kind.clone(), named: is_named, + root: i == 0, fields: Some(BTreeMap::new()), children: None, subtypes: None, @@ -628,13 +632,17 @@ pub fn generate_node_types_json( for (name, kind) in regular_tokens.chain(external_tokens) { match kind { VariableType::Named => { - let node_type_json = node_types_json.entry(name.clone()).or_insert(NodeInfoJSON { - kind: name.clone(), - named: true, - fields: None, - children: None, - subtypes: None, - }); + let node_type_json = + node_types_json + .entry(name.clone()) + .or_insert_with(|| NodeInfoJSON { + kind: name.clone(), + named: true, + root: false, + fields: None, + children: None, + subtypes: None, + }); if let Some(children) = &mut node_type_json.children { children.required = false; } @@ -647,6 +655,7 @@ pub fn generate_node_types_json( VariableType::Anonymous => anonymous_node_types.push(NodeInfoJSON { kind: name.clone(), named: false, + root: false, fields: None, children: 
None, subtypes: None, @@ -764,6 +773,7 @@ mod tests { NodeInfoJSON { kind: "v1".to_string(), named: true, + root: true, subtypes: None, children: None, fields: Some( @@ -801,6 +811,7 @@ mod tests { NodeInfoJSON { kind: ";".to_string(), named: false, + root: false, subtypes: None, children: None, fields: None @@ -811,6 +822,7 @@ mod tests { NodeInfoJSON { kind: "v2".to_string(), named: true, + root: false, subtypes: None, children: None, fields: None @@ -855,6 +867,7 @@ mod tests { NodeInfoJSON { kind: "v1".to_string(), named: true, + root: true, subtypes: None, children: None, fields: Some( @@ -892,6 +905,7 @@ mod tests { NodeInfoJSON { kind: ";".to_string(), named: false, + root: false, subtypes: None, children: None, fields: None @@ -902,6 +916,7 @@ mod tests { NodeInfoJSON { kind: "v2".to_string(), named: true, + root: false, subtypes: None, children: None, fields: None @@ -912,6 +927,7 @@ mod tests { NodeInfoJSON { kind: "v3".to_string(), named: true, + root: false, subtypes: None, children: None, fields: None @@ -957,6 +973,7 @@ mod tests { NodeInfoJSON { kind: "_v2".to_string(), named: true, + root: false, fields: None, children: None, subtypes: Some(vec![ @@ -980,6 +997,7 @@ mod tests { NodeInfoJSON { kind: "v1".to_string(), named: true, + root: true, subtypes: None, children: None, fields: Some( @@ -1042,6 +1060,7 @@ mod tests { NodeInfoJSON { kind: "v1".to_string(), named: true, + root: true, subtypes: None, children: Some(FieldInfoJSON { multiple: true, @@ -1079,6 +1098,7 @@ mod tests { NodeInfoJSON { kind: "v2".to_string(), named: true, + root: false, subtypes: None, children: Some(FieldInfoJSON { multiple: false, @@ -1123,6 +1143,7 @@ mod tests { NodeInfoJSON { kind: "v1".to_string(), named: true, + root: true, subtypes: None, children: Some(FieldInfoJSON { multiple: true, @@ -1196,6 +1217,7 @@ mod tests { Some(&NodeInfoJSON { kind: "identifier".to_string(), named: true, + root: false, subtypes: None, children: None, fields: None, @@ -1206,6 +1228,7 @@ 
mod tests { Some(&NodeInfoJSON { kind: "type_identifier".to_string(), named: true, + root: false, subtypes: None, children: None, fields: None, @@ -1247,6 +1270,7 @@ mod tests { NodeInfoJSON { kind: "a".to_string(), named: true, + root: true, subtypes: None, children: Some(FieldInfoJSON { multiple: true, @@ -1294,6 +1318,7 @@ mod tests { [NodeInfoJSON { kind: "script".to_string(), named: true, + root: true, fields: Some(BTreeMap::new()), children: None, subtypes: None @@ -1350,6 +1375,7 @@ mod tests { NodeInfoJSON { kind: "a".to_string(), named: true, + root: false, subtypes: None, children: None, fields: Some( @@ -1405,6 +1431,7 @@ mod tests { NodeInfoJSON { kind: "script".to_string(), named: true, + root: true, subtypes: None, // Only one node children: Some(FieldInfoJSON { @@ -1459,6 +1486,7 @@ mod tests { NodeInfoJSON { kind: "b".to_string(), named: true, + root: false, subtypes: None, children: Some(FieldInfoJSON { multiple: true, diff --git a/src/treesitter/cli/src/generate/prepare_grammar/extract_tokens.rs b/src/treesitter/cli/src/generate/prepare_grammar/extract_tokens.rs index 34d99dec3..80a0d712a 100644 --- a/src/treesitter/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/src/treesitter/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -28,11 +28,7 @@ pub(super) fn extract_tokens( let mut lexical_variables = Vec::with_capacity(extractor.extracted_variables.len()); for variable in extractor.extracted_variables { - lexical_variables.push(Variable { - name: variable.name, - kind: variable.kind, - rule: variable.rule, - }); + lexical_variables.push(variable); } // If a variable's entire rule was extracted as a token and that token didn't diff --git a/src/treesitter/cli/src/generate/prepare_grammar/flatten_grammar.rs b/src/treesitter/cli/src/generate/prepare_grammar/flatten_grammar.rs index 4b707beef..ff3f10e23 100644 --- a/src/treesitter/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ 
b/src/treesitter/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -15,7 +15,7 @@ struct RuleFlattener { } impl RuleFlattener { - fn new() -> Self { + const fn new() -> Self { Self { production: Production { steps: Vec::new(), @@ -173,7 +173,7 @@ fn flatten_variable(variable: Variable) -> SyntaxVariable { } } -fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool { +pub fn symbol_is_used(variables: &[SyntaxVariable], symbol: Symbol) -> bool { for variable in variables { for production in &variable.productions { for step in &production.steps { @@ -192,8 +192,10 @@ pub(super) fn flatten_grammar(grammar: ExtractedSyntaxGrammar) -> Result Result<()> { + // Check that no rule contains a named precedence that is not present in + // any of the `precedences` lists. + fn validate(rule_name: &str, rule: &Rule, names: &HashSet<&String>) -> Result<()> { + match rule { + Rule::Repeat(rule) => validate(rule_name, rule, names), + Rule::Seq(elements) | Rule::Choice(elements) => elements + .iter() + .try_for_each(|e| validate(rule_name, e, names)), + Rule::Metadata { rule, params } => { + if let Precedence::Name(n) = &params.precedence { + if !names.contains(n) { + return Err(anyhow!("Undeclared precedence '{n}' in rule '{rule_name}'")); + } + } + validate(rule_name, rule, names)?; + Ok(()) + } + _ => Ok(()), + } + } + // For any two precedence names `a` and `b`, if `a` comes before `b` // in some list, then it cannot come *after* `b` in any list. let mut pairs = HashMap::new(); @@ -120,27 +142,6 @@ fn validate_precedences(grammar: &InputGrammar) -> Result<()> { } } - // Check that no rule contains a named precedence that is not present in - // any of the `precedences` lists. 
- fn validate(rule_name: &str, rule: &Rule, names: &HashSet<&String>) -> Result<()> { - match rule { - Rule::Repeat(rule) => validate(rule_name, rule, names), - Rule::Seq(elements) | Rule::Choice(elements) => elements - .iter() - .try_for_each(|e| validate(rule_name, e, names)), - Rule::Metadata { rule, params } => { - if let Precedence::Name(n) = &params.precedence { - if !names.contains(n) { - return Err(anyhow!("Undeclared precedence '{n}' in rule '{rule_name}'")); - } - } - validate(rule_name, rule, names)?; - Ok(()) - } - _ => Ok(()), - } - } - let precedence_names = grammar .precedence_orderings .iter() diff --git a/src/treesitter/cli/src/generate/render.rs b/src/treesitter/cli/src/generate/render.rs index 34e1d426b..d70b23b6a 100644 --- a/src/treesitter/cli/src/generate/render.rs +++ b/src/treesitter/cli/src/generate/render.rs @@ -849,7 +849,7 @@ impl Generator { // are not at the end of the file. let check_eof = large_set.contains('\0'); if check_eof { - add!(self, "(!eof && ") + add!(self, "(!eof && "); } let char_set_info = &mut self.large_character_set_info[large_char_set_ix]; @@ -1663,7 +1663,7 @@ impl Generator { '\r' => add!(self, "'\\r'"), _ => { if c == '\0' { - add!(self, "0") + add!(self, "0"); } else if c == ' ' || c.is_ascii_graphic() { add!(self, "'{c}'"); } else { diff --git a/src/treesitter/cli/src/generate/rules.rs b/src/treesitter/cli/src/generate/rules.rs index ab74a14bc..d8124ef47 100644 --- a/src/treesitter/cli/src/generate/rules.rs +++ b/src/treesitter/cli/src/generate/rules.rs @@ -146,7 +146,7 @@ impl Rule { Self::Choice(elements) } - pub fn seq(rules: Vec) -> Self { + pub const fn seq(rules: Vec) -> Self { Self::Seq(rules) } } @@ -272,7 +272,7 @@ impl From for Rule { } impl TokenSet { - pub fn new() -> Self { + pub const fn new() -> Self { Self { terminal_bits: SmallBitVec::new(), external_bits: SmallBitVec::new(), diff --git a/src/treesitter/cli/src/generate/templates/_cargo.toml b/src/treesitter/cli/src/generate/templates/_cargo.toml 
index 91701b4b7..eea4eb4f1 100644 --- a/src/treesitter/cli/src/generate/templates/_cargo.toml +++ b/src/treesitter/cli/src/generate/templates/_cargo.toml @@ -19,8 +19,8 @@ path = "bindings/rust/lib.rs" [dependencies] tree-sitter-language = "0.1" -[dev-dependencies] -tree-sitter = { version = "0.22" } - [build-dependencies] cc = "1.0.87" + +[dev-dependencies] +tree-sitter = "0.23" diff --git a/src/treesitter/cli/src/generate/templates/gitignore b/src/treesitter/cli/src/generate/templates/gitignore index dd5cc848e..2fd9daca2 100644 --- a/src/treesitter/cli/src/generate/templates/gitignore +++ b/src/treesitter/cli/src/generate/templates/gitignore @@ -1,5 +1,4 @@ # Rust artifacts -Cargo.lock target/ # Node artifacts @@ -13,7 +12,6 @@ node_modules/ Package.resolved # Go artifacts -go.sum _obj/ # Python artifacts diff --git a/src/treesitter/cli/src/generate/templates/go.mod b/src/treesitter/cli/src/generate/templates/go.mod index d13d15630..26d2dbf13 100644 --- a/src/treesitter/cli/src/generate/templates/go.mod +++ b/src/treesitter/cli/src/generate/templates/go.mod @@ -2,4 +2,4 @@ module github.com/tree-sitter/tree-sitter-LOWER_PARSER_NAME go 1.23 -require github.com/tree-sitter/go-tree-sitter v0.23 +require github.com/tree-sitter/go-tree-sitter v0.23.1 diff --git a/src/treesitter/cli/src/generate/templates/lib.rs b/src/treesitter/cli/src/generate/templates/lib.rs index adb8e481d..4e3522ae9 100644 --- a/src/treesitter/cli/src/generate/templates/lib.rs +++ b/src/treesitter/cli/src/generate/templates/lib.rs @@ -1,6 +1,6 @@ //! This crate provides CAMEL_PARSER_NAME language support for the [tree-sitter][] parsing library. //! -//! Typically, you will use the [language][language func] function to add this language to a +//! Typically, you will use the [LANGUAGE][] constant to add this language to a //! tree-sitter [Parser][], and then use the parser to parse some code: //! //! ``` @@ -15,8 +15,6 @@ //! assert!(!tree.root_node().has_error()); //! ``` //! -//! 
[Language]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Language.html -//! [language func]: fn.language.html //! [Parser]: https://docs.rs/tree-sitter/*/tree_sitter/struct.Parser.html //! [tree-sitter]: https://tree-sitter.github.io/ @@ -26,7 +24,9 @@ extern "C" { fn tree_sitter_PARSER_NAME() -> *const (); } -/// The tree-sitter [`LanguageFn`] for this grammar. +/// The tree-sitter [`LanguageFn`][LanguageFn] for this grammar. +/// +/// [LanguageFn]: https://docs.rs/tree-sitter-language/*/tree_sitter_language/struct.LanguageFn.html pub const LANGUAGE: LanguageFn = unsafe { LanguageFn::from_raw(tree_sitter_PARSER_NAME) }; /// The content of the [`node-types.json`][] file for this grammar. diff --git a/src/treesitter/cli/src/generate/templates/package.swift b/src/treesitter/cli/src/generate/templates/package.swift index c1be93db1..d1053b46b 100644 --- a/src/treesitter/cli/src/generate/templates/package.swift +++ b/src/treesitter/cli/src/generate/templates/package.swift @@ -14,29 +14,6 @@ let package = Package( name: "TreeSitterCAMEL_PARSER_NAME", dependencies: [], path: ".", - exclude: [ - "Cargo.toml", - "Makefile", - "binding.gyp", - "bindings/c", - "bindings/go", - "bindings/node", - "bindings/python", - "bindings/rust", - "prebuilds", - "grammar.js", - "package.json", - "package-lock.json", - "pyproject.toml", - "setup.py", - "test", - "examples", - ".editorconfig", - ".github", - ".gitignore", - ".gitattributes", - ".gitmodules", - ], sources: [ "src/parser.c", // NOTE: if your language has an external scanner, add it here. 
diff --git a/src/treesitter/cli/src/main.rs b/src/treesitter/cli/src/main.rs index ce4aae95b..af89bd013 100644 --- a/src/treesitter/cli/src/main.rs +++ b/src/treesitter/cli/src/main.rs @@ -37,7 +37,6 @@ enum Commands { InitConfig(InitConfig), Generate(Generate), Build(Build), - BuildWasm(BuildWasm), Parse(Parse), Test(Test), Fuzz(Fuzz), @@ -124,19 +123,6 @@ struct Build { pub debug: bool, } -#[derive(Args)] -#[command(about = "Compile a parser to WASM", alias = "bw")] -struct BuildWasm { - #[arg( - short, - long, - help = "Run emscripten via docker even if it is installed locally" - )] - pub docker: bool, - #[arg(index = 1, num_args = 1, help = "The path to output the wasm file")] - pub path: Option, -} - #[derive(Args)] #[command(about = "Parse files", alias = "p")] struct Parse { @@ -206,12 +192,6 @@ struct Parse { #[derive(Args)] #[command(about = "Run a parser's tests", alias = "t")] struct Test { - #[arg( - long, - short, - help = "Only run corpus test cases whose name includes the given string" - )] - pub filter: Option, #[arg( long, short, @@ -263,13 +243,23 @@ struct Fuzz { pub skip: Option>, #[arg(long, help = "Subdirectory to the language")] pub subdir: Option, - #[arg(long, short, help = "Maximum number of edits to perform per fuzz test")] + #[arg(long, help = "Maximum number of edits to perform per fuzz test")] pub edits: Option, - #[arg(long, short, help = "Number of fuzzing iterations to run per test")] + #[arg(long, help = "Number of fuzzing iterations to run per test")] pub iterations: Option, - #[arg(long, short, help = "Regex pattern to filter tests")] - pub filter: Option, - #[arg(long, short, help = "Enable logging of graphs and input")] + #[arg( + long, + short, + help = "Only fuzz corpus test cases whose name matches the given regex" + )] + pub include: Option, + #[arg( + long, + short, + help = "Only fuzz corpus test cases whose name does not match the given regex" + )] + pub exclude: Option, + #[arg(long, help = "Enable logging of graphs and 
input")] pub log_graphs: bool, #[arg(long, short, help = "Enable parser logging")] pub log: bool, @@ -489,9 +479,9 @@ fn run() -> Result<()> { } Commands::Build(build_options) => { + let grammar_path = current_dir.join(build_options.path.as_deref().unwrap_or_default()); + if build_options.wasm { - let grammar_path = - current_dir.join(build_options.path.as_deref().unwrap_or_default()); let output_path = build_options.output.map(|path| current_dir.join(path)); let root_path = lookup_package_json_for_path(&grammar_path.join("package.json")) .map(|(p, _)| p.parent().unwrap().to_path_buf())?; @@ -504,8 +494,6 @@ fn run() -> Result<()> { build_options.docker, )?; } else { - let grammar_path = - current_dir.join(build_options.path.as_deref().unwrap_or_default()); let output_path = if let Some(ref path) = build_options.output { let path = Path::new(path); if path.is_absolute() { @@ -544,21 +532,6 @@ fn run() -> Result<()> { } } - Commands::BuildWasm(wasm_options) => { - eprintln!("`build-wasm` is deprecated and will be removed in v0.24.0. 
You should use `build --wasm` instead"); - let grammar_path = current_dir.join(wasm_options.path.unwrap_or_default()); - let root_path = lookup_package_json_for_path(&grammar_path.join("package.json")) - .map(|(p, _)| p.parent().unwrap().to_path_buf())?; - wasm::compile_language_to_wasm( - &loader, - Some(&root_path), - &grammar_path, - &current_dir, - None, - wasm_options.docker, - )?; - } - Commands::Parse(parse_options) => { let config = Config::load(parse_options.config_path)?; let output = if parse_options.output_dot { @@ -711,7 +684,6 @@ fn run() -> Result<()> { path: test_corpus_dir, debug: test_options.debug, debug_graph: test_options.debug_graph, - filter: test_options.filter.as_deref(), include: test_options.include, exclude: test_options.exclude, update: test_options.update, @@ -770,7 +742,8 @@ fn run() -> Result<()> { subdir: fuzz_options.subdir, edits: fuzz_options.edits.unwrap_or(*EDIT_COUNT), iterations: fuzz_options.iterations.unwrap_or(*ITERATION_COUNT), - filter: fuzz_options.filter, + include: fuzz_options.include, + exclude: fuzz_options.exclude, log_graphs: fuzz_options.log_graphs || *LOG_GRAPH_ENABLED, log: fuzz_options.log || *LOG_ENABLED, }; diff --git a/src/treesitter/cli/src/query.rs b/src/treesitter/cli/src/query.rs index bffa05882..f32c5450a 100644 --- a/src/treesitter/cli/src/query.rs +++ b/src/treesitter/cli/src/query.rs @@ -9,7 +9,7 @@ use std::{ use anyhow::{Context, Result}; use tree_sitter::{Language, Parser, Point, Query, QueryCursor}; -use crate::query_testing; +use crate::query_testing::{self, to_utf8_point}; #[allow(clippy::too_many_arguments)] pub fn query_files_at_paths( @@ -70,8 +70,8 @@ pub fn query_files_at_paths( } results.push(query_testing::CaptureInfo { name: (*capture_name).to_string(), - start: capture.node.start_position(), - end: capture.node.end_position(), + start: to_utf8_point(capture.node.start_position(), source_code.as_slice()), + end: to_utf8_point(capture.node.end_position(), source_code.as_slice()), }); } } 
else { @@ -100,8 +100,8 @@ pub fn query_files_at_paths( } results.push(query_testing::CaptureInfo { name: (*capture_name).to_string(), - start: capture.node.start_position(), - end: capture.node.end_position(), + start: to_utf8_point(capture.node.start_position(), source_code.as_slice()), + end: to_utf8_point(capture.node.end_position(), source_code.as_slice()), }); } } diff --git a/src/treesitter/cli/src/query_testing.rs b/src/treesitter/cli/src/query_testing.rs index cdf2e9882..1020874b2 100644 --- a/src/treesitter/cli/src/query_testing.rs +++ b/src/treesitter/cli/src/query_testing.rs @@ -1,6 +1,7 @@ use std::fs; use anyhow::{anyhow, Result}; +use bstr::{BStr, ByteSlice}; use lazy_static::lazy_static; use regex::Regex; use tree_sitter::{Language, Parser, Point}; @@ -9,25 +10,70 @@ lazy_static! { static ref CAPTURE_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap(); } +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct Utf8Point { + pub row: usize, + pub column: usize, +} + +impl std::fmt::Display for Utf8Point { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "({}, {})", self.row, self.column) + } +} + +impl Utf8Point { + pub const fn new(row: usize, column: usize) -> Self { + Self { row, column } + } +} + +pub fn to_utf8_point(point: Point, source: &[u8]) -> Utf8Point { + if point.column == 0 { + return Utf8Point::new(point.row, 0); + } + + let bstr = BStr::new(source); + let line = bstr.lines_with_terminator().nth(point.row).unwrap(); + let mut utf8_column = 0; + + for (_, grapheme_end, _) in line.grapheme_indices() { + utf8_column += 1; + if grapheme_end >= point.column { + break; + } + } + + Utf8Point { + row: point.row, + column: utf8_column, + } +} + #[derive(Debug, Eq, PartialEq)] pub struct CaptureInfo { pub name: String, - pub start: Point, - pub end: Point, + pub start: Utf8Point, + pub end: Utf8Point, } #[derive(Debug, PartialEq, Eq)] pub struct Assertion { - pub position: Point, 
+ pub position: Utf8Point, pub negative: bool, pub expected_capture_name: String, } impl Assertion { #[must_use] - pub fn new(row: usize, col: usize, negative: bool, expected_capture_name: String) -> Self { + pub const fn new( + row: usize, + col: usize, + negative: bool, + expected_capture_name: String, + ) -> Self { Self { - position: Point::new(row, col), + position: Utf8Point::new(row, col), negative, expected_capture_name, } @@ -62,7 +108,7 @@ pub fn parse_position_comments( if let Ok(text) = node.utf8_text(source) { let mut position = node.start_position(); if position.row > 0 { - // Find the arrow character ("^" or '<-") in the comment. A left arrow + // Find the arrow character ("^" or "<-") in the comment. A left arrow // refers to the column where the comment node starts. An up arrow refers // to its own column. let mut has_left_caret = false; @@ -103,7 +149,7 @@ pub fn parse_position_comments( { assertion_ranges.push((node.start_position(), node.end_position())); result.push(Assertion { - position, + position: to_utf8_point(position, source), negative, expected_capture_name: mat.as_str().to_string(), }); diff --git a/src/treesitter/cli/src/test.rs b/src/treesitter/cli/src/test.rs index 250c8efe9..dbff512ef 100644 --- a/src/treesitter/cli/src/test.rs +++ b/src/treesitter/cli/src/test.rs @@ -1,5 +1,5 @@ use std::{ - collections::{BTreeMap, HashSet}, + collections::BTreeMap, ffi::OsStr, fs, io::{self, Write}, @@ -59,6 +59,7 @@ pub enum TestEntry { header_delim_len: usize, divider_delim_len: usize, has_fields: bool, + attributes_str: String, attributes: TestAttributes, }, } @@ -98,7 +99,6 @@ pub struct TestOptions<'a> { pub path: PathBuf, pub debug: bool, pub debug_graph: bool, - pub filter: Option<&'a str>, pub include: Option, pub exclude: Option, pub update: bool, @@ -171,10 +171,22 @@ pub fn run_tests_at_path(parser: &mut Parser, opts: &mut TestOptions) -> Result< print_diff_key(); } for (i, (name, actual, expected)) in failures.iter().enumerate() { - 
println!("\n {}. {name}:", i + 1); - let actual = format_sexp(actual, 2); - let expected = format_sexp(expected, 2); - print_diff(&actual, &expected, opts.color); + if expected == "NO ERROR" { + println!("\n {}. {name}:\n", i + 1); + println!(" Expected an ERROR node, but got:"); + println!( + " {}", + paint( + opts.color.then_some(AnsiColor::Red), + &format_sexp(actual, 2) + ) + ); + } else { + println!("\n {}. {name}:", i + 1); + let actual = format_sexp(actual, 2); + let expected = format_sexp(expected, 2); + print_diff(&actual, &expected, opts.color); + } } if has_parse_errors { @@ -203,9 +215,8 @@ pub fn get_test_info<'test>( } => { if *test_num == target_test { return Some((name, input, attributes.languages.clone())); - } else { - *test_num += 1; } + *test_num += 1; } TestEntry::Group { children, .. } => { for child in children { @@ -326,7 +337,7 @@ fn run_tests( opts: &mut TestOptions, mut indent_level: i32, failures: &mut Vec<(String, String, String)>, - corrected_entries: &mut Vec<(String, String, String, usize, usize)>, + corrected_entries: &mut Vec<(String, String, String, String, usize, usize)>, has_parse_errors: &mut bool, ) -> Result { match test_entry { @@ -337,6 +348,7 @@ fn run_tests( header_delim_len, divider_delim_len, has_fields, + attributes_str, attributes, } => { print!("{}", " ".repeat(indent_level as usize)); @@ -376,12 +388,42 @@ fn run_tests( opts.test_num, paint(opts.color.then_some(AnsiColor::Green), &name) ); + if opts.update { + let input = String::from_utf8(input.clone()).unwrap(); + let output = format_sexp(&output, 0); + corrected_entries.push(( + name.clone(), + input, + output, + attributes_str.clone(), + header_delim_len, + divider_delim_len, + )); + } } else { + if opts.update { + let input = String::from_utf8(input.clone()).unwrap(); + // Keep the original `expected` output if the actual output has no error + let output = format_sexp(&output, 0); + corrected_entries.push(( + name.clone(), + input, + output, + 
attributes_str.clone(), + header_delim_len, + divider_delim_len, + )); + } println!( "{:>3}.  {}", opts.test_num, paint(opts.color.then_some(AnsiColor::Red), &name) ); + failures.push(( + name.clone(), + tree.root_node().to_sexp(), + "NO ERROR".to_string(), + )); } if attributes.fail_fast { @@ -406,6 +448,7 @@ fn run_tests( name.clone(), input, output, + attributes_str.clone(), header_delim_len, divider_delim_len, )); @@ -429,6 +472,7 @@ fn run_tests( name.clone(), input, expected_output, + attributes_str.clone(), header_delim_len, divider_delim_len, )); @@ -437,6 +481,7 @@ fn run_tests( name.clone(), input, actual_output, + attributes_str.clone(), header_delim_len, divider_delim_len, )); @@ -470,64 +515,74 @@ fn run_tests( } TestEntry::Group { name, - mut children, + children, file_path, } => { - // track which tests are being skipped to maintain consistent numbering while using - // filters - let mut skipped_tests = HashSet::new(); + if children.is_empty() { + return Ok(true); + } + + indent_level += 1; let mut advance_counter = opts.test_num; - children.retain(|child| match child { + let failure_count = failures.len(); + let mut has_printed = false; + let mut skipped_tests = 0; + + let matches_filter = |name: &str, opts: &TestOptions| { + if let Some(include) = &opts.include { + include.is_match(name) + } else if let Some(exclude) = &opts.exclude { + !exclude.is_match(name) + } else { + true + } + }; + + let mut should_skip = |entry: &TestEntry, opts: &TestOptions| match entry { TestEntry::Example { name, .. 
} => { - if let Some(filter) = opts.filter { - if !name.contains(filter) { - skipped_tests.insert(advance_counter); - advance_counter += 1; - return false; - } - } - if let Some(include) = &opts.include { - if !include.is_match(name) { - skipped_tests.insert(advance_counter); - advance_counter += 1; - return false; - } - } - if let Some(exclude) = &opts.exclude { - if exclude.is_match(name) { - skipped_tests.insert(advance_counter); - advance_counter += 1; - return false; - } - } advance_counter += 1; - true + !matches_filter(name, opts) } TestEntry::Group { .. } => { - advance_counter += count_subtests(child); - true + advance_counter += count_subtests(entry); + false } - }); - - if children.is_empty() { - opts.test_num = advance_counter; - return Ok(true); - } - - if indent_level > 0 { - print!("{}", " ".repeat(indent_level as usize)); - println!("{name}:"); - } - - let failure_count = failures.len(); + }; - indent_level += 1; for child in children { - if let TestEntry::Example { .. } = child { - while skipped_tests.remove(&opts.test_num) { + if let TestEntry::Example { + ref name, + ref input, + ref output, + ref attributes_str, + header_delim_len, + divider_delim_len, + .. 
+ } = child + { + if should_skip(&child, opts) { + let input = String::from_utf8(input.clone()).unwrap(); + let output = format_sexp(output, 0); + corrected_entries.push(( + name.clone(), + input, + output, + attributes_str.clone(), + header_delim_len, + divider_delim_len, + )); + opts.test_num += 1; + skipped_tests += 1; + + continue; } } + if !has_printed && indent_level > 1 { + has_printed = true; + print!("{}", " ".repeat((indent_level - 1) as usize)); + println!("{name}:"); + } if !run_tests( parser, child, @@ -542,7 +597,7 @@ fn run_tests( } } - opts.test_num += skipped_tests.len(); + opts.test_num += skipped_tests; if let Some(file_path) = file_path { if opts.update && failures.len() - failure_count > 0 { @@ -566,7 +621,7 @@ fn count_subtests(test_entry: &TestEntry) -> usize { fn write_tests( file_path: &Path, - corrected_entries: &[(String, String, String, usize, usize)], + corrected_entries: &[(String, String, String, String, usize, usize)], ) -> Result<()> { let mut buffer = fs::File::create(file_path)?; write_tests_to_buffer(&mut buffer, corrected_entries) @@ -574,9 +629,9 @@ fn write_tests( fn write_tests_to_buffer( buffer: &mut impl Write, - corrected_entries: &[(String, String, String, usize, usize)], + corrected_entries: &[(String, String, String, String, usize, usize)], ) -> Result<()> { - for (i, (name, input, output, header_delim_len, divider_delim_len)) in + for (i, (name, input, output, attributes_str, header_delim_len, divider_delim_len)) in corrected_entries.iter().enumerate() { if i > 0 { @@ -584,8 +639,13 @@ fn write_tests_to_buffer( } writeln!( buffer, - "{}\n{name}\n{}\n{input}\n{}\n\n{}", + "{}\n{name}\n{}{}\n{input}\n{}\n\n{}", "=".repeat(*header_delim_len), + if attributes_str.is_empty() { + attributes_str.clone() + } else { + format!("{attributes_str}\n") + }, "=".repeat(*header_delim_len), "-".repeat(*divider_delim_len), output.trim() @@ -643,6 +703,7 @@ fn parse_test_content(name: String, content: &str, file_path: Option) - let mut 
children = Vec::new(); let bytes = content.as_bytes(); let mut prev_name = String::new(); + let mut prev_attributes_str = String::new(); let mut prev_header_end = 0; // Find the first test header in the file, and determine if it has a @@ -673,17 +734,20 @@ fn parse_test_content(name: String, content: &str, file_path: Option) - .map_or("".as_bytes(), |m| m.as_bytes()); let mut test_name = String::new(); + let mut attributes_str = String::new(); + let mut seen_marker = false; - for line in str::from_utf8(test_name_and_markers) - .unwrap() - .lines() + let test_name_and_markers = str::from_utf8(test_name_and_markers).unwrap(); + for line in test_name_and_markers + .split_inclusive('\n') .filter(|s| !s.is_empty()) { - match line.split('(').next().unwrap() { + let trimmed_line = line.trim(); + match trimmed_line.split('(').next().unwrap() { ":skip" => (seen_marker, skip) = (true, true), ":platform" => { - if let Some(platforms) = line.strip_prefix(':').and_then(|s| { + if let Some(platforms) = trimmed_line.strip_prefix(':').and_then(|s| { s.strip_prefix("platform(") .and_then(|s| s.strip_suffix(')')) }) { @@ -696,7 +760,7 @@ fn parse_test_content(name: String, content: &str, file_path: Option) - ":fail-fast" => (seen_marker, fail_fast) = (true, true), ":error" => (seen_marker, error) = (true, true), ":language" => { - if let Some(lang) = line.strip_prefix(':').and_then(|s| { + if let Some(lang) = trimmed_line.strip_prefix(':').and_then(|s| { s.strip_prefix("language(") .and_then(|s| s.strip_suffix(')')) }) { @@ -706,11 +770,11 @@ fn parse_test_content(name: String, content: &str, file_path: Option) - } _ if !seen_marker => { test_name.push_str(line); - test_name.push('\n'); } _ => {} } } + attributes_str.push_str(test_name_and_markers.strip_prefix(&test_name).unwrap()); // prefer skip over error, both shouldn't be set if skip { @@ -729,10 +793,16 @@ fn parse_test_content(name: String, content: &str, file_path: Option) - } else { Some(test_name.trim_end().to_string()) }; 
+ let attributes_str = if attributes_str.is_empty() { + None + } else { + Some(attributes_str.trim_end().to_string()) + }; Some(( header_delim_len, header_range, test_name, + attributes_str, TestAttributes { skip, platform: platform.unwrap_or(true), @@ -747,12 +817,15 @@ fn parse_test_content(name: String, content: &str, file_path: Option) - }); let (mut prev_header_len, mut prev_attributes) = (80, TestAttributes::default()); - for (header_delim_len, header_range, test_name, attributes) in header_matches.chain(Some(( - 80, - bytes.len()..bytes.len(), - None, - TestAttributes::default(), - ))) { + for (header_delim_len, header_range, test_name, attributes_str, attributes) in header_matches + .chain(Some(( + 80, + bytes.len()..bytes.len(), + None, + None, + TestAttributes::default(), + ))) + { // Find the longest line of dashes following each test description. That line // separates the input from the expected output. Ignore any matches whose suffix // does not match the first suffix in the file. 
@@ -804,6 +877,7 @@ fn parse_test_content(name: String, content: &str, file_path: Option) - header_delim_len: prev_header_len, divider_delim_len, has_fields, + attributes_str: prev_attributes_str, attributes: prev_attributes, }; @@ -813,6 +887,7 @@ fn parse_test_content(name: String, content: &str, file_path: Option) - } prev_attributes = attributes; prev_name = test_name.unwrap_or_default(); + prev_attributes_str = attributes_str.unwrap_or_default(); prev_header_len = header_delim_len; prev_header_end = header_range.end; } @@ -866,6 +941,7 @@ d header_delim_len: 15, divider_delim_len: 3, has_fields: false, + attributes_str: String::new(), attributes: TestAttributes::default(), }, TestEntry::Example { @@ -875,6 +951,7 @@ d header_delim_len: 16, divider_delim_len: 3, has_fields: false, + attributes_str: String::new(), attributes: TestAttributes::default(), }, ], @@ -925,6 +1002,7 @@ abc header_delim_len: 18, divider_delim_len: 7, has_fields: false, + attributes_str: String::new(), attributes: TestAttributes::default(), }, TestEntry::Example { @@ -934,6 +1012,7 @@ abc header_delim_len: 25, divider_delim_len: 19, has_fields: false, + attributes_str: String::new(), attributes: TestAttributes::default(), }, ], @@ -999,6 +1078,7 @@ abc "title 1".to_string(), "input 1".to_string(), "output 1".to_string(), + String::new(), 80, 80, ), @@ -1006,6 +1086,7 @@ abc "title 2".to_string(), "input 2".to_string(), "output 2".to_string(), + String::new(), 80, 80, ), @@ -1086,6 +1167,7 @@ code header_delim_len: 18, divider_delim_len: 3, has_fields: false, + attributes_str: String::new(), attributes: TestAttributes::default(), }, TestEntry::Example { @@ -1095,6 +1177,7 @@ code header_delim_len: 18, divider_delim_len: 3, has_fields: false, + attributes_str: String::new(), attributes: TestAttributes::default(), }, TestEntry::Example { @@ -1104,6 +1187,7 @@ code header_delim_len: 25, divider_delim_len: 3, has_fields: false, + attributes_str: String::new(), attributes: 
TestAttributes::default(), } ], @@ -1177,6 +1261,7 @@ NOT A TEST HEADER header_delim_len: 18, divider_delim_len: 3, has_fields: false, + attributes_str: String::new(), attributes: TestAttributes::default(), }, TestEntry::Example { @@ -1186,6 +1271,7 @@ NOT A TEST HEADER header_delim_len: 18, divider_delim_len: 3, has_fields: false, + attributes_str: String::new(), attributes: TestAttributes::default(), }, TestEntry::Example { @@ -1195,6 +1281,7 @@ NOT A TEST HEADER header_delim_len: 25, divider_delim_len: 3, has_fields: false, + attributes_str: String::new(), attributes: TestAttributes::default(), } ], @@ -1240,6 +1327,7 @@ code with ---- header_delim_len: 15, divider_delim_len: 3, has_fields: false, + attributes_str: String::new(), attributes: TestAttributes::default(), }, TestEntry::Example { @@ -1249,6 +1337,7 @@ code with ---- header_delim_len: 20, divider_delim_len: 3, has_fields: false, + attributes_str: String::new(), attributes: TestAttributes::default(), } ] @@ -1286,6 +1375,7 @@ a header_delim_len: 21, divider_delim_len: 3, has_fields: false, + attributes_str: ":skip".to_string(), attributes: TestAttributes { skip: true, platform: true, @@ -1342,6 +1432,7 @@ a header_delim_len: 25, divider_delim_len: 3, has_fields: false, + attributes_str: format!(":platform({})\n:fail-fast", std::env::consts::OS), attributes: TestAttributes { skip: false, platform: true, @@ -1357,6 +1448,11 @@ a header_delim_len: 29, divider_delim_len: 3, has_fields: false, + attributes_str: if std::env::consts::OS == "linux" { + ":platform(macos)\n:language(foo)".to_string() + } else { + ":platform(linux)\n:language(foo)".to_string() + }, attributes: TestAttributes { skip: false, platform: false, diff --git a/src/treesitter/cli/src/test_highlight.rs b/src/treesitter/cli/src/test_highlight.rs index 541d98fd3..34be438fb 100644 --- a/src/treesitter/cli/src/test_highlight.rs +++ b/src/treesitter/cli/src/test_highlight.rs @@ -7,7 +7,7 @@ use tree_sitter_highlight::{Highlight, 
HighlightConfiguration, HighlightEvent, H use tree_sitter_loader::{Config, Loader}; use super::{ - query_testing::{parse_position_comments, Assertion}, + query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point}, test::paint, util, }; @@ -141,7 +141,7 @@ fn test_highlights_indented( } pub fn iterate_assertions( assertions: &[Assertion], - highlights: &[(Point, Point, Highlight)], + highlights: &[(Utf8Point, Utf8Point, Highlight)], highlight_names: &[String], ) -> Result { // Iterate through all of the highlighting assertions, checking each one against the @@ -224,7 +224,7 @@ pub fn get_highlight_positions( highlighter: &mut Highlighter, highlight_config: &HighlightConfiguration, source: &[u8], -) -> Result> { +) -> Result> { let mut row = 0; let mut column = 0; let mut byte_offset = 0; @@ -261,7 +261,10 @@ pub fn get_highlight_positions( } } if let Some(highlight) = highlight_stack.last() { - result.push((start_position, Point::new(row, column), *highlight)); + let utf8_start_position = to_utf8_point(start_position, source.as_bytes()); + let utf8_end_position = + to_utf8_point(Point::new(row, column), source.as_bytes()); + result.push((utf8_start_position, utf8_end_position, *highlight)); } } } diff --git a/src/treesitter/cli/src/test_tags.rs b/src/treesitter/cli/src/test_tags.rs index c5a1dc021..5b290bda9 100644 --- a/src/treesitter/cli/src/test_tags.rs +++ b/src/treesitter/cli/src/test_tags.rs @@ -2,12 +2,11 @@ use std::{fs, path::Path}; use anstyle::AnsiColor; use anyhow::{anyhow, Result}; -use tree_sitter::Point; use tree_sitter_loader::{Config, Loader}; use tree_sitter_tags::{TagsConfiguration, TagsContext}; use super::{ - query_testing::{parse_position_comments, Assertion}, + query_testing::{parse_position_comments, to_utf8_point, Assertion, Utf8Point}, test::paint, util, }; @@ -168,7 +167,7 @@ pub fn get_tag_positions( tags_context: &mut TagsContext, tags_config: &TagsConfiguration, source: &[u8], -) -> Result> { +) -> Result> { let 
(tags_iter, _has_error) = tags_context.generate_tags(tags_config, source, None)?; let tag_positions = tags_iter .filter_map(std::result::Result::ok) @@ -179,7 +178,11 @@ pub fn get_tag_positions( } else { format!("reference.{tag_postfix}") }; - (tag.span.start, tag.span.end, tag_name) + ( + to_utf8_point(tag.span.start, source), + to_utf8_point(tag.span.end, source), + tag_name, + ) }) .collect(); Ok(tag_positions) diff --git a/src/treesitter/cli/src/tests/corpus_test.rs b/src/treesitter/cli/src/tests/corpus_test.rs index ffa328e2e..f81d7543c 100644 --- a/src/treesitter/cli/src/tests/corpus_test.rs +++ b/src/treesitter/cli/src/tests/corpus_test.rs @@ -11,8 +11,8 @@ use crate::{ edits::{get_random_edit, invert_edit}, flatten_tests, new_seed, random::Rand, - EDIT_COUNT, EXAMPLE_FILTER, ITERATION_COUNT, LANGUAGE_FILTER, LOG_GRAPH_ENABLED, - START_SEED, + EDIT_COUNT, EXAMPLE_EXCLUDE, EXAMPLE_INCLUDE, ITERATION_COUNT, LANGUAGE_FILTER, + LOG_GRAPH_ENABLED, START_SEED, }, generate, parse::perform_edit, @@ -130,15 +130,27 @@ pub fn test_language_corpus( let main_tests = parse_tests(&corpus_dir).unwrap(); let error_tests = parse_tests(&error_corpus_file).unwrap_or_default(); let template_tests = parse_tests(&template_corpus_file).unwrap_or_default(); - let mut tests = flatten_tests(main_tests, EXAMPLE_FILTER.as_ref()); - tests.extend(flatten_tests(error_tests, EXAMPLE_FILTER.as_ref())); + let mut tests = flatten_tests( + main_tests, + EXAMPLE_INCLUDE.as_ref(), + EXAMPLE_EXCLUDE.as_ref(), + ); + tests.extend(flatten_tests( + error_tests, + EXAMPLE_INCLUDE.as_ref(), + EXAMPLE_EXCLUDE.as_ref(), + )); tests.extend( - flatten_tests(template_tests, EXAMPLE_FILTER.as_ref()) - .into_iter() - .map(|mut t| { - t.template_delimiters = Some(("<%", "%>")); - t - }), + flatten_tests( + template_tests, + EXAMPLE_INCLUDE.as_ref(), + EXAMPLE_EXCLUDE.as_ref(), + ) + .into_iter() + .map(|mut t| { + t.template_delimiters = Some(("<%", "%>")); + t + }), ); tests.retain(|t| 
t.languages[0].is_empty() || t.languages.contains(&Box::from(language_dir))); @@ -345,7 +357,7 @@ fn test_feature_corpus_files() { let generate_result = generate::generate_parser_for_grammar(&grammar_json); if error_message_path.exists() { - if EXAMPLE_FILTER.is_some() { + if EXAMPLE_INCLUDE.is_some() || EXAMPLE_EXCLUDE.is_some() { continue; } @@ -377,7 +389,7 @@ fn test_feature_corpus_files() { let c_code = generate_result.unwrap().1; let language = get_test_language(language_name, &c_code, Some(&test_path)); let test = parse_tests(&corpus_path).unwrap(); - let tests = flatten_tests(test, EXAMPLE_FILTER.as_ref()); + let tests = flatten_tests(test, EXAMPLE_INCLUDE.as_ref(), EXAMPLE_EXCLUDE.as_ref()); if !tests.is_empty() { eprintln!("test language: {language_name:?}"); diff --git a/src/treesitter/cli/src/tests/node_test.rs b/src/treesitter/cli/src/tests/node_test.rs index e05ed9326..4b987a53d 100644 --- a/src/treesitter/cli/src/tests/node_test.rs +++ b/src/treesitter/cli/src/tests/node_test.rs @@ -290,6 +290,16 @@ fn test_parent_of_zero_width_node() { function_definition ); assert_eq!(function_definition.child_containing_descendant(block), None); + + let code = ""; + parser.set_language(&get_language("html")).unwrap(); + + let tree = parser.parse(code, None).unwrap(); + let root = tree.root_node(); + let script_element = root.child(0).unwrap(); + let raw_text = script_element.child(1).unwrap(); + let parent = raw_text.parent().unwrap(); + assert_eq!(parent, script_element); } #[test] @@ -308,6 +318,13 @@ fn test_node_field_name_for_child() { .child_by_field_name("value") .unwrap(); + // ------------------- + // left: (identifier) 0 + // operator: "+" 1 <--- (not a named child) + // (comment) 2 <--- (is an extra) + // right: (identifier) 3 + // ------------------- + assert_eq!(binary_expression_node.field_name_for_child(0), Some("left")); assert_eq!( binary_expression_node.field_name_for_child(1), @@ -323,6 +340,44 @@ fn test_node_field_name_for_child() { 
assert_eq!(binary_expression_node.field_name_for_child(4), None); } +#[test] +fn test_node_field_name_for_named_child() { + let mut parser = Parser::new(); + parser.set_language(&get_language("c")).unwrap(); + let tree = parser + .parse("int w = x + /* y is special! */ y;", None) + .unwrap(); + let translation_unit_node = tree.root_node(); + let declaration_node = translation_unit_node.named_child(0).unwrap(); + + let binary_expression_node = declaration_node + .child_by_field_name("declarator") + .unwrap() + .child_by_field_name("value") + .unwrap(); + + // ------------------- + // left: (identifier) 0 + // operator: "+" _ <--- (not a named child) + // (comment) 1 <--- (is an extra) + // right: (identifier) 2 + // ------------------- + + assert_eq!( + binary_expression_node.field_name_for_named_child(0), + Some("left") + ); + // The comment should not have a field name, as it's just an extra + assert_eq!(binary_expression_node.field_name_for_named_child(1), None); + // The operator is not a named child, so the named child at index 2 is the right child + assert_eq!( + binary_expression_node.field_name_for_named_child(2), + Some("right") + ); + // Negative test - Not a valid child index + assert_eq!(binary_expression_node.field_name_for_named_child(3), None); +} + #[test] fn test_node_child_by_field_name_with_extra_hidden_children() { let mut parser = Parser::new(); diff --git a/src/treesitter/cli/src/tests/query_test.rs b/src/treesitter/cli/src/tests/query_test.rs index f37821c1a..d404d19a7 100644 --- a/src/treesitter/cli/src/tests/query_test.rs +++ b/src/treesitter/cli/src/tests/query_test.rs @@ -5146,3 +5146,28 @@ fn test_query_on_empty_source_code() { &[(0, vec![("program", "")])], ); } + +#[test] +fn test_query_execution_with_timeout() { + let language = get_language("javascript"); + let mut parser = Parser::new(); + parser.set_language(&language).unwrap(); + + let source_code = "function foo() { while (true) { } }\n".repeat(1000); + let tree = 
parser.parse(&source_code, None).unwrap(); + + let query = Query::new(&language, "(function_declaration) @function").unwrap(); + let mut cursor = QueryCursor::new(); + + cursor.set_timeout_micros(1000); + let matches = cursor + .matches(&query, tree.root_node(), source_code.as_bytes()) + .count(); + assert!(matches < 1000); + + cursor.set_timeout_micros(0); + let matches = cursor + .matches(&query, tree.root_node(), source_code.as_bytes()) + .count(); + assert_eq!(matches, 1000); +} diff --git a/src/treesitter/cli/src/tests/test_highlight_test.rs b/src/treesitter/cli/src/tests/test_highlight_test.rs index 8699c2a6f..054e33f82 100644 --- a/src/treesitter/cli/src/tests/test_highlight_test.rs +++ b/src/treesitter/cli/src/tests/test_highlight_test.rs @@ -1,9 +1,9 @@ -use tree_sitter::{Parser, Point}; +use tree_sitter::Parser; use tree_sitter_highlight::{Highlight, Highlighter}; use super::helpers::fixtures::{get_highlight_config, get_language, test_loader}; use crate::{ - query_testing::{parse_position_comments, Assertion}, + query_testing::{parse_position_comments, Assertion, Utf8Point}, test_highlight::get_highlight_positions, }; @@ -28,6 +28,9 @@ fn test_highlight_test_with_basic_test() { " // ^ variable", " // ^ !variable", "};", + "var y̆y̆y̆y̆ = function() {}", + " // ^ function", + " // ^ keyword", ] .join("\n"); @@ -40,6 +43,8 @@ fn test_highlight_test_with_basic_test() { Assertion::new(1, 11, false, String::from("keyword")), Assertion::new(4, 9, false, String::from("variable")), Assertion::new(4, 11, true, String::from("variable")), + Assertion::new(8, 5, false, String::from("function")), + Assertion::new(8, 11, false, String::from("keyword")), ] ); @@ -50,13 +55,16 @@ fn test_highlight_test_with_basic_test() { assert_eq!( highlight_positions, &[ - (Point::new(1, 0), Point::new(1, 3), Highlight(2)), // "var" - (Point::new(1, 4), Point::new(1, 7), Highlight(0)), // "abc" - (Point::new(1, 10), Point::new(1, 18), Highlight(2)), // "function" - (Point::new(1, 19), 
Point::new(1, 20), Highlight(1)), // "d" - (Point::new(4, 2), Point::new(4, 8), Highlight(2)), // "return" - (Point::new(4, 9), Point::new(4, 10), Highlight(1)), // "d" - (Point::new(4, 13), Point::new(4, 14), Highlight(1)), // "e" + (Utf8Point::new(1, 0), Utf8Point::new(1, 3), Highlight(2)), // "var" + (Utf8Point::new(1, 4), Utf8Point::new(1, 7), Highlight(0)), // "abc" + (Utf8Point::new(1, 10), Utf8Point::new(1, 18), Highlight(2)), // "function" + (Utf8Point::new(1, 19), Utf8Point::new(1, 20), Highlight(1)), // "d" + (Utf8Point::new(4, 2), Utf8Point::new(4, 8), Highlight(2)), // "return" + (Utf8Point::new(4, 9), Utf8Point::new(4, 10), Highlight(1)), // "d" + (Utf8Point::new(4, 13), Utf8Point::new(4, 14), Highlight(1)), // "e" + (Utf8Point::new(8, 0), Utf8Point::new(8, 3), Highlight(2)), // "var" + (Utf8Point::new(8, 4), Utf8Point::new(8, 8), Highlight(0)), // "y̆y̆y̆y̆" + (Utf8Point::new(8, 11), Utf8Point::new(8, 19), Highlight(2)), // "function" ] ); } diff --git a/src/treesitter/cli/src/tests/test_tags_test.rs b/src/treesitter/cli/src/tests/test_tags_test.rs index 5e7bf9c93..5f7b88fc2 100644 --- a/src/treesitter/cli/src/tests/test_tags_test.rs +++ b/src/treesitter/cli/src/tests/test_tags_test.rs @@ -1,9 +1,9 @@ -use tree_sitter::{Parser, Point}; +use tree_sitter::Parser; use tree_sitter_tags::TagsContext; use super::helpers::fixtures::{get_language, get_tags_config}; use crate::{ - query_testing::{parse_position_comments, Assertion}, + query_testing::{parse_position_comments, Assertion, Utf8Point}, test_tags::get_tag_positions, }; @@ -43,18 +43,18 @@ fn test_tags_test_with_basic_test() { tag_positions, &[ ( - Point::new(1, 4), - Point::new(1, 7), + Utf8Point::new(1, 4), + Utf8Point::new(1, 7), "definition.function".to_string() ), ( - Point::new(3, 8), - Point::new(3, 11), + Utf8Point::new(3, 8), + Utf8Point::new(3, 11), "reference.call".to_string() ), ( - Point::new(5, 11), - Point::new(5, 12), + Utf8Point::new(5, 11), + Utf8Point::new(5, 12), 
"reference.call".to_string() ), ] diff --git a/src/treesitter/cli/src/tests/tree_test.rs b/src/treesitter/cli/src/tests/tree_test.rs index 3e4b27758..62cc23cd3 100644 --- a/src/treesitter/cli/src/tests/tree_test.rs +++ b/src/treesitter/cli/src/tests/tree_test.rs @@ -702,6 +702,33 @@ fn test_consistency_with_mid_codepoint_edit() { assert_eq!(tree3.root_node().to_sexp(), tree.root_node().to_sexp()); } +#[test] +fn test_tree_cursor_on_aliased_root_with_extra_child() { + let source = r#" +fn main() { + C/* hi */::::E; +} +"#; + + let mut parser = Parser::new(); + parser.set_language(&get_language("rust")).unwrap(); + + let tree = parser.parse(source, None).unwrap(); + + let function = tree.root_node().child(0).unwrap(); + let block = function.child(3).unwrap(); + let expression_statement = block.child(1).unwrap(); + let scoped_identifier = expression_statement.child(0).unwrap(); + let generic_type = scoped_identifier.child(0).unwrap(); + assert_eq!(generic_type.kind(), "generic_type"); + + let mut cursor = generic_type.walk(); + assert!(cursor.goto_first_child()); + assert_eq!(cursor.node().kind(), "type_identifier"); + assert!(cursor.goto_next_sibling()); + assert_eq!(cursor.node().kind(), "block_comment"); +} + fn index_of(text: &[u8], substring: &str) -> usize { str::from_utf8(text).unwrap().find(substring).unwrap() } diff --git a/src/treesitter/docs/section-3-creating-parsers.md b/src/treesitter/docs/section-3-creating-parsers.md index ca4c65cf7..e06817327 100644 --- a/src/treesitter/docs/section-3-creating-parsers.md +++ b/src/treesitter/docs/section-3-creating-parsers.md @@ -14,7 +14,7 @@ Developing Tree-sitter grammars can have a difficult learning curve, but once yo In order to develop a Tree-sitter parser, there are two dependencies that you need to install: * **Node.js** - Tree-sitter grammars are written in JavaScript, and Tree-sitter uses [Node.js][node.js] to interpret JavaScript files. 
It requires the `node` command to be in one of the directories in your [`PATH`][path-env]. You'll need Node.js version 6.0 or greater. -* **A C Compiler** - Tree-sitter creates parsers that are written in C. In order to run and test these parsers with the `tree-sitter parse` or `tree-sitter test` commands, you must have a C/C++ compiler installed. Tree-sitter will try to look for these compilers in the standard places for each platform. +* **A C Compiler** - Tree-sitter creates parsers that are written in C. In order to run and test these parsers with the `tree-sitter parse` or `tree-sitter test` commands, you must have a C compiler installed. Tree-sitter will try to look for these compilers in the standard places for each platform. ### Installation @@ -766,14 +766,7 @@ grammar({ }); ``` -Then, add another C or C++ source file to your project. Currently, its path must be `src/scanner.c` or `src/scanner.cc` for the CLI to recognize it. Be sure to add this file to the `sources` section of your `binding.gyp` file so that it will be included when your project is compiled by Node.js and uncomment the appropriate block in your `bindings/rust/build.rs` file so that it will be included in your Rust crate. - -> **Note** -> -> C++ scanners are now deprecated and will be removed in the near future. -> While it is currently possible to write an external scanner in C++, it can be difficult -> to get working cross-platform and introduces extra requirements; therefore it -> is *greatly* preferred to use C. +Then, add another C source file to your project. Currently, its path must be `src/scanner.c` for the CLI to recognize it. Be sure to add this file to the `sources` section of your `binding.gyp` file so that it will be included when your project is compiled by Node.js and uncomment the appropriate block in your `bindings/rust/build.rs` file so that it will be included in your Rust crate. 
In this new source file, define an [`enum`][enum] type containing the names of all of your external tokens. The ordering of this enum must match the order in your grammar's `externals` array; the actual names do not matter. @@ -789,7 +782,7 @@ enum TokenType { } ``` -Finally, you must define five functions with specific names, based on your language's name and five actions: *create*, *destroy*, *serialize*, *deserialize*, and *scan*. These functions must all use [C linkage][c-linkage], so if you're writing the scanner in C++, you need to declare them with the `extern "C"` qualifier. +Finally, you must define five functions with specific names, based on your language's name and five actions: *create*, *destroy*, *serialize*, *deserialize*, and *scan*. #### Create diff --git a/src/treesitter/docs/section-7-playground.html b/src/treesitter/docs/section-7-playground.html index 72b6c385b..384af33c2 100644 --- a/src/treesitter/docs/section-7-playground.html +++ b/src/treesitter/docs/section-7-playground.html @@ -21,6 +21,7 @@

Code

+ diff --git a/src/treesitter/highlight/README.md b/src/treesitter/highlight/README.md index 982e510a6..4ca76d6c9 100644 --- a/src/treesitter/highlight/README.md +++ b/src/treesitter/highlight/README.md @@ -12,8 +12,8 @@ to parse, to your `Cargo.toml`: ```toml [dependencies] -tree-sitter-highlight = "^0.21.0" -tree-sitter-javascript = "0.20.3" +tree-sitter-highlight = "0.22.0" +tree-sitter-javascript = "0.21.3" ``` Define the list of highlight names that you will recognize: @@ -61,9 +61,8 @@ let mut javascript_config = HighlightConfiguration::new( javascript_language, "javascript", tree_sitter_javascript::HIGHLIGHT_QUERY, - tree_sitter_javascript::INJECTION_QUERY, + tree_sitter_javascript::INJECTIONS_QUERY, tree_sitter_javascript::LOCALS_QUERY, - false, ).unwrap(); ``` diff --git a/src/treesitter/lib/CMakeLists.txt b/src/treesitter/lib/CMakeLists.txt new file mode 100644 index 000000000..6e17d9a79 --- /dev/null +++ b/src/treesitter/lib/CMakeLists.txt @@ -0,0 +1,61 @@ +cmake_minimum_required(VERSION 3.13) + +project(tree-sitter + VERSION "0.23.0" + DESCRIPTION "An incremental parsing system for programming tools" + HOMEPAGE_URL "https://tree-sitter.github.io/tree-sitter/" + LANGUAGES C) + +if(NOT MSVC) + set(CMAKE_C_FLAGS "-O3 -Wall -Wextra -Wshadow -Wno-unused-parameter -pedantic") +endif(NOT MSVC) + +option(BUILD_SHARED_LIBS "Build using shared libraries" ON) +option(TREE_SITTER_FEATURE_WASM "Enable the Wasm feature" OFF) + +file(GLOB TS_SOURCE_FILES src/*.c) +list(REMOVE_ITEM TS_SOURCE_FILES "${PROJECT_SOURCE_DIR}/src/lib.c") + +add_library(tree-sitter ${TS_SOURCE_FILES}) + +target_include_directories(tree-sitter PRIVATE src src/wasm include) + +if(TREE_SITTER_FEATURE_WASM) + if(NOT DEFINED CACHE{WASMTIME_INCLUDE_DIR}) + message(CHECK_START "Looking for wasmtime headers") + find_path(WASMTIME_INCLUDE_DIR wasmtime.h + PATHS ENV DEP_WASMTIME_C_API_INCLUDE + REQUIRED) + message(CHECK_PASS "found") + endif(NOT DEFINED CACHE{WASMTIME_INCLUDE_DIR}) + + if(NOT DEFINED 
CACHE{WASMTIME_LIBRARY}) + message(CHECK_START "Looking for wasmtime library") + find_library(WASMTIME_LIBRARY wasmtime + REQUIRED) + message(CHECK_PASS "found") + endif(NOT DEFINED CACHE{WASMTIME_LIBRARY}) + + target_compile_definitions(tree-sitter PUBLIC TREE_SITTER_FEATURE_WASM) + target_include_directories(tree-sitter SYSTEM PRIVATE "${WASMTIME_INCLUDE_DIR}") + target_link_libraries(tree-sitter PRIVATE "${WASMTIME_LIBRARY}") + set_property(TARGET tree-sitter PROPERTY C_STANDARD_REQUIRED ON) +endif(TREE_SITTER_FEATURE_WASM) + +set_target_properties(tree-sitter + PROPERTIES + C_STANDARD 11 + C_VISIBILITY_PRESET hidden + POSITION_INDEPENDENT_CODE ON + SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}") + +configure_file(tree-sitter.pc.in "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter.pc" @ONLY) + +include(GNUInstallDirs) + +install(FILES include/tree_sitter/api.h + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/tree_sitter") +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/tree-sitter.pc" + DESTINATION "${CMAKE_INSTALL_DATAROOTDIR}/pkgconfig") +install(TARGETS tree-sitter + LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}") diff --git a/src/treesitter/lib/Cargo.toml b/src/treesitter/lib/Cargo.toml index c5dc919d2..2884d411a 100644 --- a/src/treesitter/lib/Cargo.toml +++ b/src/treesitter/lib/Cargo.toml @@ -43,7 +43,7 @@ default-features = false features = ["cranelift"] [build-dependencies] -bindgen = { version = "0.69.4", optional = true } +bindgen = { version = "0.70.1", optional = true } cc.workspace = true [lib] diff --git a/src/treesitter/lib/binding_rust/bindings.rs b/src/treesitter/lib/binding_rust/bindings.rs index 3f8312665..445de0813 100644 --- a/src/treesitter/lib/binding_rust/bindings.rs +++ b/src/treesitter/lib/binding_rust/bindings.rs @@ -1,4 +1,4 @@ -/* automatically generated by rust-bindgen 0.69.4 */ +/* automatically generated by rust-bindgen 0.70.0 */ pub const TREE_SITTER_LANGUAGE_VERSION: u32 = 14; pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: 
u32 = 13; @@ -373,6 +373,13 @@ extern "C" { child_index: u32, ) -> *const ::core::ffi::c_char; } +extern "C" { + #[doc = " Get the field name for node's named child at the given index, where zero\n represents the first named child. Returns NULL, if no field is found."] + pub fn ts_node_field_name_for_named_child( + self_: TSNode, + named_child_index: u32, + ) -> *const ::core::ffi::c_char; +} extern "C" { #[doc = " Get the node's number of children."] pub fn ts_node_child_count(self_: TSNode) -> u32; @@ -462,7 +469,7 @@ extern "C" { pub fn ts_tree_cursor_delete(self_: *mut TSTreeCursor); } extern "C" { - #[doc = " Re-initialize a tree cursor to start at a different node."] + #[doc = " Re-initialize a tree cursor to start at the original node that the cursor was\n constructed with."] pub fn ts_tree_cursor_reset(self_: *mut TSTreeCursor, node: TSNode); } extern "C" { @@ -637,6 +644,14 @@ extern "C" { extern "C" { pub fn ts_query_cursor_set_match_limit(self_: *mut TSQueryCursor, limit: u32); } +extern "C" { + #[doc = " Set the maximum duration in microseconds that query execution should be allowed to\n take before halting.\n\n If query execution takes longer than this, it will halt early, returning NULL.\n See [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] for more information."] + pub fn ts_query_cursor_set_timeout_micros(self_: *mut TSQueryCursor, timeout_micros: u64); +} +extern "C" { + #[doc = " Get the duration in microseconds that query execution is allowed to take.\n\n This is set via [`ts_query_cursor_set_timeout_micros`]."] + pub fn ts_query_cursor_timeout_micros(self_: *const TSQueryCursor) -> u64; +} extern "C" { #[doc = " Set the range of bytes or (row, column) positions in which the query\n will be executed."] pub fn ts_query_cursor_set_byte_range( diff --git a/src/treesitter/lib/binding_rust/lib.rs b/src/treesitter/lib/binding_rust/lib.rs index c97fd5ca6..b5856c0d3 100644 --- a/src/treesitter/lib/binding_rust/lib.rs +++ 
b/src/treesitter/lib/binding_rust/lib.rs @@ -291,8 +291,9 @@ pub struct LossyUtf8<'a> { } impl Language { + #[must_use] pub fn new(builder: LanguageFn) -> Self { - Self(unsafe { (builder.into_raw())() as _ }) + Self(unsafe { builder.into_raw()().cast() }) } /// Get the ABI version number that indicates which version of the @@ -1235,6 +1236,14 @@ impl<'tree> Node<'tree> { } } + /// Get the field name of this node's named child at the given index. + pub fn field_name_for_named_child(&self, named_child_index: u32) -> Option<&'static str> { + unsafe { + let ptr = ffi::ts_node_field_name_for_named_child(self.0, named_child_index); + (!ptr.is_null()).then(|| CStr::from_ptr(ptr).to_str().unwrap()) + } + } + /// Iterate over this node's children. /// /// A [`TreeCursor`] is used to retrieve the children efficiently. Obtain @@ -1381,6 +1390,20 @@ impl<'tree> Node<'tree> { Self::new(unsafe { ffi::ts_node_prev_named_sibling(self.0) }) } + /// Get the node's first child that extends beyond the given byte offset. + #[doc(alias = "ts_node_first_child_for_byte")] + #[must_use] + pub fn first_child_for_byte(&self, byte: usize) -> Option { + Self::new(unsafe { ffi::ts_node_first_child_for_byte(self.0, byte as u32) }) + } + + /// Get the node's first named child that extends beyond the given byte offset. + #[doc(alias = "ts_node_first_named_child_for_point")] + #[must_use] + pub fn first_named_child_for_byte(&self, byte: usize) -> Option { + Self::new(unsafe { ffi::ts_node_first_named_child_for_byte(self.0, byte as u32) }) + } + /// Get the node's number of descendants, including one for the node itself. #[doc(alias = "ts_node_descendant_count")] #[must_use] @@ -2362,6 +2385,26 @@ impl QueryCursor { } } + /// Set the maximum duration in microseconds that query execution should be allowed to + /// take before halting. + /// + /// If query execution takes longer than this, it will halt early, returning None. 
+ #[doc(alias = "ts_query_cursor_set_timeout_micros")] + pub fn set_timeout_micros(&mut self, timeout: u64) { + unsafe { + ffi::ts_query_cursor_set_timeout_micros(self.ptr.as_ptr(), timeout); + } + } + + /// Get the duration in microseconds that query execution is allowed to take. + /// + /// This is set via [`set_timeout_micros`](QueryCursor::set_timeout_micros). + #[doc(alias = "ts_query_cursor_timeout_micros")] + #[must_use] + pub fn timeout_micros(&self) -> u64 { + unsafe { ffi::ts_query_cursor_timeout_micros(self.ptr.as_ptr()) } + } + /// Check if, on its last execution, this cursor exceeded its maximum number /// of in-progress matches. #[doc(alias = "ts_query_cursor_did_exceed_match_limit")] diff --git a/src/treesitter/lib/binding_web/binding.c b/src/treesitter/lib/binding_web/binding.c index fba62eba4..36efb0424 100644 --- a/src/treesitter/lib/binding_web/binding.c +++ b/src/treesitter/lib/binding_web/binding.c @@ -792,7 +792,8 @@ void ts_query_matches_wasm( uint32_t start_index, uint32_t end_index, uint32_t match_limit, - uint32_t max_start_depth + uint32_t max_start_depth, + uint32_t timeout_micros ) { if (!scratch_query_cursor) { scratch_query_cursor = ts_query_cursor_new(); @@ -810,6 +811,7 @@ void ts_query_matches_wasm( ts_query_cursor_set_byte_range(scratch_query_cursor, start_index, end_index); ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit); ts_query_cursor_set_max_start_depth(scratch_query_cursor, max_start_depth); + ts_query_cursor_set_timeout_micros(scratch_query_cursor, timeout_micros); ts_query_cursor_exec(scratch_query_cursor, self, node); uint32_t index = 0; @@ -847,7 +849,8 @@ void ts_query_captures_wasm( uint32_t start_index, uint32_t end_index, uint32_t match_limit, - uint32_t max_start_depth + uint32_t max_start_depth, + uint32_t timeout_micros ) { if (!scratch_query_cursor) { scratch_query_cursor = ts_query_cursor_new(); @@ -862,6 +865,7 @@ void ts_query_captures_wasm( 
ts_query_cursor_set_byte_range(scratch_query_cursor, start_index, end_index); ts_query_cursor_set_match_limit(scratch_query_cursor, match_limit); ts_query_cursor_set_max_start_depth(scratch_query_cursor, max_start_depth); + ts_query_cursor_set_timeout_micros(scratch_query_cursor, timeout_micros); ts_query_cursor_exec(scratch_query_cursor, self, node); unsigned index = 0; diff --git a/src/treesitter/lib/binding_web/binding.js b/src/treesitter/lib/binding_web/binding.js index 2b4696c3a..a626aa01c 100644 --- a/src/treesitter/lib/binding_web/binding.js +++ b/src/treesitter/lib/binding_web/binding.js @@ -1279,6 +1279,7 @@ class Query { endIndex = 0, matchLimit = 0xFFFFFFFF, maxStartDepth = 0xFFFFFFFF, + timeoutMicros = 0, } = {}, ) { if (typeof matchLimit !== 'number') { @@ -1298,6 +1299,7 @@ class Query { endIndex, matchLimit, maxStartDepth, + timeoutMicros, ); const rawCount = getValue(TRANSFER_BUFFER, 'i32'); @@ -1342,6 +1344,7 @@ class Query { endIndex = 0, matchLimit = 0xFFFFFFFF, maxStartDepth = 0xFFFFFFFF, + timeoutMicros = 0, } = {}, ) { if (typeof matchLimit !== 'number') { @@ -1361,6 +1364,7 @@ class Query { endIndex, matchLimit, maxStartDepth, + timeoutMicros, ); const count = getValue(TRANSFER_BUFFER, 'i32'); diff --git a/src/treesitter/lib/binding_web/test/query-test.js b/src/treesitter/lib/binding_web/test/query-test.js index fad6b3cf2..db4c10f8a 100644 --- a/src/treesitter/lib/binding_web/test/query-test.js +++ b/src/treesitter/lib/binding_web/test/query-test.js @@ -451,6 +451,17 @@ describe('Query', () => { ]); }); }); + + describe('Set a timeout', () => + it('returns less than the expected matches', () => { + tree = parser.parse('function foo() while (true) { } }\n'.repeat(1000)); + query = JavaScript.query('(function_declaration name: (identifier) @function)'); + const matches = query.matches(tree.rootNode, { timeoutMicros: 1000 }); + assert.isBelow(matches.length, 1000); + const matches2 = query.matches(tree.rootNode, { timeoutMicros: 0 }); + 
assert.equal(matches2.length, 1000); + }) + ); }); function formatMatches(matches) { diff --git a/src/treesitter/lib/binding_web/tree-sitter-web.d.ts b/src/treesitter/lib/binding_web/tree-sitter-web.d.ts index 97a48077e..cfd8a1020 100644 --- a/src/treesitter/lib/binding_web/tree-sitter-web.d.ts +++ b/src/treesitter/lib/binding_web/tree-sitter-web.d.ts @@ -150,11 +150,10 @@ declare module 'web-tree-sitter' { rootNodeWithOffset(offsetBytes: number, offsetExtent: Point): SyntaxNode; copy(): Tree; delete(): void; - edit(edit: Edit): Tree; + edit(edit: Edit): void; walk(): TreeCursor; getChangedRanges(other: Tree): Range[]; getIncludedRanges(): Range[]; - getEditedRange(other: Tree): Range; getLanguage(): Language; } @@ -179,6 +178,7 @@ declare module 'web-tree-sitter' { endIndex?: number; matchLimit?: number; maxStartDepth?: number; + timeoutMicros?: number; }; export interface PredicateResult { diff --git a/src/treesitter/lib/include/tree_sitter/api.h b/src/treesitter/lib/include/tree_sitter/api.h index c1fbad254..4c19bbdf8 100644 --- a/src/treesitter/lib/include/tree_sitter/api.h +++ b/src/treesitter/lib/include/tree_sitter/api.h @@ -570,6 +570,12 @@ TSNode ts_node_child(TSNode self, uint32_t child_index); */ const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index); +/** + * Get the field name for node's named child at the given index, where zero + * represents the first named child. Returns NULL, if no field is found. + */ +const char *ts_node_field_name_for_named_child(TSNode self, uint32_t named_child_index); + /** * Get the node's number of children. */ @@ -983,6 +989,22 @@ bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self); uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self); void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit); +/** + * Set the maximum duration in microseconds that query execution should be allowed to + * take before halting. 
+ * + * If query execution takes longer than this, it will halt early, returning NULL. + * See [`ts_query_cursor_next_match`] or [`ts_query_cursor_next_capture`] for more information. + */ +void ts_query_cursor_set_timeout_micros(TSQueryCursor *self, uint64_t timeout_micros); + +/** + * Get the duration in microseconds that query execution is allowed to take. + * + * This is set via [`ts_query_cursor_set_timeout_micros`]. + */ +uint64_t ts_query_cursor_timeout_micros(const TSQueryCursor *self); + /** * Set the range of bytes or (row, column) positions in which the query * will be executed. diff --git a/src/treesitter/lib/language/language.rs b/src/treesitter/lib/language/language.rs index 4c194da77..504c93744 100644 --- a/src/treesitter/lib/language/language.rs +++ b/src/treesitter/lib/language/language.rs @@ -1,10 +1,11 @@ #![no_std] -/// LanguageFn wraps a C function that returns a pointer to a tree-sitter grammer. +/// `LanguageFn` wraps a C function that returns a pointer to a tree-sitter grammer. #[repr(transparent)] +#[derive(Clone, Copy)] pub struct LanguageFn(unsafe extern "C" fn() -> *const ()); impl LanguageFn { - /// Creates a `LanguageFn`. + /// Creates a [`LanguageFn`]. /// /// # Safety /// @@ -14,7 +15,8 @@ impl LanguageFn { Self(f) } - /// Gets the function wrapped by this `LanguageFn`. + /// Gets the function wrapped by this [`LanguageFn`]. 
+ #[must_use] pub const fn into_raw(self) -> unsafe extern "C" fn() -> *const () { self.0 } diff --git a/src/treesitter/lib/src/node.c b/src/treesitter/lib/src/node.c index 1c0eea738..83d48cb8e 100644 --- a/src/treesitter/lib/src/node.c +++ b/src/treesitter/lib/src/node.c @@ -12,6 +12,8 @@ typedef struct { const TSSymbol *alias_sequence; } NodeChildIterator; +static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous); + // TSNode - constructors TSNode ts_node_new( @@ -101,6 +103,21 @@ static inline bool ts_node_child_iterator_next( return true; } +// This will return true if the next sibling is a zero-width token that is adjacent to the current node and is relevant +static inline bool ts_node_child_iterator_next_sibling_is_empty_adjacent(NodeChildIterator *self, TSNode previous) { + if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; + if (self->child_index == 0) return false; + const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; + TSSymbol alias = 0; + if (!ts_subtree_extra(*child)) { + if (self->alias_sequence) { + alias = self->alias_sequence[self->structural_child_index]; + } + } + TSNode next = ts_node_new(self->tree, child, self->position, alias); + return ts_node_end_byte(previous) == ts_node_end_byte(next) && ts_node__is_relevant(next, true); +} + // TSNode - private static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { @@ -304,22 +321,36 @@ static inline TSNode ts_node__first_child_for_byte( TSNode node = self; bool did_descend = true; + NodeChildIterator last_iterator; + bool has_last_iterator = false; + while (did_descend) { did_descend = false; TSNode child; NodeChildIterator iterator = ts_node_iterate_children(&node); + loop: while (ts_node_child_iterator_next(&iterator, &child)) { if (ts_node_end_byte(child) > goal) { if (ts_node__is_relevant(child, include_anonymous)) { return child; } else if (ts_node_child_count(child) > 0) { + if (iterator.child_index < 
ts_subtree_child_count(ts_node__subtree(child))) { + last_iterator = iterator; + has_last_iterator = true; + } did_descend = true; node = child; break; } } } + + if (!did_descend && has_last_iterator) { + iterator = last_iterator; + has_last_iterator = false; + goto loop; + } } return ts_node__null(); @@ -530,6 +561,24 @@ TSNode ts_node_child_containing_descendant(TSNode self, TSNode subnode) { ) { return ts_node__null(); } + + // Here we check the current self node and *all* of its zero-width token siblings that follow. + // If any of these nodes contain the target subnode, we return that node. Otherwise, we restore the node we started at + // for the loop condition, and that will continue with the next *non-zero-width* sibling. + TSNode old = self; + // While the next sibling is a zero-width token + while (ts_node_child_iterator_next_sibling_is_empty_adjacent(&iter, self)) { + TSNode current_node = ts_node_child_containing_descendant(self, subnode); + // If the target child is in self, return it + if (!ts_node_is_null(current_node)) { + return current_node; + } + ts_node_child_iterator_next(&iter, &self); + if (self.id == subnode.id) { + return ts_node__null(); + } + } + self = old; } while (iter.position.bytes < end_byte || ts_node_child_count(self) == 0); } while (!ts_node__is_relevant(self, true)); @@ -674,6 +723,48 @@ const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) { return NULL; } +const char *ts_node_field_name_for_named_child(TSNode self, uint32_t named_child_index) { + TSNode result = self; + bool did_descend = true; + const char *inherited_field_name = NULL; + + while (did_descend) { + did_descend = false; + + TSNode child; + uint32_t index = 0; + NodeChildIterator iterator = ts_node_iterate_children(&result); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (ts_node__is_relevant(child, false)) { + if (index == named_child_index) { + if (ts_node_is_extra(child)) { + return NULL; + } + const char *field_name = 
ts_node__field_name_from_language(result, iterator.structural_child_index - 1); + if (field_name) return field_name; + return inherited_field_name; + } + index++; + } else { + uint32_t named_grandchild_index = named_child_index - index; + uint32_t grandchild_count = ts_node__relevant_child_count(child, false); + if (named_grandchild_index < grandchild_count) { + const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1); + if (field_name) inherited_field_name = field_name; + + did_descend = true; + result = child; + named_child_index = named_grandchild_index; + break; + } + index += grandchild_count; + } + } + } + + return NULL; +} + TSNode ts_node_child_by_field_name( TSNode self, const char *name, diff --git a/src/treesitter/lib/src/parser.c b/src/treesitter/lib/src/parser.c index 2927d8205..5db2cf50e 100644 --- a/src/treesitter/lib/src/parser.c +++ b/src/treesitter/lib/src/parser.c @@ -83,7 +83,7 @@ static const unsigned MAX_VERSION_COUNT = 6; static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4; static const unsigned MAX_SUMMARY_DEPTH = 16; static const unsigned MAX_COST_DIFFERENCE = 16 * ERROR_COST_PER_SKIPPED_TREE; -static const unsigned OP_COUNT_PER_TIMEOUT_CHECK = 100; +static const unsigned OP_COUNT_PER_PARSER_TIMEOUT_CHECK = 100; typedef struct { Subtree token; @@ -1565,7 +1565,7 @@ static bool ts_parser__advance( // If a cancellation flag or a timeout was provided, then check every // time a fixed number of parse actions has been processed. 
- if (++self->operation_count == OP_COUNT_PER_TIMEOUT_CHECK) { + if (++self->operation_count == OP_COUNT_PER_PARSER_TIMEOUT_CHECK) { self->operation_count = 0; } if ( diff --git a/src/treesitter/lib/src/query.c b/src/treesitter/lib/src/query.c index c9e8fbd0c..4941f507c 100644 --- a/src/treesitter/lib/src/query.c +++ b/src/treesitter/lib/src/query.c @@ -1,6 +1,7 @@ #include "tree_sitter/api.h" #include "./alloc.h" #include "./array.h" +#include "./clock.h" #include "./language.h" #include "./point.h" #include "./tree_cursor.h" @@ -312,6 +313,9 @@ struct TSQueryCursor { TSPoint start_point; TSPoint end_point; uint32_t next_state_id; + TSClock end_clock; + TSDuration timeout_duration; + unsigned operation_count; bool on_visible_node; bool ascending; bool halted; @@ -322,6 +326,7 @@ static const TSQueryError PARENT_DONE = -1; static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; static const uint16_t NONE = UINT16_MAX; static const TSSymbol WILDCARD_SYMBOL = 0; +static const unsigned OP_COUNT_PER_QUERY_TIMEOUT_CHECK = 100; /********** * Stream @@ -2986,6 +2991,9 @@ TSQueryCursor *ts_query_cursor_new(void) { .start_point = {0, 0}, .end_point = POINT_MAX, .max_start_depth = UINT32_MAX, + .timeout_duration = 0, + .end_clock = clock_null(), + .operation_count = 0, }; array_reserve(&self->states, 8); array_reserve(&self->finished_states, 8); @@ -3012,6 +3020,14 @@ void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) { self->capture_list_pool.max_capture_list_count = limit; } +uint64_t ts_query_cursor_timeout_micros(const TSQueryCursor *self) { + return duration_to_micros(self->timeout_duration); +} + +void ts_query_cursor_set_timeout_micros(TSQueryCursor *self, uint64_t timeout_micros) { + self->timeout_duration = duration_from_micros(timeout_micros); +} + #ifdef DEBUG_EXECUTE_QUERY #define LOG(...) 
fprintf(stderr, __VA_ARGS__) #else @@ -3023,7 +3039,7 @@ void ts_query_cursor_exec( const TSQuery *query, TSNode node ) { - if (query) { + if (query) { LOG("query steps:\n"); for (unsigned i = 0; i < query->steps.size; i++) { QueryStep *step = &query->steps.contents[i]; @@ -3060,6 +3076,12 @@ void ts_query_cursor_exec( self->halted = false; self->query = query; self->did_exceed_match_limit = false; + self->operation_count = 0; + if (self->timeout_duration) { + self->end_clock = clock_after(clock_now(), self->timeout_duration); + } else { + self->end_clock = clock_null(); + } } void ts_query_cursor_set_byte_range( @@ -3456,7 +3478,19 @@ static inline bool ts_query_cursor__advance( } } - if (did_match || self->halted) return did_match; + if (++self->operation_count == OP_COUNT_PER_QUERY_TIMEOUT_CHECK) { + self->operation_count = 0; + } + if ( + did_match || + self->halted || + ( + self->operation_count == 0 && + !clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock) + ) + ) { + return did_match; + } // Exit the current node. if (self->ascending) { diff --git a/src/treesitter/lib/src/tree_cursor.c b/src/treesitter/lib/src/tree_cursor.c index ddd7d66b5..24416663b 100644 --- a/src/treesitter/lib/src/tree_cursor.c +++ b/src/treesitter/lib/src/tree_cursor.c @@ -475,8 +475,9 @@ uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) { TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry *last_entry = array_back(&self->stack); - TSSymbol alias_symbol = self->root_alias_symbol; - if (self->stack.size > 1 && !ts_subtree_extra(*last_entry->subtree)) { + bool is_extra = ts_subtree_extra(*last_entry->subtree); + TSSymbol alias_symbol = is_extra ? 
0 : self->root_alias_symbol; + if (self->stack.size > 1 && !is_extra) { TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; alias_symbol = ts_language_alias_at( self->tree->language, diff --git a/src/treesitter/lib/tree-sitter.pc.in b/src/treesitter/lib/tree-sitter.pc.in new file mode 100644 index 000000000..60fe5c4a6 --- /dev/null +++ b/src/treesitter/lib/tree-sitter.pc.in @@ -0,0 +1,10 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ + +Name: tree-sitter +Description: @PROJECT_DESCRIPTION@ +URL: @PROJECT_HOMEPAGE_URL@ +Version: @PROJECT_VERSION@ +Libs: -L${libdir} -ltree-sitter +Cflags: -I${includedir} diff --git a/src/treesitter/script/generate-bindings b/src/treesitter/script/generate-bindings index fe83352b5..a0022d8f7 100755 --- a/src/treesitter/script/generate-bindings +++ b/src/treesitter/script/generate-bindings @@ -37,6 +37,7 @@ bindgen \ --blocklist-type '^__.*' \ --no-prepend-enum-name \ --no-copy "$no_copy" \ + --use-core \ "$header_path" \ -- \ -D TREE_SITTER_FEATURE_WASM \ diff --git a/src/treesitter/tree-sitter.pc.in b/src/treesitter/tree-sitter.pc.in deleted file mode 100644 index f98816cb7..000000000 --- a/src/treesitter/tree-sitter.pc.in +++ /dev/null @@ -1,10 +0,0 @@ -prefix=@PREFIX@ -libdir=@LIBDIR@ -includedir=@INCLUDEDIR@ - -Name: tree-sitter -Description: An incremental parsing system for programming tools -URL: https://tree-sitter.github.io/ -Version: @VERSION@ -Libs: -L${libdir} -ltree-sitter -Cflags: -I${includedir}