diff --git a/Cargo.lock b/Cargo.lock index 76335a70c..66e24558f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,43 @@ # It is not intended for manual editing. version = 3 +[[package]] +name = "addr2line" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e4503c46a5c0c7844e948c9a4d6acd9f50cccb4de1c48eb9e291ea17470c678" +dependencies = [ + "gimli", +] + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "getrandom", + "once_cell", + "version_check", + "zerocopy", +] + +[[package]] +name = "aligned-vec" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e0966165eaf052580bd70eb1b32cb3d6245774c0104d1b2793e9650bf83b52a" +dependencies = [ + "equator", +] + [[package]] name = "anes" version = "0.1.6" @@ -104,6 +141,12 @@ version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a7924531f38b1970ff630f03eb20a2fde69db5c590c93b0f3482e95dcc5fd60" +[[package]] +name = "arrayvec" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" + [[package]] name = "atty" version = "0.2.14" @@ -132,6 +175,21 @@ dependencies = [ "syn 2.0.60", ] +[[package]] +name = "backtrace" +version = "0.3.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc23269a4f8976d0a4d2e7109211a419fe30e8d88d677cd60b6bc79c5732e0a" +dependencies = [ + "addr2line", + "cc", + "cfg-if", + "libc", + "miniz_oxide", + "object", + "rustc-demangle", +] + [[package]] name = "bit-set" 
version = "0.5.3" @@ -155,9 +213,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.3.3" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "630be753d4e58660abd17930c71b647fe46c27ea6b63cc59e1e3851406972e42" +checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" [[package]] name = "block-buffer" @@ -170,9 +228,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.13.0" +version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3e2c3daef883ecc1b5d58c15adae93470a91d425f3532ba1695849656af3fc1" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" [[package]] name = "bytecount" @@ -180,6 +238,12 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" +[[package]] +name = "bytemuck" +version = "1.19.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" + [[package]] name = "byteorder" version = "1.4.3" @@ -194,11 +258,13 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "cc" -version = "1.0.79" +version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f" +checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" dependencies = [ "jobserver", + "libc", + "shlex", ] [[package]] @@ -271,6 +337,15 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" +[[package]] +name = "cpp_demangle" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"96e58d342ad113c2b878f16d5d034c03be492ae460cdbc02b7f0f2284d310c7d" +dependencies = [ + "cfg-if", +] + [[package]] name = "cpufeatures" version = "0.2.5" @@ -379,6 +454,15 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "debugid" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef552e6f588e446098f6ba40d89ac146c8c7b64aade83c051ee00bb5d2bc18d" +dependencies = [ + "uuid", +] + [[package]] name = "diff" version = "0.1.13" @@ -407,6 +491,32 @@ version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7fcaabb2fef8c910e7f4c7ce9f67a1283a1715879a7c230ca9d6d1ae31f16d91" +[[package]] +name = "equator" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c35da53b5a021d2484a7cc49b2ac7f2d840f8236a286f84202369bd338d761ea" +dependencies = [ + "equator-macro", +] + +[[package]] +name = "equator-macro" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bf679796c0322556351f287a51b49e48f7c4986e727b5dd78c972d30e2e16cc" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.60", +] + +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.3.1" @@ -449,6 +559,18 @@ dependencies = [ "instant", ] +[[package]] +name = "findshlibs" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40b9e59cd0f7e0806cca4be089683ecb6434e602038df21fe6bf6711b2f07f64" +dependencies = [ + "cc", + "lazy_static", + "libc", + "winapi", +] + [[package]] name = "fnv" version = "1.0.7" @@ -476,6 +598,12 @@ dependencies = [ "wasi", ] +[[package]] +name = "gimli" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"40ecd4077b5ae9fd2e9e169b102c6c330d0605168eb0e8bf79952b256dbefffd" + [[package]] name = "half" version = "1.8.2" @@ -496,6 +624,12 @@ dependencies = [ "thiserror", ] +[[package]] +name = "hashbrown" +version = "0.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" + [[package]] name = "hermit-abi" version = "0.1.19" @@ -520,6 +654,34 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fed44880c466736ef9a5c5b5facefb5ed0785676d0c02d612db14e54f0d84286" +[[package]] +name = "indexmap" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "inferno" +version = "0.11.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "232929e1d75fe899576a3d5c7416ad0d88dbfbb3c3d6aa00873a7408a50ddb88" +dependencies = [ + "ahash", + "indexmap", + "is-terminal", + "itoa", + "log", + "num-format", + "once_cell", + "quick-xml 0.26.0", + "rgb", + "str_stack", +] + [[package]] name = "instant" version = "0.1.12" @@ -577,9 +739,9 @@ checksum = "453ad9f582a441959e5f0d088b02ce04cfe8d51a8eaf077f12ac6d3e94164ca6" [[package]] name = "jobserver" -version = "0.1.26" +version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2" +checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0" dependencies = [ "libc", ] @@ -640,6 +802,16 @@ version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57bcfdad1b858c2db7c38303a6d2ad4dfaf5eb53dfeb0910128b2c26d6158503" +[[package]] +name = "lock_api" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" +dependencies = [ + "autocfg", + "scopeguard", +] + [[package]] name = "log" version = "0.4.19" @@ -652,6 +824,15 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" +[[package]] +name = "memmap2" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "45fd3a57831bf88bc63f8cebc0cf956116276e97fef3966103e96416209f7c92" +dependencies = [ + "libc", +] + [[package]] name = "memoffset" version = "0.9.0" @@ -661,12 +842,42 @@ dependencies = [ "autocfg", ] +[[package]] +name = "miniz_oxide" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8a240ddb74feaf34a79a7add65a741f3167852fba007066dcac1ca548d89c08" +dependencies = [ + "adler", +] + +[[package]] +name = "nix" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +dependencies = [ + "bitflags 1.3.2", + "cfg-if", + "libc", +] + [[package]] name = "normalize-line-endings" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" +[[package]] +name = "num-format" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a652d9771a63711fd3c3deb670acfbe5c30a4072e664d7a3bf5a9e1056ac72c3" +dependencies = [ + "arrayvec", + "itoa", +] + [[package]] name = "num-traits" version = "0.2.15" @@ -687,6 +898,15 @@ dependencies = [ "libc", ] +[[package]] +name = "object" +version = "0.36.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aedf0a2d09c573ed1d8d85b30c119153926a2b36dce0ab28322c09a117a4683e" +dependencies = [ + "memchr", +] + [[package]] name = "once_cell" version = "1.18.0" @@ 
-718,6 +938,29 @@ dependencies = [ "winapi", ] +[[package]] +name = "parking_lot" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" +dependencies = [ + "lock_api", + "parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall 0.5.7", + "smallvec", + "windows-targets 0.52.6", +] + [[package]] name = "pest" version = "2.5.5" @@ -790,6 +1033,29 @@ dependencies = [ "plotters-backend", ] +[[package]] +name = "pprof" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebbe2f8898beba44815fdc9e5a4ae9c929e21c5dc29b0c774a15555f7f58d6d0" +dependencies = [ + "aligned-vec", + "backtrace", + "cfg-if", + "criterion", + "findshlibs", + "inferno", + "libc", + "log", + "nix", + "once_cell", + "parking_lot", + "smallvec", + "symbolic-demangle", + "tempfile", + "thiserror", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -852,6 +1118,15 @@ dependencies = [ "memchr", ] +[[package]] +name = "quick-xml" +version = "0.26.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" +dependencies = [ + "memchr", +] + [[package]] name = "quote" version = "1.0.36" @@ -931,6 +1206,15 @@ dependencies = [ "bitflags 1.3.2", ] +[[package]] +name = "redox_syscall" +version = "0.5.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" +dependencies = [ + "bitflags 2.6.0", +] + [[package]] name = "regex" version = "1.8.4" @@ -952,6 +1236,21 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "436b050e76ed2903236f032a59761c1eb99e1b0aead2c257922771dab1fc8c78" +[[package]] +name = "rgb" +version = "0.8.50" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57397d16646700483b67d2dd6511d79318f9d057fdbd21a4066aeac8b41d310a" +dependencies = [ + "bytemuck", +] + +[[package]] +name = "rustc-demangle" +version = "0.1.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" + [[package]] name = "rustc-hash" version = "1.1.0" @@ -978,7 +1277,7 @@ version = "0.38.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1ee020b1716f0a80e2ace9b03441a749e402e86712f15f16fe8a8f75afac732f" dependencies = [ - "bitflags 2.3.3", + "bitflags 2.6.0", "errno", "libc", "linux-raw-sys 0.4.5", @@ -1060,12 +1359,24 @@ dependencies = [ "digest", ] +[[package]] +name = "shlex" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" + [[package]] name = "similar" version = "2.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "420acb44afdae038210c99e69aae24109f32f15500aa708e81d46c9f29d55fcf" +[[package]] +name = "smallvec" +version = "1.13.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" + [[package]] name = "snapbox" version = "0.6.5" @@ -1089,6 +1400,41 @@ dependencies = [ "anstream 0.6.14", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + +[[package]] +name = "str_stack" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" + 
+[[package]] +name = "symbolic-common" +version = "12.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d4d73159efebfb389d819fd479afb2dbd57dcb3e3f4b7fcfa0e675f5a46c1cb" +dependencies = [ + "debugid", + "memmap2", + "stable_deref_trait", + "uuid", +] + +[[package]] +name = "symbolic-demangle" +version = "12.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a767859f6549c665011970874c3f541838b4835d5aaaa493d3ee383918be9f10" +dependencies = [ + "cpp_demangle", + "rustc-demangle", + "symbolic-common", +] + [[package]] name = "syn" version = "1.0.109" @@ -1120,7 +1466,7 @@ dependencies = [ "autocfg", "cfg-if", "fastrand", - "redox_syscall", + "redox_syscall 0.3.5", "rustix 0.37.20", "windows-sys 0.48.0", ] @@ -1136,7 +1482,7 @@ dependencies = [ "handlebars", "os_pipe", "pretty_assertions", - "quick-xml", + "quick-xml 0.23.1", "serde", "termcolor", "unicode-width", @@ -1227,6 +1573,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "uuid" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" + [[package]] name = "version_check" version = "0.9.4" @@ -1576,12 +1928,14 @@ dependencies = [ "anstyle", "anyhow", "automod", + "bumpalo", "circular", "criterion", "doc-comment", "is-terminal", "lexopt", "memchr", + "pprof", "proptest", "rustc-hash", "snapbox", @@ -1602,3 +1956,23 @@ name = "yansi" version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + 
"zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.60", +] diff --git a/Cargo.toml b/Cargo.toml index 86a2653ef..15178344d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -131,6 +131,8 @@ memchr = { version = "2.5", optional = true, default-features = false } terminal_size = { version = "0.4.0", optional = true } [dev-dependencies] +bumpalo = { version ="3.14.0", features = ["boxed", "collections"] } +pprof = { version = "0.14", features = ["flamegraph", "criterion"] } doc-comment = "0.3" proptest = "1.2.0" criterion = "0.5.1" @@ -197,6 +199,17 @@ required-features = ["alloc"] name = "string" required-features = ["alloc"] +[[example]] +name = "pratt" +required-features = ["std"] + + +[[bench]] +name = "pratt-example" +path = "examples/pratt/bench.rs" +harness = false +required-features = ["std"] + [[bench]] name = "arithmetic" path = "examples/arithmetic/bench.rs" @@ -241,5 +254,10 @@ path = "examples/json/bench.rs" harness = false required-features = ["std"] +[[bench]] +name = "pratt" +path = "benches/pratt.rs" +harness = false + [lints] workspace = true diff --git a/benches/ariphmetic.txt b/benches/ariphmetic.txt new file mode 100644 index 000000000..35e6877e8 --- /dev/null +++ b/benches/ariphmetic.txt @@ -0,0 +1 @@ 
+1+1*1*2*2*1+1*(2*1+1)*3*1+1*2*1*1+1+1*3*1*1+1+1+1*4*1*1+1+1+1+1*5*1*1+1-2-3*1+1-2-3*(1+1)-2-3*1+(1-2-3)*1*1^2*1*1^2+2*1^3*1*1^2+2*1^3+3*1^4*1*1^2+2*1^3+3*1^4+3*1^5*1*1*1^2*1*1^2+2*1^3*1*1^2+2*1^3+3*1^4*1*1^2+2*1^3+3*1^4+4*1^5*1+1+1*1*1*1*1-1-1*1-1-1*1-1+1-1+1-1*1+1-1+1-1+1+1*1-1*1-1*1+1-1*1-1*1-1-1-1-1+1+1+1*1+1+1-1-1-1*1*1*1*1-1-1-1-1-1*1*1*1+1+-1*1^2+1*1+1^2*1^2*2*2*1^2*1^2^3*1^2^3-1^8*+1^+2^+3-1^+8*1^-2^1-1-1^8*1^1-2^1-3-1-1^1-8*1-1^2^3-1^8*(-1+1^2^3)-(1^2^3-1)*(-1*1^2^3)-(1^2^3*-1)*(1^2^3*4)-(1^2^3-4)*1^2^3^1*(1^2)^3*1^(2^3)*1^2^3*1^(2*3)*1^(2^3)*1^2^3-1^13*(1*(2*(3*(4*(5*(6*(7*(1+1))))))))*(1+(2+(3+(4+(5+(6+(7+(1*1))))))))*((((((((1-1)*1)*2)*3)*4)*5)*6)*7)*((((((((1-1)+1)+2)+3)+4)+5)+6)+7)*(2+(3+(4+(5+(((((1*1)*9)*8)*7)*6)))))*(2*(3*(4*(5*(((((1-1)+9)+8)+7)+6)))))*((((((((1-1)*(1-2))*(2-3))*(3-4))*(4-5))*(5-6))*(6-7))*(7-8))*((((((((1-1)+(1-2))+(2-3))+(3-4))+(4-5))+(5-6))+(6-7))+(7-8))*((2-3)+((3-4)+((4-5)+((5-6)+(((((1*1)*(9-10))*(8-9))*(7-8))*(6-8))))))*((2-3)*((3-4)*((4-5)*((5-6)*(((((1*1)+(9-10))+(8-9))+(7-8))+(6-8))))))*(1*(2*(3*(4+(5+(6+(7*(8*(9*(10*(1+1)))))))))))*(((((((((((1-1)*1)*2)*3)+4)+5)+6)*7)*8)+9)+10)*(1-1)+(2-1)+(3-1)+(4-1)+(5-1)+(6-1)+(7-1)+(8-1)+(9-1)+(1-1)*(1-(1+1))+(2-(1-2))+(3-(1-3))+(4-(1-4))+(5-(1-5))+(6-(1-6))+(7-(1-7))+(8-(1-8))+(9-(1-9))*(1-1)+(1-2)+(1-3)+(1-4)+(1-5)+(1-6)+(1-7)+(1-8)+(1-9)+(1-1)*((1-1)-1)+((1-2)-2)+((1-3)-3)+((1-4)-4)+((1-5)-5)+((1-6)-6)+((1-7)-7)+((1-8)-8)+((1-9)-9)*(1-1)+(2-1)+(3-1)+(4-1)+(5-1)+(6-1)+(7-1)+(8-1)+(9-1)+(1-1)*(1-(1-1))+(2-(1-2))+(3-(1-3))+(4-(1-4))+(5-(1-5))+(6-(1-6))+(7-(1-7))+(8-(1-8))+(9-(1-9))*(1-1)+(1-2)+(1-3)+(1-4)+(1-5)+(1-6)+(1-7)+(1-8)+(1-9)+(1-1)*((1+1)-1)+((1-2)-2)+((1-3)-3)+((1-4)-4)+((1-5)-5)+((1-6)-6)+((1-7)-7)+((1-8)-8)+((1-9)-9)*(1+1-1)*(1-1-1)*1-((1+1)*(1-1))-1*1-((1*1)+(1-1))-3*1+1*(1+1)*3*(2*1+2*1)*2*(2*1)*(2*1)*2*-(1^1)*1+1*1*1*1+1*1+1*(1+1)*(1+1)*(-3)*1+1-1*1-5^6*1^1-1*1-5+6*2*(1+1)*((1+1)-2)+(1-(3*1))*((1+1+2+1+3)+1+4+1+5+1+6+1+7)-(1+1+2+1+3+1+4+1+5+(1+6+1+7))*(1+1+2+1+3+1+4+(1
+5+1+6)+1+7)*(1+(1+2+1+3)+1+4+1+5+1+6+1+7)*(1-((((1+(((1*(((((1*((((3*((1+1)+1))+1)+1)*1))+0)+1)+1)-1))+1)+1))+1)*1)-1))*(1+1)*(1+2)*(1+3)*(1+4)*(1+5)*(1+6)*(1+7)*(1+8)*(1+9)*(1+10)*(1+11)*(1+12)*(1+(1+1)*(1+2))*(2+(1+3)*(1+4))*(3+(1+5)*(1+6))*(4+(1+7)*(1+8))*(5+(1+9)*(1+10))*(6+(1+11)*(1+12))*(5+1)+(2*1-2-3*1)*(1-3+1-4)+(1+7)*2*1^1+1*3*1^2+2*1^1+1*4*1^3+3*1^2+2*1^1+1*5*1^4+4*1^3+3*1^2+2*1^1+1*6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*8*1^7+7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*9*1^8+8*1^7+7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*1*1^9+9*1^8+8*1^7+7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*2*1^10+1*1^9+9*1^8+8*1^7+7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*3*1^11+2*1^10+1*1^9+9*1^8+8*1^7+7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*4*1^12+3*1^11+2*1^10+1*1^9+9*1^8+8*1^7+7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*(2*1+1)*((3*1+2)*1+1)*(((4*1+3)*1+2)*1+1)*((((5*1+4)*1+3)*1+2)*1+1)*(((((6*1+5)*1+4)*1+3)*1+2)*1+1)*((((((7*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*(((((((8*1+7)*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*((((((((9*1+8)*1+7)*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*(((((((((1*1+9)*1+8)*1+7)*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*((((((((((2*1+1)*1+9)*1+8)*1+7)*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*(((((((((((3*1+2)*1+1)*1+9)*1+8)*1+7)*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*((((((((((((4*1+3)*1+2)*1+1)*1+9)*1+8)*1+7)*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*+1++2+1++2-1+-2-1+2-+1+2+-1+2--1++2-+1++2+-1++2--1+-1-1++1+1++1-+1++1+-1++1--1+-1*-1++1*+1+1*+1++1*1+-1--1++1-+1+(1)+(1)+(1)-(1)+-(1)+(1)+-(1)-(1)++(1)-(1)+(+1)+(-1)+(-1)+(+1)+(-1)+(-1)+(-1)-(-1)+(+1)+(+1)+(+(1))-(1)+(-(1))+(-(1))+(+(1))+(+(1))+(-(1))-(-(1))+3-3-((+1)+(-1))+3-3-((-1)+(+1))+3-3-((-1)+(-1))+3-3-((-1)-(-1))+3-3-((+1)-(-1))+3-3-((-1)-(+1))+3-3-((+1)*(-1))+3-3-((-1)*(+1))+3-3+((-1+1)+(-1+1))+3-3+((+1-1)+(+1-1))+3-3+((+1-1)+(+1-1))+3-3+((-1+1)+(-1+1))+3-3+((-1+1)-(-1+1))+3-3+((+1-1)-(+1-1))+3-3+((+1-1)-(+1-1))+3-3+((-1+1)-(-1+1))+3-3+((+1)+(+1))+3+((+1)-(+1))+3-3+((+1)*(+1))+3-3+((((+1))+((+1))))+3-3+((+1-1)+((+1-1)))+3-3+((((+1-2))+((+1-2))))+3-3+((((+1-(+2)))+
((+1-(+2)))))+1^1+1^-1++1^+1++1^-1+-1^+1+-1^-1+-1^(-1)++1^+1+111++1^-1+111+-1^+1+111+-1^-1+111++1+2^3++1+2^+3++111+1^1++111+1^+1++111+1^(0+1)++111+1^(+1-0)++111+1^-1++111-1^1++111-1^+1++111-1^(0+1)++111-1^(+1-0)++111-1^-1++1^+1-111++1^-1-111+-1^+1-111+-1^-1-111++111-1^(010+1)++111-1^(+1-010)++111*1^1++111*1^+1++111*1^(0+1)++111*1^(+1-0)++111*1^-1+-111*1^1+-111*1^+1+-111*1^(0+1)+-111*1^(+1-0)+-111*1^-1+-1^2^3-1^6+-(-1^2^3)+1^6+-1^0+-1^1-1+-1^1-1+-1^(1-1)+-1^0+-1^1-1+-1^1-1+-1^(1-1)++++1 diff --git a/benches/pratt.rs b/benches/pratt.rs new file mode 100644 index 000000000..17b701a93 --- /dev/null +++ b/benches/pratt.rs @@ -0,0 +1,102 @@ +#[macro_use] +extern crate criterion; + +use criterion::black_box; +use criterion::Criterion; + +use winnow::ascii::digit1; +use winnow::combinator::delimited; +use winnow::combinator::empty; +use winnow::combinator::fail; +use winnow::combinator::peek; +use winnow::dispatch; +use winnow::token::any; +use winnow::PResult; +use winnow::Parser; + +type Stream<'i> = &'i [u8]; + +static CORPUS: &str = include_str!("ariphmetic.txt"); + +fn pratt_parser(i: &mut Stream<'_>) -> PResult<i64> { + use winnow::combinator::precedence; + // precedence::precedence( + // dispatch! {peek(any); + // b'(' => delimited(b'(', pratt_parser, b')'), + // _ => digit1.parse_to::<i64>() + // }, + // dispatch! {any; + // b'+' => empty.value((9, (&|a| a) as _)), + // b'-' => empty.value((9, (&|a: i64| -a) as _)), + // _ => fail + // }, + // fail, + // dispatch! 
{any; + // b'+' => empty.value((5, 6, (&|a, b| a + b) as _)), + // b'-' => empty.value((5, 6, (&|a, b| a - b) as _)), + // b'*' => empty.value((7, 8, (&|a, b| a * b) as _)), + // b'/' => empty.value((7, 8, (&|a, b| a / b) as _)), + // b'%' => empty.value((7, 8, (&|a, b| a % b) as _)), + // b'^' => empty.value((9, 10, (&|a, b| a ^ b) as _)), + // _ => fail + // }, + // ) + // .parse_next(i) + Ok(0) +} + +fn shunting_yard_parser(i: &mut Stream<'_>) -> PResult<i64> { + use winnow::combinator::shunting_yard; + // shunting_yard::precedence( + // dispatch! {peek(any); + // b'(' => delimited(b'(', shunting_yard_parser, b')'), + // _ => digit1.parse_to::<i64>() + // }, + // dispatch! {any; + // b'+' => empty.value((9, (&|a| a) as _)), + // b'-' => empty.value((9, (&|a: i64| -a) as _)), + // _ => fail + // }, + // fail, + // dispatch! {any; + // b'+' => empty.value((5, 6, (&|a, b| a + b) as _)), + // b'-' => empty.value((5, 6, (&|a, b| a - b) as _)), + // b'*' => empty.value((7, 8, (&|a, b| a * b) as _)), + // b'/' => empty.value((7, 8, (&|a, b| a / b) as _)), + // b'%' => empty.value((7, 8, (&|a, b| a % b) as _)), + // b'^' => empty.value((9, 10, (&|a, b| a ^ b) as _)), + // _ => fail + // }, + // ) + // .parse_next(i) + Ok(0) +} + +fn parse_expression(c: &mut Criterion) { + // strip the trailing line ending (`\n` or `\r\n`); a no-op if the corpus has none + let input = CORPUS.trim_end().as_bytes(); + let mut group = c.benchmark_group("pratt"); + + pratt_parser.parse(input).expect("pratt should parse"); + shunting_yard_parser + .parse(input) + .expect("shunting yard should parse"); + + group.bench_function("pratt", |b| { + b.iter(|| black_box(pratt_parser.parse(input).unwrap())); + }); + + group.bench_function("shunting yard", |b| { + b.iter(|| black_box(shunting_yard_parser.parse(input).unwrap())); + }); +} + +// https://www.jibbow.com/posts/criterion-flamegraphs/ +use pprof::criterion::{Output, PProfProfiler}; +criterion_group! 
{ + name = benches; + config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None))); + targets =parse_expression +} + +criterion_main!(benches); diff --git a/examples/pratt/bench.rs b/examples/pratt/bench.rs new file mode 100644 index 000000000..dae590b7a --- /dev/null +++ b/examples/pratt/bench.rs @@ -0,0 +1,97 @@ +mod parser; + +use std::cell::RefCell; + +use criterion::{black_box, BatchSize}; +use winnow::{prelude::*, Stateful}; + +fn pratt(c: &mut criterion::Criterion) { + let input = + "a = 2*-2 * (a ? 1 + 2 * 4 - --a.bar + 2 : 2) / ( &**foo.a->p! -+1) + 3^1 / 4 == 1 * (2 - 7 + 567 *12 /2) + 3*(1+2*( 45 /2))"; + let mut group = c.benchmark_group("pratt"); + + { + let bump = RefCell::new(bumpalo::Bump::new()); + + { + let i = Stateful { + input, + state: &*bump.borrow(), + }; + parser::pratt_parser.parse(i).expect("pratt should parse"); + } + bump.borrow_mut().reset(); + { + let i = Stateful { + input, + state: &*bump.borrow(), + }; + parser::shunting_yard_parser + .parse(i) + .expect("shunting yard should parse"); + } + bump.borrow_mut().reset(); + + { + group.bench_function("pratt", |b| { + b.iter_batched( + || { + bump.borrow_mut().reset(); + &bump + }, + |b| { + let i = Stateful { + input, + state: &*b.borrow(), + }; + black_box(parser::pratt_parser.parse(i).unwrap()); + }, + BatchSize::SmallInput, + ); + }); + } + { + group.bench_function("shunting_yard", |b| { + b.iter_batched( + || { + bump.borrow_mut().reset(); + &bump + }, + |b| { + let i = Stateful { + input, + state: &*b.borrow(), + }; + black_box(parser::shunting_yard_parser.parse(i).unwrap()); + }, + BatchSize::SmallInput, + ); + }); + } + } + + // group.bench_function("pratt_with_new_bump_each_time", |b| { + // b.iter_batched( + // || bumpalo::Bump::new(), + // |b| { + // let i = Stateful { input, state: &b }; + // black_box(parser::pratt_parser.parse(i).unwrap()); + // }, + // BatchSize::SmallInput, + // ); + // }); + // + // 
group.bench_function("shunting_yard_with_new_bump_each_time", |b| { + // b.iter_batched( + // || bumpalo::Bump::new(), + // |b| { + // let i = Stateful { input, state: &b }; + // black_box(parser::shunting_yard_parser.parse(i).unwrap()); + // }, + // BatchSize::SmallInput, + // ); + // }); +} + +criterion::criterion_group!(benches, pratt); +criterion::criterion_main!(benches); diff --git a/examples/pratt/main.rs b/examples/pratt/main.rs new file mode 100644 index 000000000..196bee81a --- /dev/null +++ b/examples/pratt/main.rs @@ -0,0 +1,49 @@ +use winnow::{prelude::*, Stateful}; + +mod parser; + +fn main() -> Result<(), lexopt::Error> { + let args = Args::parse()?; + + let input = args.input.as_deref().unwrap_or("1 + 1"); + let b = bumpalo::Bump::new(); + let input = Stateful { + input, + state: &b, + }; + match parser::pratt_parser.parse(input) { + Ok(result) => { + println!("{result}"); + } + Err(err) => { + println!("FAILED"); + println!("{err}"); + } + } + + Ok(()) +} + +#[derive(Default)] +struct Args { + input: Option<String>, +} + +impl Args { + fn parse() -> Result<Self, lexopt::Error> { + use lexopt::prelude::*; + + let mut res = Args::default(); + + let mut args = lexopt::Parser::from_env(); + while let Some(arg) = args.next()? 
{ + match arg { + Value(input) => { + res.input = Some(input.string()?); + } + _ => return Err(arg.unexpected()), + } + } + Ok(res) + } +} diff --git a/examples/pratt/parser.rs b/examples/pratt/parser.rs new file mode 100644 index 000000000..043514891 --- /dev/null +++ b/examples/pratt/parser.rs @@ -0,0 +1,802 @@ +use bumpalo::boxed::Box; +use bumpalo::collections::String as BString; +use winnow::combinator::{cut_err, empty, fail, not, opt, peek, separated_pair, trace}; +use winnow::error::ContextError; +use winnow::stream::AsChar as _; +use winnow::token::{any, take, take_while}; +use winnow::{ + ascii::{digit1, multispace0}, + combinator::alt, + combinator::delimited, + dispatch, + token::one_of, +}; +use winnow::{prelude::*, Stateful}; + +pub(crate) enum Expr<'a> { + Name(BString<'a>), + Value(i64), + + Assign(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + + Addr(Box<'a, Expr<'a>>), + Deref(Box<'a, Expr<'a>>), + + Dot(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + ArrowOp(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + Neg(Box<'a, Expr<'a>>), + Add(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + Sub(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + Mul(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + Div(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + Pow(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + Fac(Box<'a, Expr<'a>>), + + PreIncr(Box<'a, Expr<'a>>), + PostIncr(Box<'a, Expr<'a>>), + PreDecr(Box<'a, Expr<'a>>), + PostDecr(Box<'a, Expr<'a>>), + + And(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + Or(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + + // `==` + Eq(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + // `!=` + NotEq(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + // `!` + Not(Box<'a, Expr<'a>>), + Greater(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + GreaterEqual(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + Less(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + LessEqual(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + + // A parenthesized expression. 
+ Paren(Box<'a, Expr<'a>>), + FunctionCall(Box<'a, Expr<'a>>, Option<Box<'a, Expr<'a>>>), + Ternary(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + // foo[...] + Index(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + // a, b + Comma(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + + // % + Rem(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + BitXor(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + BitAnd(Box<'a, Expr<'a>>, Box<'a, Expr<'a>>), + BitwiseNot(Box<'a, Expr<'a>>), +} + +type Input<'i, 'a> = Stateful<&'i str, &'a bumpalo::Bump>; + +// Parser definition + +pub(crate) fn pratt_parser<'i, 'a>(i: &mut Input<'i, 'a>) -> PResult<Expr<'a>> { + use winnow::combinator::precedence::{self, Assoc}; + // precedence is based on https://en.cppreference.com/w/c/language/operator_precedence + // but specified in reverse order, because the `cppreference` table + // uses `descending` precedence, but we need ascending one + fn parser<'i, 'a>( + start_power: i64, + ) -> impl Parser<Input<'i, 'a>, Expr<'a>, ContextError> { + move |i: &mut Input<'i, 'a>| { + precedence::precedence( + start_power, + trace( + "operand", + delimited( + multispace0, + dispatch! {peek(any); + '(' => |i: &mut Input<'i, 'a>| { + delimited('(', parser(0).map(|e| Expr::Paren(Box::new_in(e, &*i.state))), cut_err(')')).parse_next(i) + }, + _ => alt(( + |i: &mut Input<'i, 'a>| { + identifier.map(|s| Expr::Name(BString::from_str_in(s, &*i.state))).parse_next(i) + }, + digit1.parse_to::<i64>().map(Expr::Value), + )), + }, + multispace0, + ), + ), + trace( + "prefix", + delimited( + multispace0, + dispatch! 
{any; + '+' => alt(( + // ++ + '+'.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::PreIncr(Box::new_in(a, i.state)))) as _)), + empty.value((18, (|_: &mut _, a| Ok(a)) as _)) + )), + '-' => alt(( + // -- + '-'.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::PreDecr(Box::new_in(a, i.state)))) as _)), + empty.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::Neg(Box::new_in(a, i.state)))) as _)) + )), + '&' => empty.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::Addr(Box::new_in(a, i.state)))) as _)), + '*' => empty.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::Deref(Box::new_in(a, i.state)))) as _)), + '!' => empty.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::Not(Box::new_in(a, i.state)))) as _)), + '~' => empty.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::BitwiseNot(Box::new_in(a, i.state)))) as _)), + _ => fail + }, + multispace0, + ), + ), + trace( + "postfix", + delimited( + multispace0, + alt(( + dispatch! {any; + '!' => not('=').value((19, (|i: &mut Input<'i, 'a>, a| Ok(Expr::Fac(Box::new_in(a, &i.state)))) as _)), + '?' => empty.value((3, (|i: &mut Input<'i, 'a>, cond| { + let (left, right) = cut_err(separated_pair(parser(0), delimited(multispace0, ':', multispace0), parser(3))).parse_next(i)?; + Ok(Expr::Ternary(Box::new_in(cond, &i.state), Box::new_in(left, &i.state), Box::new_in(right, &i.state))) + }) as _)), + '[' => empty.value((20, (|i: &mut Input<'i, 'a>, a| { + let index = delimited(multispace0, parser(0), (multispace0, cut_err(']'), multispace0)).parse_next(i)?; + Ok(Expr::Index(Box::new_in(a, &i.state), Box::new_in(index, &i.state))) + }) as _)), + '(' => empty.value((20, (|i: &mut Input<'i, 'a>, a| { + let args = delimited(multispace0, opt(parser(0)), (multispace0, cut_err(')'), multispace0)).parse_next(i)?; + Ok(Expr::FunctionCall(Box::new_in(a, &i.state), args.map(|a| Box::new_in(a, &i.state)))) + }) as _)), + _ => fail, + }, + dispatch! 
{take(2usize); + "++" => empty.value((20, (|i: &mut Input<'i, 'a>, a| Ok(Expr::PostIncr(Box::new_in(a, &i.state)))) as _)), + "--" => empty.value((20, (|i: &mut Input<'i, 'a>, a| Ok(Expr::PostDecr(Box::new_in(a, &i.state)))) as _)), + _ => fail, + }, + )), + multispace0, + ), + ), + trace( + "infix", + alt(( + // fail, + dispatch! {any; + '*' => alt(( + // ** + "*".value((Assoc::Right(28), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Pow(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + empty.value((Assoc::Left(16), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Mul(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + )), + '/' => empty.value((Assoc::Left(16), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Div(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + '%' => empty.value((Assoc::Left(16), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Rem(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + + '+' => empty.value((Assoc::Left(14), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Add(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + '-' => alt(( + dispatch!{take(2usize); + "ne" => empty.value((Assoc::Neither(10), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::NotEq(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + "eq" => empty.value((Assoc::Neither(10), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Eq(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + "gt" => empty.value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Greater(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + "ge" => empty.value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::GreaterEqual(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + "lt" => empty.value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Less(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + "le" => empty.value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::LessEqual(Box::new_in(a, &i.state), 
Box::new_in(b, &i.state)))) as _)), + _ => fail + }, + '>'.value((Assoc::Left(20), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::ArrowOp(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + empty.value((Assoc::Left(14), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Sub(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)) + )), + '.' => empty.value((Assoc::Left(20), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Dot(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + '&' => alt(( + // && + "&".value((Assoc::Left(6), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::And(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _) ), + + empty.value((Assoc::Left(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::BitAnd(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + )), + '^' => empty.value((Assoc::Left(8), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::BitXor(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + '=' => alt(( + // == + "=".value((Assoc::Neither(10), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Eq(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + empty.value((Assoc::Right(2), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Assign(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)) + )), + + '>' => alt(( + // >= + "=".value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::GreaterEqual(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + empty.value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Greater(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)) + )), + '<' => alt(( + // <= + "=".value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::LessEqual(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + empty.value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Less(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)) + )), + ',' => empty.value((Assoc::Left(0), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Comma(Box::new_in(a, &i.state), 
Box::new_in(b, &i.state)))) as _)), + _ => fail + }, + dispatch! {take(2usize); + "!=" => empty.value((Assoc::Neither(10), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::NotEq(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + "||" => empty.value((Assoc::Left(4), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Or(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + _ => fail + }, + )), + ), + ) + .parse_next(i) + } + } + parser(0).parse_next(i) +} + +pub(crate) fn shunting_yard_parser<'i, 'a>(i: &mut Input<'i, 'a>) -> PResult> { + use winnow::combinator::precedence::Assoc; + use winnow::combinator::shunting_yard; + // precedence is based on https://en.cppreference.com/w/c/language/operator_precedence + // but specified in reverse order, because the `cppreference` table + // uses `descending` precedence, but we need ascending one + fn parser<'i, 'a>( + start_power: i64, + ) -> impl Parser, Expr<'a>, ContextError> { + move |i: &mut Input<'i, 'a>| { + shunting_yard::precedence( + start_power, + trace( + "operand", + delimited( + multispace0, + dispatch! {peek(any); + '(' => |i: &mut Input<'i, 'a>| { + delimited('(', parser(0).map(|e| Expr::Paren(Box::new_in(e, &*i.state))), cut_err(')')).parse_next(i) + }, + _ => alt(( + |i: &mut Input<'i, 'a>| { + identifier.map(|s| Expr::Name(BString::from_str_in(s, &*i.state))).parse_next(i) + }, + digit1.parse_to::().map(Expr::Value), + )), + }, + multispace0, + ), + ), + trace( + "prefix", + delimited( + multispace0, + dispatch! 
{any; + '+' => alt(( + // ++ + '+'.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::PreIncr(Box::new_in(a, i.state)))) as _)), + empty.value((18, (|_: &mut _, a| Ok(a)) as _)) + )), + '-' => alt(( + // -- + '-'.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::PreDecr(Box::new_in(a, i.state)))) as _)), + empty.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::Neg(Box::new_in(a, i.state)))) as _)) + )), + '&' => empty.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::Addr(Box::new_in(a, i.state)))) as _)), + '*' => empty.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::Deref(Box::new_in(a, i.state)))) as _)), + '!' => empty.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::Not(Box::new_in(a, i.state)))) as _)), + '~' => empty.value((18, (|i: &mut Input<'i, 'a>, a| Ok(Expr::BitwiseNot(Box::new_in(a, i.state)))) as _)), + _ => fail + }, + multispace0, + ), + ), + trace( + "postfix", + delimited( + multispace0, + alt(( + dispatch! {any; + '!' => not('=').value((19, (|i: &mut Input<'i, 'a>, a| Ok(Expr::Fac(Box::new_in(a, &i.state)))) as _)), + '?' => empty.value((3, (|i: &mut Input<'i, 'a>, cond| { + let (left, right) = cut_err(separated_pair(parser(0), delimited(multispace0, ':', multispace0), parser(3))).parse_next(i)?; + Ok(Expr::Ternary(Box::new_in(cond, &i.state), Box::new_in(left, &i.state), Box::new_in(right, &i.state))) + }) as _)), + '[' => empty.value((20, (|i: &mut Input<'i, 'a>, a| { + let index = delimited(multispace0, parser(0), (multispace0, cut_err(']'), multispace0)).parse_next(i)?; + Ok(Expr::Index(Box::new_in(a, &i.state), Box::new_in(index, &i.state))) + }) as _)), + '(' => empty.value((20, (|i: &mut Input<'i, 'a>, a| { + let args = delimited(multispace0, opt(parser(0)), (multispace0, cut_err(')'), multispace0)).parse_next(i)?; + Ok(Expr::FunctionCall(Box::new_in(a, &i.state), args.map(|a| Box::new_in(a, &i.state)))) + }) as _)), + _ => fail, + }, + dispatch! 
{take(2usize); + "++" => empty.value((20, (|i: &mut Input<'i, 'a>, a| Ok(Expr::PostIncr(Box::new_in(a, &i.state)))) as _)), + "--" => empty.value((20, (|i: &mut Input<'i, 'a>, a| Ok(Expr::PostDecr(Box::new_in(a, &i.state)))) as _)), + _ => fail, + }, + )), + multispace0, + ), + ), + trace( + "infix", + alt(( + // fail, + dispatch! {any; + '*' => alt(( + // ** + "*".value((Assoc::Right(28), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Pow(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + empty.value((Assoc::Left(16), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Mul(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + )), + '/' => empty.value((Assoc::Left(16), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Div(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + '%' => empty.value((Assoc::Left(16), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Rem(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + + '+' => empty.value((Assoc::Left(14), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Add(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + '-' => alt(( + dispatch!{take(2usize); + "ne" => empty.value((Assoc::Neither(10), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::NotEq(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + "eq" => empty.value((Assoc::Neither(10), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Eq(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + "gt" => empty.value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Greater(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + "ge" => empty.value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::GreaterEqual(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + "lt" => empty.value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Less(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + "le" => empty.value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::LessEqual(Box::new_in(a, &i.state), 
Box::new_in(b, &i.state)))) as _)), + _ => fail + }, + '>'.value((Assoc::Left(20), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::ArrowOp(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + empty.value((Assoc::Left(14), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Sub(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)) + )), + '.' => empty.value((Assoc::Left(20), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Dot(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + '&' => alt(( + // && + "&".value((Assoc::Left(6), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::And(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _) ), + + empty.value((Assoc::Left(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::BitAnd(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + )), + '^' => empty.value((Assoc::Left(8), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::BitXor(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + '=' => alt(( + // == + "=".value((Assoc::Neither(10), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Eq(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + empty.value((Assoc::Right(2), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Assign(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)) + )), + + '>' => alt(( + // >= + "=".value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::GreaterEqual(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + empty.value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Greater(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)) + )), + '<' => alt(( + // <= + "=".value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::LessEqual(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + empty.value((Assoc::Neither(12), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Less(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)) + )), + ',' => empty.value((Assoc::Left(0), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Comma(Box::new_in(a, &i.state), 
Box::new_in(b, &i.state)))) as _)), + _ => fail + }, + dispatch! {take(2usize); + "!=" => empty.value((Assoc::Neither(10), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::NotEq(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + "||" => empty.value((Assoc::Left(4), (|i: &mut Input<'i, 'a>, a, b| Ok(Expr::Or(Box::new_in(a, &i.state), Box::new_in(b, &i.state)))) as _)), + _ => fail + }, + )), + ), + ) + .parse_next(i) + } + } + parser(0).parse_next(i) +} + + +fn identifier<'i, 'a>(i: &mut Input<'i, 'a>) -> PResult<&'i str> { + trace( + "identifier", + ( + one_of(|c: char| c.is_alpha() || c == '_'), + take_while(0.., |c: char| c.is_alphanum() || c == '_'), + ), + ) + .take() + .parse_next(i) +} + +impl Expr<'_> { + fn fmt_ast_with_indent( + &self, + indent: u32, + f: &mut core::fmt::Formatter<'_>, + ) -> core::fmt::Result { + for _ in 0..indent { + write!(f, " ")?; + } + macro_rules! binary_fmt { + ($a:ident, $b:ident, $name:literal) => {{ + writeln!(f, $name)?; + $a.fmt_ast_with_indent(indent + 1, f)?; + $b.fmt_ast_with_indent(indent + 1, f) + }}; + } + macro_rules! 
unary_fmt { + ($a:ident, $name:literal) => {{ + writeln!(f, $name)?; + $a.fmt_ast_with_indent(indent + 1, f) + }}; + } + match self { + Self::Name(name) => writeln!(f, "NAME {name}"), + Self::Value(value) => writeln!(f, "VAL {value}"), + Self::Addr(a) => unary_fmt!(a, "ADDR"), + Self::Deref(a) => unary_fmt!(a, "DEREF"), + Self::Neg(a) => unary_fmt!(a, "NEG"), + Self::Fac(a) => unary_fmt!(a, "FAC"), + Self::PreIncr(a) => unary_fmt!(a, "PRE_INCR"), + Self::PostIncr(a) => unary_fmt!(a, "POST_INCR"), + Self::PreDecr(a) => unary_fmt!(a, "PRE_DECR"), + Self::PostDecr(a) => unary_fmt!(a, "POST_DECR"), + Self::Not(a) => unary_fmt!(a, "NOT"), + Self::BitwiseNot(a) => unary_fmt!(a, "BIT_NOT"), + Self::Paren(a) => unary_fmt!(a, "PAREN"), + Self::Assign(a, b) => binary_fmt!(a, b, "ASSIGN"), + Self::ArrowOp(a, b) => binary_fmt!(a, b, "ARROW"), + Self::Dot(a, b) => binary_fmt!(a, b, "ARROW"), + Self::FunctionCall(a, b) => { + writeln!(f, "CALL")?; + a.fmt_ast_with_indent(indent + 1, f)?; + if let Some(b) = b { + b.fmt_ast_with_indent(indent + 1, f)?; + } + Ok(()) + } + Self::Add(a, b) => binary_fmt!(a, b, "ADD"), + Self::Sub(a, b) => binary_fmt!(a, b, "SUB"), + Self::Mul(a, b) => binary_fmt!(a, b, "MUL"), + Self::Div(a, b) => binary_fmt!(a, b, "DIV"), + Self::Pow(a, b) => binary_fmt!(a, b, "POW"), + Self::And(a, b) => binary_fmt!(a, b, "AND"), + Self::Or(a, b) => binary_fmt!(a, b, "OR"), + Self::Eq(a, b) => binary_fmt!(a, b, "EQ"), + Self::NotEq(a, b) => binary_fmt!(a, b, "NEQ"), + Self::Greater(a, b) => binary_fmt!(a, b, "GREATER"), + Self::GreaterEqual(a, b) => binary_fmt!(a, b, "GTEQ"), + Self::Less(a, b) => binary_fmt!(a, b, "LESS"), + Self::LessEqual(a, b) => binary_fmt!(a, b, "LESSEQ"), + Self::BitXor(a, b) => binary_fmt!(a, b, "BIT_XOR"), + Self::Rem(a, b) => binary_fmt!(a, b, "REM"), + Self::BitAnd(a, b) => binary_fmt!(a, b, "BIT_AND"), + Self::Index(a, b) => binary_fmt!(a, b, "INDEX"), + Self::Comma(a, b) => binary_fmt!(a, b, "COMMA"), + Self::Ternary(cond, a, b) => { + 
writeln!(f, "TERNARY")?; + cond.fmt_ast_with_indent(indent + 1, f)?; + a.fmt_ast_with_indent(indent + 2, f)?; + b.fmt_ast_with_indent(indent + 2, f) + } + } + } + fn fmt_delimited(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + match self { + Self::Name(name) => return write!(f, "{name}"), + Self::Value(value) => return write!(f, "{value}"), + Self::Paren(a) => return a.fmt_delimited(f), + _ => (), + } + macro_rules! unary { + ($op:literal, $a:ident) => {{ + write!(f, $op)?; + $a.fmt_delimited(f)?; + }}; + } + macro_rules! binary { + ($op:literal, $a:ident, $b:ident) => {{ + write!(f, "{} ", $op)?; + $a.fmt_delimited(f)?; + write!(f, " ")?; + $b.fmt_delimited(f)?; + }}; + } + write!(f, "(")?; + match self { + Self::Assign(a, b) => binary!("=", a, b), + Self::FunctionCall(a, b) => { + write!(f, "call ")?; + a.fmt_delimited(f)?; + if let Some(b) = b { + write!(f, " ")?; + b.fmt_delimited(f)?; + } + } + Self::ArrowOp(a, b) => binary!("->", a, b), + Self::Dot(a, b) => binary!(".", a, b), + Self::Addr(a) => unary!("&", a), + Self::Deref(a) => unary!("*", a), + Self::Neg(a) => unary!("-", a), + Self::Fac(a) => unary!("!", a), + Self::Not(a) => unary!("!", a), + Self::BitwiseNot(a) => unary!("~", a), + Self::PreIncr(a) => unary!("pre++", a), + Self::PostIncr(a) => unary!("post++", a), + Self::PreDecr(a) => unary!("pre--", a), + Self::PostDecr(a) => unary!("post--", a), + Self::Add(a, b) => binary!("+", a, b), + Self::Sub(a, b) => binary!("-", a, b), + Self::Mul(a, b) => binary!("*", a, b), + Self::Div(a, b) => binary!("/", a, b), + Self::Pow(a, b) => binary!("**", a, b), + Self::And(a, b) => binary!("&&", a, b), + Self::Or(a, b) => binary!("||", a, b), + Self::Eq(a, b) => binary!("==", a, b), + Self::NotEq(a, b) => binary!("!=", a, b), + Self::Greater(a, b) => binary!(">", a, b), + Self::GreaterEqual(a, b) => binary!(">=", a, b), + Self::Less(a, b) => binary!("<", a, b), + Self::LessEqual(a, b) => binary!("<=", a, b), + Self::BitXor(a, b) => binary!("^", 
a, b), + Self::Rem(a, b) => binary!("%", a, b), + Self::BitAnd(a, b) => binary!("&", a, b), + Self::Index(a, b) => binary!("[]", a, b), + Self::Comma(a, b) => binary!(",", a, b), + Self::Ternary(cond, a, b) => { + write!(f, "? ")?; + cond.fmt_delimited(f)?; + write!(f, " ")?; + a.fmt_delimited(f)?; + write!(f, " ")?; + b.fmt_delimited(f)?; + } + _ => unreachable!(), + } + + write!(f, ")") + } +} + +impl core::fmt::Display for Expr<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.fmt_ast_with_indent(0, f)?; + writeln!(f)?; + self.fmt_delimited(f) + } +} + +#[cfg(test)] +mod test { + + #[allow(clippy::useless_attribute)] + #[allow(unused_imports)] // its dead for benches + use super::*; + + #[allow(dead_code)] + // to invoke fmt_delimited() + struct PrefixNotation<'a>(Expr<'a>); + + impl core::fmt::Display for PrefixNotation<'_> { + fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { + self.0.fmt_delimited(f) + } + } + + #[allow(dead_code)] + fn parse(i: &str) -> Result { + let b = bumpalo::Bump::new(); + let i = Stateful { + input: i, + state: &b, + }; + let s = pratt_parser + .parse(i) + .map(|r| format!("{}", PrefixNotation(r))); + s.map_err(|e| format!("{:?}", e)) + } + + #[allow(dead_code)] + fn parse_ok(i: &str, expect: &str) { + assert_eq!(parse(i).unwrap(), expect); + } + + #[test] + fn op() { + parse_ok(" 1 ", "1"); + } + + #[test] + fn neither() { + assert!(parse("1 == 2 == 3").is_err()); + assert!(parse("1 -le 2 -gt 3").is_err()); + assert!(parse("1 < 2 < 3").is_err()); + assert!(parse("1 != 2 == 3").is_err()); + } + + #[test] + fn equal() { + parse_ok("x=3", "(= x 3)"); + parse_ok("x = 2*3", "(= x (* 2 3))"); + parse_ok("x = y", "(= x y)"); + parse_ok("a = b = 10", "(= a (= b 10))"); + parse_ok("x = ((y*4)-2)", "(= x (- (* y 4) 2))"); + } + + #[test] + fn unary() { + parse_ok("- - a", "(-(-a))"); + parse_ok("+ - a", "(-a)"); + parse_ok("++ -- a", "(pre++(pre--a))"); + parse_ok("a ++ --", 
"(post--(post++a))"); + parse_ok("!x", "(!x)"); + parse_ok("x--", "(post--x)"); + parse_ok("x[1]--", "(post--([] x 1))"); + parse_ok("--x", "(pre--x)"); + parse_ok("++x[1]", "(pre++([] x 1))"); + parse_ok("!x--", "(!(post--x))"); + parse_ok("~x++", "(~(post++x))"); + parse_ok("x++ - y++", "(- (post++x) (post++y))"); + parse_ok("++x - ++y", "(- (pre++x) (pre++y))"); + parse_ok("--1 * 2", "(* (pre--1) 2)"); + parse_ok("--f . g", "(pre--(. f g))"); + } + + #[test] + fn same_precedence() { + // left associative + parse_ok("1 + 2 + 3", "(+ (+ 1 2) 3)"); + parse_ok("1 - 2 - 3", "(- (- 1 2) 3)"); + parse_ok("1 * 2 * 3", "(* (* 1 2) 3)"); + parse_ok("1 / 2 / 3", "(/ (/ 1 2) 3)"); + parse_ok("1 % 2 % 3", "(% (% 1 2) 3)"); + parse_ok("1 ^ 2 ^ 3", "(^ (^ 1 2) 3)"); + parse_ok("+-+1", "(-1)"); + parse_ok("f . g . h", "(. (. f g) h)"); + parse_ok("++--++1", "(pre++(pre--(pre++1)))"); + // right associative + parse_ok("2 ** 3 ** 2", "(** 2 (** 3 2))"); + } + + #[test] + fn different_precedence() { + parse_ok("1 + 2 * 3", "(+ 1 (* 2 3))"); + parse_ok("1 + 2 * 3 - 4 / 5", "(- (+ 1 (* 2 3)) (/ 4 5))"); + parse_ok("a + b * c * d + e", "(+ (+ a (* (* b c) d)) e)"); + parse_ok("1 + ++2 * 3 * 5 + 6", "(+ (+ 1 (* (* (pre++2) 3) 5)) 6)"); + parse_ok("**3 + &1", "(+ (*(*3)) (&1))"); + parse_ok("x*y - y*z", "(- (* x y) (* y z))"); + parse_ok("x/y - y%z", "(- (/ x y) (% y z))"); + parse_ok("1<2 * 3", "(< 1 (* 2 3))"); + parse_ok( + " 1 + 2 + f . g . h * 3 * 4", + "(+ (+ 1 2) (* (* (. (. f g) h) 3) 4))", + ); + } + + #[test] + fn prefix_postfix_power() { + // https://en.cppreference.com/w/c/language/operator_precedence + // `post++` has `1`, `pre--` and `*` have 2 + parse_ok("--**3++", "(pre--(*(*(post++3))))"); + parse_ok("**--3++", "(*(*(pre--(post++3))))"); + parse_ok("&foo()[0]", "(&([] (call foo) 0))"); + parse_ok("-9!", "(-(!9))"); + parse_ok("f . g !", "(!(. 
f g))"); + } + + #[test] + fn prefix_infix() { + parse_ok("x - -y", "(- x (-y))"); + parse_ok("-1 * -2", "(* (-1) (-2))"); + parse_ok("-x * -y", "(* (-x) (-y))"); + parse_ok("x - -234", "(- x (-234))"); + } + + #[test] + fn ternary() { + parse_ok("a ? 2 + c : -2 * 2", "(? a (+ 2 c) (* (-2) 2))"); + parse_ok("a ? b : c ? d : e", "(? a b (? c d e))"); + parse_ok("2! > 1 ? 3 : 1", "(? (> (!2) 1) 3 1)"); + parse_ok( + "2 > 1 ? 1 -ne 3 ? 4 : 5 : 1", + "(? (> 2 1) (? (!= 1 3) 4 5) 1)", + ); + parse_ok("a > b ? 0 : 1", "(? (> a b) 0 1)"); + parse_ok("a > b ? x+1 : y+1", "(? (> a b) (+ x 1) (+ y 1))"); + parse_ok( + "1 ? true1 : 2 ? true2 : false", + "(? 1 true1 (? 2 true2 false))", + ); + parse_ok( + "1 ? true1 : (2 ? true2 : false)", + "(? 1 true1 (? 2 true2 false))", + ); + + parse_ok( + "1 ? (2 ? true : false1) : false2", + "(? 1 (? 2 true false1) false2)", + ); + parse_ok( + "1 ? 2 ? true : false1 : false2", + "(? 1 (? 2 true false1) false2)", + ); + } + + #[test] + fn comma() { + parse_ok("x=1,y=2,z=3", "(, (, (= x 1) (= y 2)) (= z 3))"); + parse_ok("a, b, c", "(, (, a b) c)"); + parse_ok("(a, b, c)", "(, (, a b) c)"); + parse_ok("f(a, b, c), d", "(, (call f (, (, a b) c)) d)"); + parse_ok("(a, b, c), d", "(, (, (, a b) c) d)"); + } + + #[test] + fn comma_ternary() { + parse_ok("x ? 1 : 2, y ? 3 : 4", "(, (? x 1 2) (? y 3 4))"); + parse_ok("x ? 1 : 2 ? 3 : 4", "(? x 1 (? 2 3 4))"); + // Comma expressions can be inside + parse_ok("a , b ? c, d : e, f", "(, (, a (? b (, c d) e)) f)"); + parse_ok("a = 0 ? b : c = d", "(= a (= (? 
0 b c) d))"); + } + + #[test] + fn braces() { + parse_ok("4*(2+3)", "(* 4 (+ 2 3))"); + parse_ok("(2+3)*4", "(* (+ 2 3) 4)"); + parse_ok("(((0)))", "0"); + } + + #[test] + fn logical() { + parse_ok("a && b || c && d", "(|| (&& a b) (&& c d))"); + parse_ok("!a && !b", "(&& (!a) (!b))"); + parse_ok("a != b && c == d", "(&& (!= a b) (== c d))"); + } + + #[test] + fn array() { + parse_ok("x[1,2]", "([] x (, 1 2))"); + parse_ok("x[1]", "([] x 1)"); + parse_ok("x[a+b]", "([] x (+ a b))"); + parse_ok("c = pal[i*8]", "(= c ([] pal (* i 8)))"); + parse_ok("f[x] = 1", "(= ([] f x) 1)"); + parse_ok("x[0][1]", "([] ([] x 0) 1)"); + } + + #[test] + fn function_call() { + parse_ok("a()", "(call a)"); + parse_ok("a(+1)", "(call a 1)"); + parse_ok("a()+1", "(+ (call a) 1)"); + parse_ok("f(a, b, c)", "(call f (, (, a b) c))"); + parse_ok("print(x)", "(call print x)"); + parse_ok( + "x = y(2)*3 + y(4)*5", + "(= x (+ (* (call y 2) 3) (* (call y 4) 5)))", + ); + parse_ok("x(1,2)+y(3,4)", "(+ (call x (, 1 2)) (call y (, 3 4)))"); + parse_ok("x(a,b,c[d])", "(call x (, (, a b) ([] c d)))"); + parse_ok( + "x(1,2)*j+y(3,4)*k+z(5,6)*l", + "(+ (+ (* (call x (, 1 2)) j) (* (call y (, 3 4)) k)) (* (call z (, 5 6)) l))", + ); + parse_ok("print(test(2,3))", "(call print (call test (, 2 3)))"); + parse_ok("min(255,n*2)", "(call min (, 255 (* n 2)))"); + } + + #[test] + fn member_access() { + parse_ok("a.b", "(. a b)"); + parse_ok("a.b.c", "(. (. a b) c)"); + parse_ok("a->b", "(-> a b)"); + parse_ok("++a->b", "(pre++(-> a b))"); + parse_ok("a++ ->b", "(-> (post++a) b)"); + parse_ok("a.(x)", "(. a x)"); + parse_ok("a.(x+3)", "(. 
a (+ x 3))"); + } + + #[test] + fn errors() { + assert!(parse("x + a b").is_err()); + assert!(parse("x[a b]").is_err()); + assert!(parse("x[a)]").is_err()); + assert!(parse("x(a])").is_err()); + assert!(parse("[a + b]").is_err()); + assert!(parse("[a b]").is_err()); + assert!(parse("+").is_err()); + assert!(parse("a +").is_err()); + assert!(parse("<=").is_err()); + assert!(parse("<= - a + b").is_err()); + assert!(parse("a b").is_err()); + assert!(parse("a + b @").is_err()); + assert!(parse("a + b )").is_err()); + assert!(parse("( a + b").is_err()); + assert!(parse("( a + b) c").is_err()); + assert!(parse("f ( a + b ) c").is_err()); + assert!(parse("@ a + b").is_err()); + assert!(parse("a @ b").is_err()); + assert!(parse("(a @ b)").is_err()); + assert!(parse(")").is_err()); + } +} diff --git a/src/combinator/mod.rs b/src/combinator/mod.rs index df791adaa..e9a6d5f01 100644 --- a/src/combinator/mod.rs +++ b/src/combinator/mod.rs @@ -166,6 +166,9 @@ mod multi; mod parser; mod sequence; +pub mod shunting_yard; +pub mod precedence; + #[cfg(test)] mod tests; diff --git a/src/combinator/precedence.rs b/src/combinator/precedence.rs new file mode 100644 index 000000000..a9758ac97 --- /dev/null +++ b/src/combinator/precedence.rs @@ -0,0 +1,208 @@ +use crate::{ + combinator::{opt, trace}, + error::{ErrMode, ParserError}, + stream::{Stream, StreamIsPartial}, + PResult, Parser, +}; + +/// Parses an expression based on operator precedence. 
/// Associativity of an infix operator, carrying the operator's binding power.
///
/// The wrapped `i64` is the precedence of the operator: an operator with a
/// larger power binds more tightly than one with a smaller power.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Assoc {
    /// Left-associative: `a op b op c` groups as `(a op b) op c`.
    Left(i64),
    /// Right-associative: `a op b op c` groups as `a op (b op c)`.
    Right(i64),
    /// Non-associative: two operators of the same power may not be chained,
    /// so parsing stops before the second operator in `a op b op c`.
    Neither(i64),
}
/// Returns `x!` for the postfix `!` operator of the test grammar.
///
/// Computed as the product of `1..=x`, so `0!` is `1` and a negative `x`
/// yields the empty product `1` instead of recursing without end.
fn factorial(x: i32) -> i32 {
    (1..=x).product()
}
=> empty.value((9, (|_: &mut _, a| {Ok(factorial(a))}) as _)), + _ => fail + }, + ), + trace( + "infix", + dispatch! {any; + '+' => empty.value((Assoc::Left(5), (|_: &mut _, a, b| Ok(a + b)) as _ )), + '-' => empty.value((Assoc::Left(5), (|_: &mut _, a, b| Ok(a - b)) as _)), + '*' => empty.value((Assoc::Left(7), (|_: &mut _, a, b| Ok(a * b)) as _)), + '/' => empty.value((Assoc::Left(7), (|_: &mut _, a, b| Ok(a / b)) as _)), + '%' => empty.value((Assoc::Left(7), (|_: &mut _, a, b| Ok(a % b)) as _)), + '^' => empty.value((Assoc::Right(9), (|_: &mut _, a, b| Ok(a ^ b)) as _)), + _ => fail + }, + ), + ) + .parse_next(i) + } + } + + #[test] + fn test_precedence() { + // assert_eq!(parser().parse("-3!+-3 * 4"), Ok(-18)); + // assert_eq!(parser().parse("+2 + 3 * 4"), Ok(14)); + assert_eq!(parser().parse("2 * 3+4"), Ok(10)); + } + #[test] + fn test_unary() { + assert_eq!(parser().parse("-2"), Ok(-2)); + assert_eq!(parser().parse("4!"), Ok(24)); + assert_eq!(parser().parse("2 + 4!"), Ok(26)); + assert_eq!(parser().parse("-2 + 2"), Ok(0)); + } +} diff --git a/src/combinator/shunting_yard.rs b/src/combinator/shunting_yard.rs new file mode 100644 index 000000000..9e92c433f --- /dev/null +++ b/src/combinator/shunting_yard.rs @@ -0,0 +1,289 @@ +use crate::combinator::opt; +use crate::error::{ErrMode, ErrorKind, ParserError}; +use crate::stream::{Stream, StreamIsPartial}; +use crate::{PResult, Parser}; + +use super::precedence::Assoc; +use super::trace; + +pub fn precedence( + start_precedence: i64, + mut operand: ParseOperand, + mut prefix: ParsePrefix, + mut postfix: ParsePostfix, + mut infix: ParseInfix, +) -> impl Parser +where + I: Stream + StreamIsPartial, + ParseOperand: Parser, + ParseInfix: Parser PResult), E>, + ParsePrefix: Parser PResult), E>, + ParsePostfix: Parser PResult), E>, + E: ParserError, +{ + trace("precedence", move |i: &mut I| { + let result = shunting_yard( + start_precedence, + i, + operand.by_ref(), + prefix.by_ref(), + postfix.by_ref(), + 
infix.by_ref(), + )?; + Ok(result) + }) +} + +fn shunting_yard( + start_precedence: i64, + i: &mut I, + mut operand: ParseOperand, + mut prefix: ParsePrefix, + mut postfix: ParsePostfix, + mut infix: ParseInfix, +) -> PResult +where + I: Stream + StreamIsPartial, + ParseOperand: Parser, + ParseInfix: Parser PResult), E>, + ParsePrefix: Parser PResult), E>, + ParsePostfix: Parser PResult), E>, + E: ParserError, +{ + // a stack for computing the result + let mut value_stack = Vec::::new(); + let mut operator_stack = Vec::>::new(); + + let mut current_is_neither = None; + 'parse: loop { + // Prefix unary operators + while let Some((lpower, op)) = opt(prefix.by_ref()).parse_next(i)? { + // prefix operators never trigger the evaluation of pending operators + operator_stack.push(Operator::Unary(lpower, op)); + } + + // Operand + if let Some(operand) = opt(operand.by_ref()).parse_next(i)? { + value_stack.push(operand); + } else { + // error missing operand + return Err(ErrMode::from_error_kind(i, ErrorKind::Token)); + } + + if i.eof_offset() <= 0 { + break 'parse; + } + + // Postfix unary operators + while let Some((lpower, op)) = opt(postfix.by_ref()).parse_next(i)? { + while operator_stack.last().is_some_and(|op| { + let rpower = op.right_power(); + lpower < rpower + }) { + evaluate( + i, + &mut value_stack, + operator_stack.pop().expect("already checked"), + )?; + } + // postfix operators are never put in pending state in `operator_stack` + let lhs = value_stack.pop().expect("value"); + value_stack.push(op(i, lhs)?); + } + let start = i.checkpoint(); + // Infix binary operators + if let Some((assoc, op)) = opt(infix.by_ref()).parse_next(i)? 
{ + let mut next_is_neither = None; + let lpower = match assoc { + Assoc::Left(p) => p, + Assoc::Right(p) => p, + Assoc::Neither(p) => { + next_is_neither = Some(p); + p + } + }; + if current_is_neither.is_some_and(|n| n == lpower) { + i.reset(&start); + break 'parse; + } + + while operator_stack.last().is_some_and(|op| { + let rpower = op.right_power(); + lpower < rpower + }) { + evaluate( + i, + &mut value_stack, + operator_stack.pop().expect("already checked"), + )?; + } + current_is_neither = next_is_neither; + // some hackery around `a ? b : c, end` -> `(, (? a b c) end)` + // needs refactoring + if start_precedence <= lpower { + operator_stack.push(Operator::Binary(assoc, op)); + } else { + i.reset(&start); + break 'parse; + } + } else { + // no more operators + break 'parse; + } + } + + while let Some(op) = operator_stack.pop() { + evaluate(i, &mut value_stack, op)?; + } + // TODO: when it could happen? + // if eval_stack.len() > 1 { + // // Error: value left on stack + // } + + Ok(value_stack.pop().expect("well-formed expression")) // TODO: error handling +} + +enum Operator { + // left binding power for the postfix or the right one for the prefix + Unary(i64, fn(&mut I, Operand) -> PResult), + // left binding power and right binding power for the infix operator + Binary(Assoc, fn(&mut I, Operand, Operand) -> PResult), +} + +impl Operator { + fn right_power(&self) -> i64 { + match self { + Operator::Unary(p, _) => *p, + Operator::Binary(Assoc::Left(p), _) => *p + 1, + Operator::Binary(Assoc::Right(p), _) => *p - 1, + Operator::Binary(Assoc::Neither(p), _) => *p + 1, + } + } +} + +fn evaluate( + i: &mut I, + stack: &mut Vec, + op: Operator, +) -> PResult<(), E> { + match op { + Operator::Unary(_, op) => { + let lhs = stack.pop().expect("value"); + stack.push(op(i, lhs)?); + } + Operator::Binary(_, op) => { + // TODO: confirm invariants. 
It should be already checked by the parser algorithm itself + let rhs = stack.pop().expect("value"); + let lhs = stack.pop().expect("value"); + let folded = op(i, lhs, rhs)?; + stack.push(folded); + } + }; + Ok(()) +} + +fn unwind_operators_stack_to( + i: &mut I, + start_precedence: i64, + current_power: Assoc, + value_stack: &mut Vec, + operator_stack: &mut Vec>, +) -> PResult<(), E> { + let mut current_is_neither = None; + while operator_stack.last().is_some_and(|op| { + let rpower = op.right_power(); + let mut next_is_neither = None; + let lpower = match current_power { + Assoc::Left(p) => p, + Assoc::Right(p) => p, + Assoc::Neither(p) => { + next_is_neither = Some(p); + p + } + }; + dbg!( + lpower, + rpower, + start_precedence, + current_is_neither, + next_is_neither + ); + let r = lpower < rpower + && lpower < start_precedence + && current_is_neither.is_none_or(|n| n != lpower); + current_is_neither = next_is_neither; + r + }) { + evaluate( + i, + value_stack, + operator_stack.pop().expect("already checked"), + )?; + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use crate::{ + ascii::digit1, + combinator::{cut_err, delimited, empty, fail, peek}, + dispatch, + token::any, + }; + + use super::*; + + fn parser(i: &mut &str) -> PResult { + precedence( + 0, + trace( + "operand", + dispatch! {peek(any); + '(' => delimited('(', trace("recursion", parser), cut_err(')')), + _ => digit1.parse_to::() + }, + ), + trace( + "prefix", + dispatch! {any; + '+' => trace("+", empty).value((20, (|_: &mut _, a| Ok(a)) as _)), + '-' => trace("-", empty).value((20, (|_: &mut _,a: i32| Ok(-a)) as _)), + _ => fail + }, + ), + trace("postfix", fail), + trace( + "infix", + dispatch! 
{any; + '+' => trace("+", empty).value((Assoc::Left(5), (|_: &mut _, a, b| { + println!("({a} + {b})"); + Ok(a + b) + }) as _)), + '-' => trace("-", empty).value((Assoc::Left(5), (|_: &mut _, a, b| { + println!("({a} - {b})"); + Ok(a - b) + }) as _)), + '*' => trace("*", empty).value((Assoc::Left(7), (|_: &mut _, a, b|{ + println!("({a} * {b})"); + Ok(a * b) + }) as _)), + '/' => trace("/", empty).value((Assoc::Left(7), (|_: &mut _, a, b| { + println!("({a} / {b})"); + Ok(a / b) + }) as _)), + _ => fail + }, + ), + ) + .parse_next(i) + } + + #[test] + fn test_parser() { + // assert_eq!(parser.parse("1==2==3"), Ok(11)); + assert_eq!(parser.parse("1+4+6"), Ok(11)); + // assert_eq!(parser.parse("2*(4+6)"), Ok(20)); + // assert!(matches!(parser.parse("2*"), Err(_))); + } +}