Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
fed8c90
feat: implement Pratt parser
39555 Nov 10, 2024
6a488c2
PoC: Pratt parsing with `shunting yard` algorithm
39555 Nov 14, 2024
ee4459d
commit suggestion
39555 Nov 16, 2024
4b1499d
remove spaces from #[doc(alias = "...")]
39555 Nov 16, 2024
acf4577
remove `UnaryOp` and `BinaryOp` in favor of `Fn`
39555 Nov 16, 2024
a816a1c
remove redundant trait impl
39555 Nov 16, 2024
2a80e65
remove `allow_unused`, move `allow(non_snake_case)` to where it shoul…
39555 Nov 16, 2024
29fe18d
stop dumping pratt into `combinator` namespace
39555 Nov 16, 2024
5a4f4b4
move important things to go first
39555 Nov 16, 2024
919a1cb
strip fancy api for now
39555 Nov 16, 2024
0273a29
remove wrong and long doc for now
39555 Nov 16, 2024
ebddbb8
rename `unwind_operators_stack` -> `unwind_operators_stack_to`
39555 Nov 16, 2024
9139d58
Merge branch 'pratt' into pratt-perf
39555 Nov 14, 2024
086fda9
Merge branch 'shunting-yard' into pratt-perf
39555 Nov 16, 2024
b31904b
perf: benchmarks for pratt parsers
39555 Nov 14, 2024
efd1361
rebase onto the latest changes from both parsers
39555 Nov 16, 2024
46ea58d
refactor: make steps more distinct
39555 Nov 16, 2024
73d8780
perf: benchmarks for pratt parsers
39555 Nov 14, 2024
47542e2
rebase onto the latest changes from both parsers
39555 Nov 16, 2024
f218911
fix: precedence for associativity, remove `trace()`
39555 Nov 16, 2024
3d7ef41
switch from `&dyn Fn(O) -> O` to `fn(O) -> O`
39555 Nov 17, 2024
a6cbc1a
feat: pass Input into operator closures
39555 Nov 17, 2024
29b64fa
add `trace` for `tests` parser
39555 Nov 17, 2024
b31a3a3
feat: operator closures must return PResult
39555 Nov 18, 2024
33c82f3
feat: allow the user to specify starting power
39555 Nov 18, 2024
040dd85
feat: enum `Assoc` for infix operators. Add `Neither` associativity
39555 Nov 19, 2024
6d88dff
fix: switch to i64, fix precedence checking
39555 Nov 19, 2024
8f18fc2
example: pratt expression parser
39555 Nov 17, 2024
a4ad844
feat: complex postfix operators
39555 Nov 17, 2024
54cb315
pratt_example: operator closures return PResult
39555 Nov 18, 2024
d6da343
test: add tests
39555 Nov 18, 2024
c1a8535
specify the parser start precedence
39555 Nov 18, 2024
a85291b
style: fix indentation
39555 Nov 18, 2024
39cc484
refactor: remove unnecessarily multispace0
39555 Nov 18, 2024
c52c10d
fix: failed tests
39555 Nov 18, 2024
d3c3d0a
use `Assoc` enum. tests for associativity `Neither`
39555 Nov 19, 2024
b7b0629
fix: switch to i64
39555 Nov 19, 2024
5e7fb65
tests ill-formed expressions
39555 Nov 19, 2024
7b6e3e0
update benchmark
39555 Nov 19, 2024
63e30e1
PoC: Pratt parsing with `shunting yard` algorithm
39555 Nov 14, 2024
4ff9b25
rename `unwind_operators_stack` -> `unwind_operators_stack_to`
39555 Nov 16, 2024
7b82b0e
refactor: make steps more distinct
39555 Nov 16, 2024
be02d0a
update shunting_yard
39555 Nov 19, 2024
9aae69c
Merge branch 'shunting-yard' into pratt-perf
39555 Nov 19, 2024
2a95e93
upd
39555 Nov 19, 2024
bc07708
example: use `bumpalo` allocator
39555 Nov 20, 2024
aa0a18f
bench: pratt vs shunting_yard with `bumpalo`
39555 Nov 20, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
396 changes: 385 additions & 11 deletions Cargo.lock

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,8 @@ memchr = { version = "2.5", optional = true, default-features = false }
terminal_size = { version = "0.4.0", optional = true }

[dev-dependencies]
bumpalo = { version = "3.14.0", features = ["boxed", "collections"] }
pprof = { version = "0.14", features = ["flamegraph", "criterion"] }
doc-comment = "0.3"
proptest = "1.2.0"
criterion = "0.5.1"
Expand Down Expand Up @@ -197,6 +199,17 @@ required-features = ["alloc"]
name = "string"
required-features = ["alloc"]

[[example]]
name = "pratt"
required-features = ["std"]


[[bench]]
name = "pratt-example"
path = "examples/pratt/bench.rs"
harness = false
required-features = ["std"]

[[bench]]
name = "arithmetic"
path = "examples/arithmetic/bench.rs"
Expand Down Expand Up @@ -241,5 +254,10 @@ path = "examples/json/bench.rs"
harness = false
required-features = ["std"]

[[bench]]
name = "pratt"
path = "benches/pratt.rs"
harness = false

[lints]
workspace = true
1 change: 1 addition & 0 deletions benches/ariphmetic.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1+1*1*2*2*1+1*(2*1+1)*3*1+1*2*1*1+1+1*3*1*1+1+1+1*4*1*1+1+1+1+1*5*1*1+1-2-3*1+1-2-3*(1+1)-2-3*1+(1-2-3)*1*1^2*1*1^2+2*1^3*1*1^2+2*1^3+3*1^4*1*1^2+2*1^3+3*1^4+3*1^5*1*1*1^2*1*1^2+2*1^3*1*1^2+2*1^3+3*1^4*1*1^2+2*1^3+3*1^4+4*1^5*1+1+1*1*1*1*1-1-1*1-1-1*1-1+1-1+1-1*1+1-1+1-1+1+1*1-1*1-1*1+1-1*1-1*1-1-1-1-1+1+1+1*1+1+1-1-1-1*1*1*1*1-1-1-1-1-1*1*1*1+1+-1*1^2+1*1+1^2*1^2*2*2*1^2*1^2^3*1^2^3-1^8*+1^+2^+3-1^+8*1^-2^1-1-1^8*1^1-2^1-3-1-1^1-8*1-1^2^3-1^8*(-1+1^2^3)-(1^2^3-1)*(-1*1^2^3)-(1^2^3*-1)*(1^2^3*4)-(1^2^3-4)*1^2^3^1*(1^2)^3*1^(2^3)*1^2^3*1^(2*3)*1^(2^3)*1^2^3-1^13*(1*(2*(3*(4*(5*(6*(7*(1+1))))))))*(1+(2+(3+(4+(5+(6+(7+(1*1))))))))*((((((((1-1)*1)*2)*3)*4)*5)*6)*7)*((((((((1-1)+1)+2)+3)+4)+5)+6)+7)*(2+(3+(4+(5+(((((1*1)*9)*8)*7)*6)))))*(2*(3*(4*(5*(((((1-1)+9)+8)+7)+6)))))*((((((((1-1)*(1-2))*(2-3))*(3-4))*(4-5))*(5-6))*(6-7))*(7-8))*((((((((1-1)+(1-2))+(2-3))+(3-4))+(4-5))+(5-6))+(6-7))+(7-8))*((2-3)+((3-4)+((4-5)+((5-6)+(((((1*1)*(9-10))*(8-9))*(7-8))*(6-8))))))*((2-3)*((3-4)*((4-5)*((5-6)*(((((1*1)+(9-10))+(8-9))+(7-8))+(6-8))))))*(1*(2*(3*(4+(5+(6+(7*(8*(9*(10*(1+1)))))))))))*(((((((((((1-1)*1)*2)*3)+4)+5)+6)*7)*8)+9)+10)*(1-1)+(2-1)+(3-1)+(4-1)+(5-1)+(6-1)+(7-1)+(8-1)+(9-1)+(1-1)*(1-(1+1))+(2-(1-2))+(3-(1-3))+(4-(1-4))+(5-(1-5))+(6-(1-6))+(7-(1-7))+(8-(1-8))+(9-(1-9))*(1-1)+(1-2)+(1-3)+(1-4)+(1-5)+(1-6)+(1-7)+(1-8)+(1-9)+(1-1)*((1-1)-1)+((1-2)-2)+((1-3)-3)+((1-4)-4)+((1-5)-5)+((1-6)-6)+((1-7)-7)+((1-8)-8)+((1-9)-9)*(1-1)+(2-1)+(3-1)+(4-1)+(5-1)+(6-1)+(7-1)+(8-1)+(9-1)+(1-1)*(1-(1-1))+(2-(1-2))+(3-(1-3))+(4-(1-4))+(5-(1-5))+(6-(1-6))+(7-(1-7))+(8-(1-8))+(9-(1-9))*(1-1)+(1-2)+(1-3)+(1-4)+(1-5)+(1-6)+(1-7)+(1-8)+(1-9)+(1-1)*((1+1)-1)+((1-2)-2)+((1-3)-3)+((1-4)-4)+((1-5)-5)+((1-6)-6)+((1-7)-7)+((1-8)-8)+((1-9)-9)*(1+1-1)*(1-1-1)*1-((1+1)*(1-1))-1*1-((1*1)+(1-1))-3*1+1*(1+1)*3*(2*1+2*1)*2*(2*1)*(2*1)*2*-(1^1)*1+1*1*1*1+1*1+1*(1+1)*(1+1)*(-3)*1+1-1*1-5^6*1^1-1*1-5+6*2*(1+1)*((1+1)-2)+(1-(3*1))*((1+1+2+1+3)+1+4+1+5+1+6+1+7)-(1+1+2+1+3+1+4+1+5+(1+6+1+7))*(1+1+2+1+3+1+4+(1+
5+1+6)+1+7)*(1+(1+2+1+3)+1+4+1+5+1+6+1+7)*(1-((((1+(((1*(((((1*((((3*((1+1)+1))+1)+1)*1))+0)+1)+1)-1))+1)+1))+1)*1)-1))*(1+1)*(1+2)*(1+3)*(1+4)*(1+5)*(1+6)*(1+7)*(1+8)*(1+9)*(1+10)*(1+11)*(1+12)*(1+(1+1)*(1+2))*(2+(1+3)*(1+4))*(3+(1+5)*(1+6))*(4+(1+7)*(1+8))*(5+(1+9)*(1+10))*(6+(1+11)*(1+12))*(5+1)+(2*1-2-3*1)*(1-3+1-4)+(1+7)*2*1^1+1*3*1^2+2*1^1+1*4*1^3+3*1^2+2*1^1+1*5*1^4+4*1^3+3*1^2+2*1^1+1*6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*8*1^7+7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*9*1^8+8*1^7+7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*1*1^9+9*1^8+8*1^7+7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*2*1^10+1*1^9+9*1^8+8*1^7+7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*3*1^11+2*1^10+1*1^9+9*1^8+8*1^7+7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*4*1^12+3*1^11+2*1^10+1*1^9+9*1^8+8*1^7+7*1^6+6*1^5+5*1^4+4*1^3+3*1^2+2*1^1+1*(2*1+1)*((3*1+2)*1+1)*(((4*1+3)*1+2)*1+1)*((((5*1+4)*1+3)*1+2)*1+1)*(((((6*1+5)*1+4)*1+3)*1+2)*1+1)*((((((7*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*(((((((8*1+7)*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*((((((((9*1+8)*1+7)*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*(((((((((1*1+9)*1+8)*1+7)*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*((((((((((2*1+1)*1+9)*1+8)*1+7)*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*(((((((((((3*1+2)*1+1)*1+9)*1+8)*1+7)*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*((((((((((((4*1+3)*1+2)*1+1)*1+9)*1+8)*1+7)*1+6)*1+5)*1+4)*1+3)*1+2)*1+1)*+1++2+1++2-1+-2-1+2-+1+2+-1+2--1++2-+1++2+-1++2--1+-1-1++1+1++1-+1++1+-1++1--1+-1*-1++1*+1+1*+1++1*1+-1--1++1-+1+(1)+(1)+(1)-(1)+-(1)+(1)+-(1)-(1)++(1)-(1)+(+1)+(-1)+(-1)+(+1)+(-1)+(-1)+(-1)-(-1)+(+1)+(+1)+(+(1))-(1)+(-(1))+(-(1))+(+(1))+(+(1))+(-(1))-(-(1))+3-3-((+1)+(-1))+3-3-((-1)+(+1))+3-3-((-1)+(-1))+3-3-((-1)-(-1))+3-3-((+1)-(-1))+3-3-((-1)-(+1))+3-3-((+1)*(-1))+3-3-((-1)*(+1))+3-3+((-1+1)+(-1+1))+3-3+((+1-1)+(+1-1))+3-3+((+1-1)+(+1-1))+3-3+((-1+1)+(-1+1))+3-3+((-1+1)-(-1+1))+3-3+((+1-1)-(+1-1))+3-3+((+1-1)-(+1-1))+3-3+((-1+1)-(-1+1))+3-3+((+1)+(+1))+3+((+1)-(+1))+3-3+((+1)*(+1))+3-3+((((+1))+((+1))))+3-3+((+1-1)+((+1-1)))+3-3+((((+1-2))+((+1-2))))+3-3+((((+1-(+2)))+(
(+1-(+2)))))+1^1+1^-1++1^+1++1^-1+-1^+1+-1^-1+-1^(-1)++1^+1+111++1^-1+111+-1^+1+111+-1^-1+111++1+2^3++1+2^+3++111+1^1++111+1^+1++111+1^(0+1)++111+1^(+1-0)++111+1^-1++111-1^1++111-1^+1++111-1^(0+1)++111-1^(+1-0)++111-1^-1++1^+1-111++1^-1-111+-1^+1-111+-1^-1-111++111-1^(010+1)++111-1^(+1-010)++111*1^1++111*1^+1++111*1^(0+1)++111*1^(+1-0)++111*1^-1+-111*1^1+-111*1^+1+-111*1^(0+1)+-111*1^(+1-0)+-111*1^-1+-1^2^3-1^6+-(-1^2^3)+1^6+-1^0+-1^1-1+-1^1-1+-1^(1-1)+-1^0+-1^1-1+-1^1-1+-1^(1-1)++++1
102 changes: 102 additions & 0 deletions benches/pratt.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
#[macro_use]
extern crate criterion;

use criterion::black_box;
use criterion::Criterion;

use winnow::ascii::digit1;
use winnow::combinator::delimited;
use winnow::combinator::empty;
use winnow::combinator::fail;
use winnow::combinator::peek;
use winnow::dispatch;
use winnow::token::any;
use winnow::PResult;
use winnow::Parser;

type Stream<'i> = &'i [u8];

static CORPUS: &str = include_str!("ariphmetic.txt");

/// Benchmark target for the Pratt-style `precedence` combinator.
///
/// Currently a stub: the real parser body is commented out below and the
/// function returns `Ok(0)` without reading anything from `i`.
///
/// NOTE(review): `parse_expression` drives this through `Parser::parse`, which
/// presumably requires the whole input to be consumed. A stub that consumes
/// nothing would then fail the `expect` on the non-empty corpus — confirm
/// before trusting numbers from this benchmark.
fn pratt_parser(i: &mut Stream<'_>) -> PResult<i64> {
// NOTE(review): unused for as long as the body below stays commented out.
use winnow::combinator::precedence;
// Disabled implementation, kept for reference. Its four arguments are, in order:
//   1. operand parser: a parenthesised sub-expression or a run of digits parsed as `i64`,
//   2. prefix operators `+` and `-` (binding power 9),
//   3. postfix operators: none (`fail`),
//   4. infix operators, each with two binding powers and a fold closure.
// NOTE(review): the `^` arm applies Rust's `a ^ b`, which is bitwise XOR on `i64`,
// not exponentiation — confirm that is intended before re-enabling.
// precedence::precedence(
// dispatch! {peek(any);
// b'(' => delimited(b'(', pratt_parser, b')'),
// _ => digit1.parse_to::<i64>()
// },
// dispatch! {any;
// b'+' => empty.value((9, (&|a| a) as _)),
// b'-' => empty.value((9, (&|a: i64| -a) as _)),
// _ => fail
// },
// fail,
// dispatch! {any;
// b'+' => empty.value((5, 6, (&|a, b| a + b) as _)),
// b'-' => empty.value((5, 6, (&|a, b| a - b) as _)),
// b'*' => empty.value((7, 8, (&|a, b| a * b) as _)),
// b'/' => empty.value((7, 8, (&|a, b| a / b) as _)),
// b'%' => empty.value((7, 8, (&|a, b| a % b) as _)),
// b'^' => empty.value((9, 10, (&|a, b| a ^ b) as _)),
// _ => fail
// },
// )
// .parse_next(i)
// Placeholder result while the implementation above is disabled.
Ok(0)
}

/// Benchmark target for the shunting-yard flavour of the `precedence` combinator.
///
/// Currently a stub: the real parser body is commented out below and the
/// function returns `Ok(0)` without reading anything from `i`.
///
/// NOTE(review): `parse_expression` drives this through `Parser::parse`, which
/// presumably requires the whole input to be consumed. A stub that consumes
/// nothing would then fail the `expect` on the non-empty corpus — confirm
/// before trusting numbers from this benchmark.
fn shunting_yard_parser(i: &mut Stream<'_>) -> PResult<i64> {
// NOTE(review): unused for as long as the body below stays commented out.
use winnow::combinator::shunting_yard;
// Disabled implementation, kept for reference. Its four arguments are, in order:
//   1. operand parser: a parenthesised sub-expression or a run of digits parsed as `i64`,
//   2. prefix operators `+` and `-` (binding power 9),
//   3. postfix operators: none (`fail`),
//   4. infix operators, each with two binding powers and a fold closure.
// NOTE(review): the `^` arm applies Rust's `a ^ b`, which is bitwise XOR on `i64`,
// not exponentiation — confirm that is intended before re-enabling.
// shunting_yard::precedence(
// dispatch! {peek(any);
// b'(' => delimited(b'(', shunting_yard_parser, b')'),
// _ => digit1.parse_to::<i64>()
// },
// dispatch! {any;
// b'+' => empty.value((9, (&|a| a) as _)),
// b'-' => empty.value((9, (&|a: i64| -a) as _)),
// _ => fail
// },
// fail,
// dispatch! {any;
// b'+' => empty.value((5, 6, (&|a, b| a + b) as _)),
// b'-' => empty.value((5, 6, (&|a, b| a - b) as _)),
// b'*' => empty.value((7, 8, (&|a, b| a * b) as _)),
// b'/' => empty.value((7, 8, (&|a, b| a / b) as _)),
// b'%' => empty.value((7, 8, (&|a, b| a % b) as _)),
// b'^' => empty.value((9, 10, (&|a, b| a ^ b) as _)),
// _ => fail
// },
// )
// .parse_next(i)
// Placeholder result while the implementation above is disabled.
Ok(0)
}

/// Registers the `pratt` benchmark group, timing `pratt_parser` and
/// `shunting_yard_parser` on the shared arithmetic corpus.
///
/// Both parsers are run once up front so that a parser which cannot handle the
/// corpus fails loudly before any timing starts.
fn parse_expression(c: &mut Criterion) {
    // Strip trailing line terminators only when they are actually there.
    // Unconditionally slicing off the last byte would truncate the expression
    // if the file has no final newline, leave a stray `\r` with CRLF line
    // endings, and underflow on an empty corpus.
    let input = CORPUS
        .trim_end_matches(|ch: char| ch == '\n' || ch == '\r')
        .as_bytes();
    let mut group = c.benchmark_group("pratt");

    pratt_parser.parse(input).expect("pratt should parse");
    shunting_yard_parser
        .parse(input)
        .expect("shunting yard should parse");

    group.bench_function("pratt", |b| {
        b.iter(|| black_box(pratt_parser.parse(input).unwrap()));
    });

    group.bench_function("shunting yard", |b| {
        b.iter(|| black_box(shunting_yard_parser.parse(input).unwrap()));
    });

    // Finish the group explicitly rather than relying on its drop.
    group.finish();
}

// https://www.jibbow.com/posts/criterion-flamegraphs/
use pprof::criterion::{Output, PProfProfiler};
// Defines the `benches` group: runs `parse_expression` under a Criterion
// configuration that attaches a pprof profiler emitting flamegraph output.
criterion_group! {
name = benches;
// NOTE(review): the `100` is presumably the sampling frequency passed to pprof — confirm against the pprof docs.
config = Criterion::default().with_profiler(PProfProfiler::new(100, Output::Flamegraph(None)));
targets =parse_expression
}

criterion_main!(benches);
97 changes: 97 additions & 0 deletions examples/pratt/bench.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
mod parser;

use std::cell::RefCell;

use criterion::{black_box, BatchSize};
use winnow::{prelude::*, Stateful};

/// Benchmarks the example Pratt and shunting-yard expression parsers on one
/// fixed expression, both drawing from a single shared `bumpalo` arena.
///
/// Each parser is run once before timing so a broken parser panics with a
/// clear message instead of inside the measurement loop.
fn pratt(c: &mut criterion::Criterion) {
    let input =
        "a = 2*-2 * (a ? 1 + 2 * 4 - --a.bar + 2 : 2) / ( &**foo.a->p! -+1) + 3^1 / 4 == 1 * (2 - 7 + 567 *12 /2) + 3*(1+2*( 45 /2))";
    let mut group = c.benchmark_group("pratt");

    // One arena for every run. `RefCell` lets the setup closures take the
    // mutable borrow that `reset` needs while the routines borrow it shared.
    let bump = RefCell::new(bumpalo::Bump::new());

    // Sanity checks. Each one lives in its own scope so the shared borrow is
    // released before the arena is reset.
    {
        let i = Stateful {
            input,
            state: &*bump.borrow(),
        };
        parser::pratt_parser.parse(i).expect("pratt should parse");
    }
    bump.borrow_mut().reset();
    {
        let i = Stateful {
            input,
            state: &*bump.borrow(),
        };
        parser::shunting_yard_parser
            .parse(i)
            .expect("shunting yard should parse");
    }
    bump.borrow_mut().reset();

    // `BatchSize::PerIteration` makes Criterion alternate setup and routine,
    // so the arena really is reset before every timed parse. With the batched
    // sizes, all setup calls for a batch run before the first routine, which
    // would let allocations pile up across the batch.
    group.bench_function("pratt", |b| {
        b.iter_batched(
            || {
                bump.borrow_mut().reset();
                &bump
            },
            |arena| {
                let i = Stateful {
                    input,
                    state: &*arena.borrow(),
                };
                black_box(parser::pratt_parser.parse(i).unwrap());
            },
            BatchSize::PerIteration,
        );
    });

    group.bench_function("shunting_yard", |b| {
        b.iter_batched(
            || {
                bump.borrow_mut().reset();
                &bump
            },
            |arena| {
                let i = Stateful {
                    input,
                    state: &*arena.borrow(),
                };
                black_box(parser::shunting_yard_parser.parse(i).unwrap());
            },
            BatchSize::PerIteration,
        );
    });

    // Alternative variants that build a fresh arena for every iteration;
    // disabled, kept for reference.
    // group.bench_function("pratt_with_new_bump_each_time", |b| {
    //     b.iter_batched(
    //         || bumpalo::Bump::new(),
    //         |b| {
    //             let i = Stateful { input, state: &b };
    //             black_box(parser::pratt_parser.parse(i).unwrap());
    //         },
    //         BatchSize::SmallInput,
    //     );
    // });
    //
    // group.bench_function("shunting_yard_with_new_bump_each_time", |b| {
    //     b.iter_batched(
    //         || bumpalo::Bump::new(),
    //         |b| {
    //             let i = Stateful { input, state: &b };
    //             black_box(parser::shunting_yard_parser.parse(i).unwrap());
    //         },
    //         BatchSize::SmallInput,
    //     );
    // });

    // Finish the group explicitly rather than relying on its drop.
    group.finish();
}

criterion::criterion_group!(benches, pratt);
criterion::criterion_main!(benches);
49 changes: 49 additions & 0 deletions examples/pratt/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
use winnow::{prelude::*, Stateful};

mod parser;

fn main() -> Result<(), lexopt::Error> {
let args = Args::parse()?;

let input = args.input.as_deref().unwrap_or("1 + 1");
let b = bumpalo::Bump::new();
let input = Stateful {
input,
state: &b,
};
match parser::pratt_parser.parse(input) {
Ok(result) => {
println!("{result}");
}
Err(err) => {
println!("FAILED");
println!("{err}");
}
}

Ok(())
}

/// Command-line arguments accepted by this example.
#[derive(Default)]
struct Args {
/// Expression to parse, filled from a free-standing command-line value; `None` when no value was given.
input: Option<String>,
}

impl Args {
fn parse() -> Result<Self, lexopt::Error> {
use lexopt::prelude::*;

let mut res = Args::default();

let mut args = lexopt::Parser::from_env();
while let Some(arg) = args.next()? {
match arg {
Value(input) => {
res.input = Some(input.string()?);
}
_ => return Err(arg.unexpected()),
}
}
Ok(res)
}
}
Loading