Skip to content

Commit

Permalink
Refactored core, parser
Browse files Browse the repository at this point in the history
  - fix(core) State::expected() returns Iterator
  - add(core) lr::Context::expected(), glr::Context::expected()
  - remove(core) expected from InvalidTerminal

  - fix(core) use drain() instead of into_iter() for memory reuse

  - remove BTree sorting since core::builder::State now uses BTreeMap
  - remove(core,emit) builder::State::to_export()
  • Loading branch information
ehwan committed Sep 2, 2024
1 parent 99efec8 commit baeb831
Show file tree
Hide file tree
Showing 19 changed files with 194 additions and 175 deletions.
8 changes: 4 additions & 4 deletions rusty_lr/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rusty_lr"
version = "2.7.1"
version = "2.7.2"
edition = "2021"
license = "MIT"
description = "GLR, LR(1) and LALR(1) parser generator with custom reduce action"
Expand All @@ -10,9 +10,9 @@ keywords = ["parser", "bison", "lr", "glr", "compiler"]
categories = ["parsing", "compilers", "parser-implementations"]

[dependencies]
rusty_lr_core = { version = "2.11.1", path = "../rusty_lr_core" }
rusty_lr_derive = { version = "1.21.0", path = "../rusty_lr_derive", optional = true }
rusty_lr_buildscript = { version = "0.16.0", path = "../rusty_lr_buildscript", optional = true }
rusty_lr_core = { version = "2.12.0", path = "../rusty_lr_core" }
rusty_lr_derive = { version = "1.22.0", path = "../rusty_lr_derive", optional = true }
rusty_lr_buildscript = { version = "0.17.0", path = "../rusty_lr_buildscript", optional = true }

[features]
default = ["derive"]
Expand Down
6 changes: 3 additions & 3 deletions rusty_lr_buildscript/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rusty_lr_buildscript"
version = "0.16.0"
version = "0.17.0"
edition = "2021"
license = "MIT"
description = "buildscipt tools for rusty_lr"
Expand All @@ -11,8 +11,8 @@ categories = ["parsing"]


[dependencies]
rusty_lr_parser = { version = "3.19.0", path = "../rusty_lr_parser" }
rusty_lr_core = { version = "2.11.0", path = "../rusty_lr_core", features = [
rusty_lr_parser = { version = "3.20.0", path = "../rusty_lr_parser" }
rusty_lr_core = { version = "2.12.0", path = "../rusty_lr_core", features = [
"builder",
] }
codespan-reporting = "0.11"
Expand Down
2 changes: 1 addition & 1 deletion rusty_lr_core/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "rusty_lr_core"
version = "2.11.1"
version = "2.12.0"
edition = "2021"
license = "MIT"
description = "core library for rusty_lr"
Expand Down
56 changes: 23 additions & 33 deletions rusty_lr_core/src/builder/state.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
use crate::hashmap::HashMap;
use crate::rule::LookaheadRuleRefSet;

use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::hash::Hash;

/// state in DFA building
#[derive(Debug, Clone)]
pub struct State<Term, NonTerm> {
pub shift_goto_map_term: HashMap<Term, usize>,
pub shift_goto_map_nonterm: HashMap<NonTerm, usize>,
pub reduce_map: HashMap<Term, BTreeSet<usize>>,
pub shift_goto_map_term: BTreeMap<Term, usize>,
pub shift_goto_map_nonterm: BTreeMap<NonTerm, usize>,
pub reduce_map: BTreeMap<Term, BTreeSet<usize>>,
pub ruleset: LookaheadRuleRefSet<Term>,
}
impl<Term, NonTerm> State<Term, NonTerm> {
Expand All @@ -21,40 +20,31 @@ impl<Term, NonTerm> State<Term, NonTerm> {
ruleset: LookaheadRuleRefSet::new(),
}
}
/// We have two different `State` types. One in the `crate::builder` module and one in the `crate`.
/// This state in `crate::builder` is used to build the DFA, which contains the lookaheads of the rules.
/// This method converts the `State` in `crate::builder` to the `State` in `crate`.
pub fn to_export(self) -> crate::lr::State<Term, NonTerm>
where
Term: Hash + Eq,
{
crate::lr::State {
shift_goto_map_term: self.shift_goto_map_term,
shift_goto_map_nonterm: self.shift_goto_map_nonterm,
reduce_map: self
.reduce_map

/// Map terminal and non-terminal symbols to another type.
/// This is useful when exporting & importing rules.
pub fn map<NewTerm: Ord, NewNonTerm: Ord>(
self,
term_map: impl Fn(Term) -> NewTerm,
nonterm_map: impl Fn(NonTerm) -> NewNonTerm,
) -> State<NewTerm, NewNonTerm> {
State {
shift_goto_map_term: self
.shift_goto_map_term
.into_iter()
.map(|(k, v)| (k, v.into_iter().next().unwrap()))
.map(|(term, state)| (term_map(term), state))
.collect(),
shift_goto_map_nonterm: self
.shift_goto_map_nonterm
.into_iter()
.map(|(nonterm, state)| (nonterm_map(nonterm), state))
.collect(),
ruleset: self.ruleset.rules.into_keys().collect(),
}
}
/// We have two different `State` types. One in the `crate::builder` module and one in the `crate`.
/// This state in `crate::builder` is used to build the DFA, which contains the lookaheads of the rules.
/// This method converts the `State` in `crate::builder` to the `State` in `crate`.
pub fn to_export_glr(self) -> crate::glr::State<Term, NonTerm>
where
Term: Hash + Eq,
{
crate::glr::State {
shift_goto_map_term: self.shift_goto_map_term,
shift_goto_map_nonterm: self.shift_goto_map_nonterm,
reduce_map: self
.reduce_map
.into_iter()
.map(|(k, v)| -> (Term, Vec<usize>) { (k, v.into_iter().collect()) })
.map(|(term, rule)| (term_map(term), rule))
.collect(),
ruleset: self.ruleset.rules.into_keys().collect(),
ruleset: self.ruleset.map(term_map),
}
}
}
Expand Down
57 changes: 46 additions & 11 deletions rusty_lr_core/src/glr/context.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
use std::rc::Rc;

use super::Node;
use super::Parser;
use super::{MultiplePathError, NodeData};

use crate::HashMap;
use crate::HashSet;

#[cfg(feature = "tree")]
use crate::TreeList;
Expand All @@ -24,19 +26,17 @@ pub struct Context<Data: NodeData> {

/// For temporary use. store arguments for calling `reduce_action`.
/// But we don't want to reallocate every `feed` call
pub reduce_args: Vec<Data>,
pub(crate) reduce_args: Vec<Data>,

/// For temporary use. store nodes for next reduce.
pub(crate) nodes_pong: HashMap<usize, Vec<Rc<Node<Data>>>>,
}

impl<Data: NodeData> Context<Data> {
/// Create a new context.
/// `current_nodes` is initialized with a root node.
pub fn new() -> Self {
Context {
current_nodes: HashMap::from_iter([(0, vec![Rc::new(Node::new_root())])]),
state_list: Vec::new(),
reduce_errors: Vec::new(),
reduce_args: Vec::new(),
}
Default::default()
}

/// Get number of diverged paths
Expand Down Expand Up @@ -122,9 +122,7 @@ impl<Data: NodeData> Context<Data> {
/// For debugging.
/// Get all sequence of token trees (from root to current node) for every diverged path.
#[cfg(feature = "tree")]
pub fn to_tree_lists<'a>(
&'a self,
) -> impl Iterator<Item = TreeList<Data::Term, Data::NonTerm>> + 'a
pub fn to_tree_lists(&self) -> impl Iterator<Item = TreeList<Data::Term, Data::NonTerm>> + '_
where
Data::Term: Clone,
Data::NonTerm: Clone,
Expand Down Expand Up @@ -164,11 +162,48 @@ impl<Data: NodeData> Context<Data> {
.collect()
})
}

/// This function should be called after `feed()` returns `Error`.
/// Get expected tokens for last `feed()` call.
/// The iterator can contain duplicate tokens.
pub fn expected<'a, P: Parser<Term = Data::Term, NonTerm = Data::NonTerm>>(
&'a self,
p: &'a P,
) -> impl Iterator<Item = &'a Data::Term>
where
Data::Term: 'a,
Data::NonTerm: 'a,
{
self.state_list
.iter()
.flat_map(|state| p.get_states()[*state].expected())
}

/// This function should be called after `feed()` returns `Error`.
/// Get expected tokens for last `feed()` call.
/// The iterator does not contain duplicate tokens.
pub fn expected_dedup<'a, P: Parser<Term = Data::Term, NonTerm = Data::NonTerm>>(
&'a self,
p: &'a P,
) -> impl Iterator<Item = &'a Data::Term>
where
Data::Term: 'a + std::hash::Hash + Eq,
Data::NonTerm: 'a,
{
let dedupped: HashSet<&'a Data::Term> = self.expected(p).collect();
dedupped.into_iter()
}
}

impl<Data: NodeData> Default for Context<Data> {
fn default() -> Self {
Self::new()
Context {
current_nodes: HashMap::from_iter([(0, vec![Rc::new(Node::new_root())])]),
state_list: Default::default(),
reduce_errors: Default::default(),
reduce_args: Default::default(),
nodes_pong: Default::default(),
}
}
}

Expand Down
17 changes: 0 additions & 17 deletions rusty_lr_core/src/glr/error.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use std::collections::BTreeSet;
use std::fmt::Debug;
use std::fmt::Display;

Expand All @@ -7,8 +6,6 @@ use std::fmt::Display;
pub struct InvalidTerminalError<Term, ReduceActionError> {
/// The terminal that feeded to the parser.
pub term: Term,
/// The expected terminals, Along all the paths.
pub expected: Vec<Term>,
/// The reduce action errors.
pub reduce_errors: Vec<ReduceActionError>,
}
Expand All @@ -19,20 +16,6 @@ impl<Term: Display, ReduceActionError: Display> Display
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "Invalid Terminal: {}. ", self.term)?;

if self.expected.is_empty() {
write!(f, "No expected token")?;
} else {
let expected: BTreeSet<String> =
self.expected.iter().map(|t| format!("{}", t)).collect();
write!(f, "Expected one of: ")?;
let len = expected.len();
for (id, term) in expected.into_iter().enumerate() {
write!(f, "{}", term)?;
if id < len - 1 {
write!(f, ", ")?;
}
}
}
for error in &self.reduce_errors {
write!(f, "\nReduce action error: {}", error)?;
}
Expand Down
40 changes: 14 additions & 26 deletions rusty_lr_core/src/glr/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ pub(crate) mod state;

pub mod node;

use crate::HashMap;
pub use context::Context;
pub use error::InvalidTerminalError;
pub use error::MultiplePathError;
Expand Down Expand Up @@ -34,14 +33,14 @@ where
P::NonTerm: Hash + Eq + Clone,
{
let mut reduce_nodes = std::mem::take(&mut context.current_nodes);
context.nodes_pong.clear();

context.state_list.clear();
context.reduce_errors.clear();

// BFS reduce
while !reduce_nodes.is_empty() {
let mut reduce_nodes_pong: HashMap<usize, Vec<Rc<Node<Data>>>> = HashMap::default();
for (state, nodes) in reduce_nodes.into_iter() {
for (state, nodes) in reduce_nodes.drain() {
let next_term_shift_state = parser.get_states()[state].shift_goto_term(&term);
context.state_list.push(state);
if let Some(reduce_rules) = parser.get_states()[state].reduce(&term) {
Expand All @@ -56,7 +55,6 @@ where
reduce_rule,
Rc::clone(&node),
context,
&mut reduce_nodes_pong,
&term,
userdata,
);
Expand All @@ -67,7 +65,6 @@ where
reduce_rules[0],
Rc::clone(&node),
context,
&mut reduce_nodes_pong,
&term,
userdata,
);
Expand All @@ -87,15 +84,7 @@ where
.push(Rc::new(next_node));
}
} else {
reduce(
parser,
reduce_rules[0],
node,
context,
&mut reduce_nodes_pong,
&term,
userdata,
);
reduce(parser, reduce_rules[0], node, context, &term, userdata);
}
}
} else if let Some(next_term_shift_state) = next_term_shift_state {
Expand All @@ -116,20 +105,12 @@ where
}
}
}
reduce_nodes = reduce_nodes_pong;
std::mem::swap(&mut reduce_nodes, &mut context.nodes_pong);
}

if context.current_nodes.is_empty() {
let mut expected = parser.get_states()[context.state_list[0]].expected();
for state in context.state_list.iter().skip(1) {
expected = expected
.union(&parser.get_states()[*state].expected())
.cloned()
.collect();
}
Err(InvalidTerminalError {
term,
expected: expected.into_iter().cloned().collect(),
reduce_errors: std::mem::take(&mut context.reduce_errors),
})
} else {
Expand Down Expand Up @@ -214,7 +195,6 @@ fn reduce<P: Parser, Data: NodeData<Term = P::Term, NonTerm = P::NonTerm> + Clon
reduce_rule: usize,
node: Rc<Node<Data>>,
context: &mut Context<Data>,
out: &mut HashMap<usize, Vec<Rc<Node<Data>>>>,
term: &P::Term,
userdata: &mut Data::UserData,
) -> bool
Expand All @@ -231,7 +211,13 @@ where
let parent = data_extracted;

let mut do_shift = true;
match Data::new_nonterm(reduce_rule, context, &mut do_shift, term, userdata) {
match Data::new_nonterm(
reduce_rule,
&mut context.reduce_args,
&mut do_shift,
term,
userdata,
) {
Ok(new_data) => {
if let Some(nonterm_shift_state) = parser.get_states()[parent.state]
.shift_goto_nonterm(&parser.get_rules()[reduce_rule].name)
Expand All @@ -244,7 +230,9 @@ where
tree: Some(tree),
};

out.entry(nonterm_shift_state)
context
.nodes_pong
.entry(nonterm_shift_state)
.or_default()
.push(Rc::new(new_node));
}
Expand Down
Loading

0 comments on commit baeb831

Please sign in to comment.