diff --git a/Cargo.toml b/Cargo.toml
index 9bb587e..2739839 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -3,7 +3,7 @@ members = ["derive"]
 
 [package]
 name = "from-pest"
-version = "0.3.3"
+version = "0.3.4"
 edition = "2021"
 authors = ["cad97 "]
 readme = "./README.md"
diff --git a/derive/Cargo.toml b/derive/Cargo.toml
index a5df884..377ff10 100644
--- a/derive/Cargo.toml
+++ b/derive/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "pest-ast"
-version = "0.3.5"
+version = "0.3.6"
 edition = "2021"
 authors = ["cad97 "]
 description = "Derive to convert from pest parse tree to typed syntax tree"
diff --git a/derive/README.md b/derive/README.md
index f6bf012..a7b7e27 100644
--- a/derive/README.md
+++ b/derive/README.md
@@ -306,3 +306,66 @@ And doing the actual parse is as simple as
 let mut parse_tree = csv::Parser::parse(csv::Rule::file, &source)?;
 let syntax_tree = File::from_pest(&mut parse_tree).expect("infallible");
 ```
+
+## Default Values for Optional Rules
+
+The `#[pest_ast(default(...))]` field attribute lets you handle optional grammar rules without wrapping the field in `Option<T>` in your AST: you give an expression that supplies the field's value whenever the optional rule is not present in the input.
+
+### The Problem
+
+When a pest grammar contains an optional rule, the matching AST field normally has to be an `Option<T>`:
+
+```rust
+// Grammar: function = { "fn" ~ id ~ ("->" ~ type)? ~ "{" ~ "}" }
+
+#[derive(FromPest, Debug)]
+#[pest_ast(rule(Rule::function))]
+pub struct Function {
+    pub name: String,
+    pub return_type: Option<Type>,  // Optional field
+}
+```
+
+### The Solution
+
+With the `default` attribute, you can drop the `Option<T>` wrapper and specify a default value instead:
+
+```rust
+#[derive(FromPest, Debug)]
+#[pest_ast(rule(Rule::function))]
+pub struct Function {
+    pub name: String,
+
+    #[pest_ast(default(Type::Void))]  // Specify default value
+    pub return_type: Type,            // No Option needed!
+}
+```
+
+### Usage Examples
+
+```rust
+// Simple defaults
+#[pest_ast(default(Type::Void))]
+pub return_type: Type,
+
+// Complex defaults with expressions
+#[pest_ast(default(Vec::new()))]
+pub parameters: Vec<Parameter>,
+
+#[pest_ast(default({
+    Config {
+        debug: false,
+        optimization_level: 2,
+    }
+}))]
+pub config: Config,
+```
+
+### How It Works
+
+The `default` attribute generates code that:
+1. First tries to convert the field normally using `FromPest`
+2. If the conversion fails with `ConversionError::NoMatch` (the optional rule is not present), evaluates the default expression and uses its value
+3. If the conversion fails with any other error, propagates that error
+
+This gives you a type-safe way to handle optional grammar elements while keeping your AST simple and free of `Option` handling.
diff --git a/derive/examples/defaults_showcase.pest b/derive/examples/defaults_showcase.pest
new file mode 100644
index 0000000..4eff18b
--- /dev/null
+++ b/derive/examples/defaults_showcase.pest
@@ -0,0 +1,21 @@
+// Simple grammar showcasing default values
+
+WHITESPACE = _{ " " | "\t" | "\n" | "\r" }
+
+// Variable declarations with optional types and initialization
+var_decl = { var_kind ~ id ~ type_annotation? ~ initializer? 
~ ";" }
+var_kind = { "let" | "const" }
+type_annotation = { ":" ~ type_name }
+initializer = { "=" ~ expr }
+
+// Expressions (token rules are atomic so implicit WHITESPACE cannot split a token)
+expr = { number | string | id }
+number = @{ ASCII_DIGIT+ }
+string = @{ "\"" ~ (!("\"") ~ ANY)* ~ "\"" }
+
+// Basic constructs (`id` is atomic for the same reason as `number`)
+id = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
+type_name = { "int" | "string" | "bool" | "void" }
+
+// Program
+program = { var_decl* }
diff --git a/derive/examples/defaults_showcase.rs b/derive/examples/defaults_showcase.rs
new file mode 100644
index 0000000..179b6c9
--- /dev/null
+++ b/derive/examples/defaults_showcase.rs
@@ -0,0 +1,239 @@
+#![allow(
+    bad_style,
+    dead_code,
+    clippy::clone_on_copy,
+    clippy::upper_case_acronyms
+)]
+
+#[macro_use]
+extern crate pest_derive;
+extern crate from_pest;
+#[macro_use]
+extern crate pest_ast;
+extern crate pest;
+
+use from_pest::FromPest;
+use pest::Parser;
+
+#[derive(Parser)]
+#[grammar = "../examples/defaults_showcase.pest"]
+pub struct ShowcaseParser;
+
+// Type names accepted by the `type_name` rule
+#[derive(Debug, Clone, PartialEq)]
+pub enum Type {
+    Int,
+    String,
+    Bool,
+    Void,
+}
+
+impl<'pest> FromPest<'pest> for Type {
+    type Rule = Rule;
+    type FatalError = from_pest::Void;
+
+    fn from_pest(
+        pest: &mut pest::iterators::Pairs<'pest, Rule>,
+    ) -> Result<Self, from_pest::ConversionError<from_pest::Void>> {
+        let pair = pest.next().ok_or(from_pest::ConversionError::NoMatch)?;
+        if pair.as_rule() == Rule::type_name {
+            match pair.as_str() {
+                "int" => Ok(Type::Int),
+                "string" => Ok(Type::String),
+                "bool" => Ok(Type::Bool),
+                "void" => Ok(Type::Void),
+                _ => Err(from_pest::ConversionError::NoMatch),
+            }
+        } else {
+            Err(from_pest::ConversionError::NoMatch)
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum Expr {
+    Number(i32),
+    String(String),
+    Id(String),
+}
+
+impl<'pest> FromPest<'pest> for Expr {
+    type Rule = Rule;
+    type FatalError = from_pest::Void;
+
+    fn from_pest(
+        pest: &mut pest::iterators::Pairs<'pest, Rule>,
+    ) -> Result<Self, from_pest::ConversionError<from_pest::Void>> {
+        let pair = pest.next().ok_or(from_pest::ConversionError::NoMatch)?;
+        // `expr` only wraps one concrete alternative, so descend into it and
+        // convert wrapped and bare pairs with the same match below.
+        let pair = if pair.as_rule() == Rule::expr {
+            let mut inner = pair.into_inner();
+            inner.next().ok_or(from_pest::ConversionError::NoMatch)?
+        } else {
+            pair
+        };
+        match pair.as_rule() {
+            // A literal that does not fit in `i32` is reported as a failed
+            // conversion instead of panicking on user input.
+            Rule::number => pair
+                .as_str()
+                .parse::<i32>()
+                .map(Expr::Number)
+                .map_err(|_| from_pest::ConversionError::NoMatch),
+            Rule::string => {
+                let s = pair.as_str();
+                Ok(Expr::String(s[1..s.len() - 1].to_string())) // Remove quotes
+            }
+            Rule::id => Ok(Expr::Id(pair.as_str().to_string())),
+            _ => Err(from_pest::ConversionError::NoMatch),
+        }
+    }
+}
+
+#[derive(Debug, Clone, PartialEq)]
+pub enum VarKind {
+    Let,
+    Const,
+}
+
+impl<'pest> FromPest<'pest> for VarKind {
+    type Rule = Rule;
+    type FatalError = from_pest::Void;
+
+    fn from_pest(
+        pest: &mut pest::iterators::Pairs<'pest, Rule>,
+    ) -> Result<Self, from_pest::ConversionError<from_pest::Void>> {
+        let pair = pest.next().ok_or(from_pest::ConversionError::NoMatch)?;
+        if pair.as_rule() == Rule::var_kind {
+            match pair.as_str() {
+                "let" => Ok(VarKind::Let),
+                "const" => Ok(VarKind::Const),
+                _ => Err(from_pest::ConversionError::NoMatch),
+            }
+        } else {
+            Err(from_pest::ConversionError::NoMatch)
+        }
+    }
+}
+
+#[derive(FromPest, Debug, Clone, PartialEq)]
+#[pest_ast(rule(Rule::id))]
+pub struct Id<'pest> {
+    #[pest_ast(outer())]
+    pub span: pest::Span<'pest>,
+}
+
+impl<'pest> Id<'pest> {
+    pub fn name(&self) -> &str {
+        self.span.as_str()
+    }
+}
+
+#[derive(FromPest, Debug, Clone, PartialEq)]
+#[pest_ast(rule(Rule::type_annotation))]
+pub struct TypeAnnotation {
+    pub type_name: Type,
+}
+
+#[derive(FromPest, Debug, Clone, PartialEq)]
+#[pest_ast(rule(Rule::initializer))]
+pub struct Initializer {
+    pub expr: Expr,
+}
+
+// Variable declaration showcasing multiple default values
+#[derive(FromPest, Debug, Clone, PartialEq)]
+#[pest_ast(rule(Rule::var_decl))]
+pub struct VarDecl<'pest> {
+    // `var_kind` is mandatory in the grammar, so this default is only a fallback
+    #[pest_ast(default(VarKind::Let))]
+    pub kind: VarKind,
+
+    pub id: Id<'pest>,
+
+    // Type annotation defaults to 'void' if not specified
+    #[pest_ast(default(TypeAnnotation { type_name: Type::Void }))]
+    pub type_annotation: TypeAnnotation,
+
+    // Initialization defaults to a placeholder value
+    #[pest_ast(default(Initializer { expr: Expr::Number(0) }))]
+    pub initializer: Initializer,
+}
+
+#[derive(FromPest, Debug, Clone, PartialEq)]
+#[pest_ast(rule(Rule::program))]
+pub struct Program<'pest> {
+    pub declarations: Vec<VarDecl<'pest>>,
+}
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    println!("=== Default Values Showcase ===\n");
+
+    // Test 1: Minimal declaration (all defaults)
+    let input1 = "let x;";
+    println!("Input 1: {input1}");
+    let pairs1 = ShowcaseParser::parse(Rule::program, input1)?;
+    let program1: Program = Program::from_pest(&mut pairs1.clone())?;
+    println!("Parsed: {program1:#?}\n");
+
+    // Test 2: With type annotation
+    let input2 = "let y: int;";
+    println!("Input 2: {input2}");
+    let pairs2 = ShowcaseParser::parse(Rule::program, input2)?;
+    let program2: Program = Program::from_pest(&mut pairs2.clone())?;
+    println!("Parsed: {program2:#?}\n");
+
+    // Test 3: With initialization
+    let input3 = "let z = 42;";
+    println!("Input 3: {input3}");
+    let pairs3 = ShowcaseParser::parse(Rule::program, input3)?;
+    let program3: Program = Program::from_pest(&mut pairs3.clone())?;
+    println!("Parsed: {program3:#?}\n");
+
+    // Test 4: Fully specified
+    let input4 = "const w: string = \"hello\";";
+    println!("Input 4: {input4}");
+    let pairs4 = ShowcaseParser::parse(Rule::program, input4)?;
+    let program4: Program = Program::from_pest(&mut pairs4.clone())?;
+    println!("Parsed: {program4:#?}\n");
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_defaults_applied() {
+        let input = "let x;";
+        let pairs = ShowcaseParser::parse(Rule::program, input).unwrap();
+        let program: Program = Program::from_pest(&mut pairs.clone()).unwrap();
+
+        assert_eq!(program.declarations.len(), 1);
+        let decl = &program.declarations[0];
+
+        // `let` is parsed; the omitted annotation and initializer use their defaults
+        assert_eq!(decl.kind, VarKind::Let);
+        assert_eq!(decl.type_annotation.type_name, Type::Void);
+        assert_eq!(decl.initializer.expr, Expr::Number(0));
+        assert_eq!(decl.id.name(), "x");
+    }
+
+    #[test]
+    fn test_explicit_values_override_defaults() {
+        let input = "const y: int = 42;";
+        let pairs = ShowcaseParser::parse(Rule::program, input).unwrap();
+        let program: Program = Program::from_pest(&mut pairs.clone()).unwrap();
+
+        assert_eq!(program.declarations.len(), 1);
+        let decl = &program.declarations[0];
+
+        // Explicit values should override defaults
+        assert_eq!(decl.kind, VarKind::Const); // Parsed from `const`
+        assert_eq!(decl.type_annotation.type_name, Type::Int); // Explicit
+        assert_eq!(decl.initializer.expr, Expr::Number(42)); // Explicit
+        assert_eq!(decl.id.name(), "y");
+    }
+}
diff --git a/derive/examples/function_defaults.pest b/derive/examples/function_defaults.pest
new file mode 100644
index 0000000..c21fd39
--- /dev/null
+++ b/derive/examples/function_defaults.pest
@@ -0,0 +1,18 @@
+// Grammar for a simple language with function declarations
+// Functions can have optional return types that default to "void"
+
+WHITESPACE = _{ " " | "\t" | "\n" | "\r" }
+
+// Basic types (`id` is atomic so implicit WHITESPACE cannot split an identifier)
+id = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
+type_name = { "void" | "int" | "string" | id }
+
+// Function parameters (`params` is silent so `param` pairs are direct children of `function`)
+param = { id ~ ":" ~ type_name }
+params = _{ param ~ ("," ~ param)* }
+
+// Function declaration with optional return type
+function = { "fn" ~ id ~ "(" ~ params? ~ ")" ~ ("->" ~ type_name)? ~ "{" ~ "}" }
+
+// Program is a sequence of functions
+program = { function* }
diff --git a/derive/examples/function_defaults.rs b/derive/examples/function_defaults.rs
new file mode 100644
index 0000000..a3f193c
--- /dev/null
+++ b/derive/examples/function_defaults.rs
@@ -0,0 +1,139 @@
+#![allow(
+    bad_style,
+    dead_code,
+    clippy::clone_on_copy,
+    clippy::upper_case_acronyms
+)]
+
+#[macro_use]
+extern crate pest_derive;
+extern crate from_pest;
+#[macro_use]
+extern crate pest_ast;
+extern crate pest;
+
+use from_pest::FromPest;
+use pest::Parser;
+
+#[derive(Parser)]
+#[grammar = "../examples/function_defaults.pest"]
+pub struct FunctionParser;
+
+// Define a simple enum for types that can have a default
+#[derive(Debug, Clone, PartialEq, Default)]
+pub enum Type {
+    #[default]
+    Void,
+    Int,
+    String,
+}
+
+// Implement FromPest for Type
+impl<'pest> FromPest<'pest> for Type {
+    type Rule = Rule;
+    type FatalError = from_pest::Void;
+
+    fn from_pest(
+        pest: &mut pest::iterators::Pairs<'pest, Rule>,
+    ) -> Result<Self, from_pest::ConversionError<from_pest::Void>> {
+        // Work on a copy and commit it only on success, so a `NoMatch` never
+        // consumes a pair that a `default(...)` fallback would then hide.
+        let mut pairs = pest.clone();
+        let pair = pairs.next().ok_or(from_pest::ConversionError::NoMatch)?;
+        if pair.as_rule() != Rule::type_name {
+            return Err(from_pest::ConversionError::NoMatch);
+        }
+        let ty = match pair.as_str() {
+            "void" => Type::Void,
+            "int" => Type::Int,
+            "string" => Type::String,
+            _ => return Err(from_pest::ConversionError::NoMatch),
+        };
+        *pest = pairs;
+        Ok(ty)
+    }
+}
+
+// Define the AST types
+
+#[derive(FromPest, Debug, Clone, PartialEq)]
+#[pest_ast(rule(Rule::id))]
+pub struct Id<'pest> {
+    #[pest_ast(outer())]
+    pub span: pest::Span<'pest>,
+}
+
+#[derive(FromPest, Debug, Clone, PartialEq)]
+#[pest_ast(rule(Rule::param))]
+pub struct Param<'pest> {
+    pub id: Id<'pest>,
+    pub type_name: Type,
+}
+
+// This is the key example: function return type with default
+#[derive(FromPest, Debug, Clone, PartialEq)]
+#[pest_ast(rule(Rule::function))]
+pub struct Function<'pest> {
+    pub id: Id<'pest>,
+    pub params: Vec<Param<'pest>>,
+
+    // This demonstrates the new default feature!
+    // Instead of Option<Type>, we use Type with a default
+    #[pest_ast(default(Type::Void))]
+    pub return_type: Type,
+}
+
+#[derive(FromPest, Debug, Clone, PartialEq)]
+#[pest_ast(rule(Rule::program))]
+pub struct Program<'pest> {
+    pub functions: Vec<Function<'pest>>,
+}
+
+fn main() -> Result<(), Box<dyn std::error::Error>> {
+    // Test with a function that has no return type (should default)
+    let input1 = "fn main() {}";
+    let pairs1 = FunctionParser::parse(Rule::program, input1)?;
+    let program1: Program = Program::from_pest(&mut pairs1.clone())?;
+    println!("Program 1 (no return type): {program1:#?}");
+
+    // Test with a function that has an explicit return type
+    let input2 = "fn add() -> int {}";
+    let pairs2 = FunctionParser::parse(Rule::program, input2)?;
+    let program2: Program = Program::from_pest(&mut pairs2.clone())?;
+    println!("Program 2 (explicit return type): {program2:#?}");
+
+    Ok(())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_function_with_default_return_type() {
+        let input = "fn main() {}";
+        let pairs = FunctionParser::parse(Rule::program, input).unwrap();
+        let program: Program = Program::from_pest(&mut pairs.clone()).unwrap();
+
+        assert_eq!(program.functions.len(), 1);
+        let function = &program.functions[0];
+
+        // The return type should be Void (the default) even though it wasn't specified
+        assert_eq!(function.return_type, Type::Void);
+        println!("Function return type: {:?}", function.return_type);
+    }
+
+    #[test]
+    fn test_function_with_explicit_return_type() {
+        let input = "fn add() -> int {}";
+        let pairs = FunctionParser::parse(Rule::program, input).unwrap();
+        let program: Program = Program::from_pest(&mut pairs.clone()).unwrap();
+
+        assert_eq!(program.functions.len(), 1);
+        let function = &program.functions[0];
+
+        // The return type should be Int
+        assert_eq!(function.return_type, Type::Int);
+        println!("Function return type: {:?}", function.return_type);
+    }
+}
diff --git a/derive/examples/simple_enum_derives.rs 
b/derive/examples/simple_enum_derives.rs
index 84aebc9..eef61b6 100644
--- a/derive/examples/simple_enum_derives.rs
+++ b/derive/examples/simple_enum_derives.rs
@@ -58,8 +58,8 @@ fn main() {
     let source = "aaabbbccc";
 
     let mut parse_tree = SimpleParser::parse(Rule::ABC, source).expect("parse success");
-    println!("parse tree = {:#?}", parse_tree);
+    println!("parse tree = {parse_tree:#?}");
 
     let syntax_tree = ABC::from_pest(&mut parse_tree).expect("infallible");
-    println!("syntax tree = {:#?}", syntax_tree);
+    println!("syntax tree = {syntax_tree:#?}");
 }
diff --git a/derive/examples/simple_struct_derives.rs b/derive/examples/simple_struct_derives.rs
index 63b4186..8beb591 100644
--- a/derive/examples/simple_struct_derives.rs
+++ b/derive/examples/simple_struct_derives.rs
@@ -54,8 +54,8 @@ fn main() {
     let source = "aaabbbccc";
 
     let mut parse_tree = SimpleParser::parse(Rule::S, source).expect("parse success");
-    println!("parse tree = {:#?}", parse_tree);
+    println!("parse tree = {parse_tree:#?}");
 
     let syntax_tree = S::from_pest(&mut parse_tree).expect("infallible");
-    println!("syntax tree = {:#?}", syntax_tree);
+    println!("syntax tree = {syntax_tree:#?}");
 }
diff --git a/derive/src/attributes.rs b/derive/src/attributes.rs
index 1b81253..1e6d38c 100644
--- a/derive/src/attributes.rs
+++ b/derive/src/attributes.rs
@@ -21,6 +21,7 @@ mod kw {
     custom_keyword!(inner);
     custom_keyword!(with);
     custom_keyword!(rule);
+    custom_keyword!(default);
 }
 
 /// `#[pest_ast(..)]` for the outer `#[derive(FromPest)]`
@@ -39,6 +40,8 @@ pub(crate) enum FieldAttribute {
     Outer(OuterAttribute),
     /// `inner(rule(path::to), with(path::to),*)`
     Inner(InnerAttribute),
+    /// `default(expr)`: expression used for the field when its conversion reports `NoMatch`
+    Default(DefaultAttribute),
 }
 
 #[derive(Debug)]
@@ -80,6 +83,13 @@ pub(crate) struct RuleAttribute {
     pub(crate) variant: Ident,
 }
 
+#[derive(Debug)]
+pub(crate) struct DefaultAttribute {
+    pub(crate) default: kw::default,
+    pub(crate) paren: Paren,
+    pub(crate) expr: syn::Expr,
+}
+
 impl DeriveAttribute {
     pub(crate) 
fn from_attributes(attrs: impl IntoIterator<Item = Attribute>) -> Result<Vec<Self>> {
         attrs
@@ -184,6 +194,8 @@ impl Parse for FieldAttribute {
             OuterAttribute::parse(input).map(FieldAttribute::Outer)
         } else if lookahead.peek(kw::inner) {
             InnerAttribute::parse(input).map(FieldAttribute::Inner)
+        } else if lookahead.peek(kw::default) {
+            DefaultAttribute::parse(input).map(FieldAttribute::Default)
         } else {
             Err(lookahead.error())
         }
@@ -204,6 +216,7 @@ impl ToTokens for FieldAttribute {
         match self {
             FieldAttribute::Outer(attr) => attr.to_tokens(tokens),
             FieldAttribute::Inner(attr) => attr.to_tokens(tokens),
+            FieldAttribute::Default(attr) => attr.to_tokens(tokens),
         }
     }
 }
@@ -343,3 +356,23 @@ impl ToTokens for RuleAttribute {
         });
     }
 }
+
+impl Parse for DefaultAttribute {
+    fn parse(input: ParseStream) -> Result<Self> {
+        let content;
+        Ok(DefaultAttribute {
+            default: input.parse()?,
+            paren: parenthesized!(content in input),
+            expr: content.parse()?,
+        })
+    }
+}
+
+impl ToTokens for DefaultAttribute {
+    fn to_tokens(&self, tokens: &mut TokenStream) {
+        self.default.to_tokens(tokens);
+        self.paren.surround(tokens, |tokens| {
+            self.expr.to_tokens(tokens);
+        });
+    }
+}
diff --git a/derive/src/from_pest/field.rs b/derive/src/from_pest/field.rs
index 951f548..f8d9a20 100644
--- a/derive/src/from_pest/field.rs
+++ b/derive/src/from_pest/field.rs
@@ -15,6 +15,7 @@ enum ConversionStrategy {
     FromPest,
     Outer(Span, Vec<Path>),
     Inner(Span, Vec<Path>, Option<Path>),
+    Default(Span, syn::Expr),
 }
 
 impl ConversionStrategy {
@@ -39,6 +40,9 @@ impl ConversionStrategy {
                     parse_quote!(#path::#variant)
                 }),
             ),
+            (Some(FieldAttribute::Default(attr)), None) => {
+                ConversionStrategy::Default(attr.span(), attr.expr)
+            }
             _ => unreachable!(),
         })
     }
@@ -87,6 +91,23 @@ impl ConversionStrategy {
                 };
                 with_mods(get_span, mods)
             }
+            ConversionStrategy::Default(span, default_expr) => {
+                // Attempt the normal `FromPest` conversion on a copy of the pair
+                // iterator and commit the copy only on success, so a `NoMatch`
+                // from an impl that already advanced the iterator cannot drop
+                // pairs before the default value is used.
+                quote_spanned! {span=> {
+                    let mut __pest_ast_attempt = inner.clone();
+                    match ::from_pest::FromPest::from_pest(&mut __pest_ast_attempt) {
+                        Ok(value) => {
+                            *inner = __pest_ast_attempt;
+                            value
+                        }
+                        Err(::from_pest::ConversionError::NoMatch) => #default_expr,
+                        Err(e) => return Err(e),
+                    }
+                }}
+            }
         };
         if let Member::Named(name) = member {
             quote!(#name : #conversion)
diff --git a/examples/csv.rs b/examples/csv.rs
index 91e5ecc..135c6ef 100644
--- a/examples/csv.rs
+++ b/examples/csv.rs
@@ -56,9 +56,9 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
 
     let source = String::from_utf8(fs::read("./examples/csv.csv")?)?;
     let mut parse_tree = csv::Parser::parse(csv::Rule::file, &source)?;
-    println!("parse tree = {:#?}", parse_tree);
+    println!("parse tree = {parse_tree:#?}");
     let syntax_tree: File = File::from_pest(&mut parse_tree).expect("infallible");
-    println!("syntax tree = {:#?}", syntax_tree);
+    println!("syntax tree = {syntax_tree:#?}");
     println!();
 
     let mut field_sum = 0.0;
@@ -71,8 +71,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
         }
     }
 
-    println!("Sum of fields: {}", field_sum);
-    println!("Number of records: {}", record_count);
+    println!("Sum of fields: {field_sum}");
+    println!("Number of records: {record_count}");
 
     Ok(())
 }
diff --git a/src/lib.rs b/src/lib.rs
index 5dec632..65d6eba 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -37,12 +37,10 @@ where
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
         match self {
             ConversionError::NoMatch => write!(f, "Rule did not match, failed to convert node"),
-            ConversionError::Malformed(fatalerror) => write!(f, "Malformed node: {}", fatalerror),
-            ConversionError::Extraneous { current_node, .. } => write!(
-                f,
-                "when converting {}, found extraneous tokens",
-                current_node
-            ),
+            ConversionError::Malformed(fatalerror) => write!(f, "Malformed node: {fatalerror}"),
+            ConversionError::Extraneous { current_node, .. } => {
+                write!(f, "when converting {current_node}, found extraneous tokens")
+            }
         }
     }
 }