Skip to content

Commit

Permalink
chore: add configurable indentation handling to parser context
Browse files Browse the repository at this point in the history
  • Loading branch information
gvozdvmozgu authored and benfdking committed Apr 10, 2024
1 parent 89a24b1 commit 229eaf3
Show file tree
Hide file tree
Showing 13 changed files with 275 additions and 83 deletions.
11 changes: 11 additions & 0 deletions crates/lib/src/core/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -457,6 +457,17 @@ impl Index<&str> for Value {
}

impl Value {
pub fn to_bool(&self) -> bool {
match *self {
Value::Int(v) => v != 0,
Value::Bool(v) => v,
Value::Float(v) => v != 0.0,
Value::String(ref v) => !v.is_empty(),
Value::Map(ref v) => !v.is_empty(),
Value::None => false,
}
}

/// Borrows the inner map when this value is the `Map` variant.
///
/// Every other variant yields `None`.
pub fn as_map(&self) -> Option<&AHashMap<String, Value>> {
    match self {
        Self::Map(entries) => Some(entries),
        _ => None,
    }
}
Expand Down
15 changes: 11 additions & 4 deletions crates/lib/src/core/parser/context.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use ahash::AHashMap;
use itertools::Itertools;

use super::match_result::MatchResult;
use super::matchable::Matchable;
Expand All @@ -14,12 +15,13 @@ pub struct ParseContext {
match_stack: Vec<String>,
match_depth: usize,
track_progress: bool,
pub terminators: Vec<Box<dyn Matchable>>,
pub(crate) terminators: Vec<Box<dyn Matchable>>,
parse_cache: AHashMap<((String, (usize, usize), &'static str, usize), String), MatchResult>,
pub(crate) indentation_config: AHashMap<String, bool>,
}

impl ParseContext {
pub fn new(dialect: Dialect) -> Self {
pub fn new(dialect: Dialect, indentation_config: AHashMap<String, bool>) -> Self {
Self {
dialect,
tqdm: None,
Expand All @@ -29,16 +31,21 @@ impl ParseContext {
track_progress: true,
terminators: Vec::new(),
parse_cache: AHashMap::new(),
indentation_config,
}
}

/// Returns a shared reference to the dialect stored in this parse context.
pub fn dialect(&self) -> &Dialect {
&self.dialect
}

pub fn from_config(_config: FluffConfig) -> Self {
pub fn from_config(config: FluffConfig) -> Self {
let dialect = dialect_selector("ansi").unwrap();
Self::new(dialect)
let indentation_config = config.raw["indentation"].as_map().unwrap().clone();
let indentation_config: AHashMap<_, _> =
indentation_config.into_iter().map(|(key, value)| (key, value.to_bool())).collect();

Self::new(dialect, indentation_config)
}

pub fn progress_bar<T>(&mut self, mut f: impl FnMut(&mut Self) -> T) -> T {
Expand Down
10 changes: 5 additions & 5 deletions crates/lib/src/core/parser/grammar/anyof.rs
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,7 @@ mod tests {
g.disallow_gaps();
}

let mut ctx = ParseContext::new(fresh_ansi_dialect());
let mut ctx = ParseContext::new(fresh_ansi_dialect(), <_>::default());

// Check directly
let mut segments = g.match_segments(&test_segments(), &mut ctx).unwrap();
Expand All @@ -347,7 +347,7 @@ mod tests {

#[test]
fn test__parser__grammar_oneof_templated() {
let mut ctx = ParseContext::new(fresh_ansi_dialect());
let mut ctx = ParseContext::new(fresh_ansi_dialect(), <_>::default());

let bs = StringParser::new(
"bar",
Expand Down Expand Up @@ -420,7 +420,7 @@ mod tests {
];

let segments = generate_test_segments_func(vec!["a", " ", "b", " ", "c", "d", " ", "d"]);
let mut parse_cx = ParseContext::new(fresh_ansi_dialect());
let mut parse_cx = ParseContext::new(fresh_ansi_dialect(), <_>::default());

for (mode, sequence, terminators, output, max_times) in cases {
let elements = sequence
Expand Down Expand Up @@ -514,7 +514,7 @@ mod tests {
None,
);

let mut ctx = ParseContext::new(fresh_ansi_dialect());
let mut ctx = ParseContext::new(fresh_ansi_dialect(), <_>::default());
let g = AnyNumberOf::new(vec![Box::new(bar), Box::new(foo)]);
let result = g.match_segments(&segments, &mut ctx).unwrap().matched_segments;

Expand Down Expand Up @@ -558,7 +558,7 @@ mod tests {
let g1 = one_of(vec![Box::new(foo_regex.clone()), Box::new(foo.clone())]);
let g2 = one_of(vec![Box::new(foo), Box::new(foo_regex)]);

let mut ctx = ParseContext::new(fresh_ansi_dialect());
let mut ctx = ParseContext::new(fresh_ansi_dialect(), <_>::default());

for segment in g1.match_segments(&segments, &mut ctx).unwrap().matched_segments.iter() {
assert_eq!(segment.get_raw().unwrap(), "foo");
Expand Down
21 changes: 12 additions & 9 deletions crates/lib/src/core/parser/grammar/base.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
use std::borrow::Cow;
use std::ops::Deref;

use ahash::AHashSet;
use itertools::enumerate;
use uuid::Uuid;
Expand Down Expand Up @@ -99,7 +102,7 @@ impl Matchable for BaseGrammar {

#[derive(Clone, Hash)]
pub struct Ref {
reference: String,
reference: Cow<'static, str>,
exclude: Option<Box<dyn Matchable>>,
terminators: Vec<Box<dyn Matchable>>,
reset_terminators: bool,
Expand All @@ -116,9 +119,9 @@ impl std::fmt::Debug for Ref {

impl Ref {
// Constructor function
pub fn new(reference: impl ToString) -> Self {
pub fn new(reference: impl Into<Cow<'static, str>>) -> Self {
Ref {
reference: reference.to_string(),
reference: reference.into(),
exclude: None,
terminators: Vec::new(),
reset_terminators: false,
Expand All @@ -145,7 +148,7 @@ impl Ref {

// Static method to create a Ref instance for a keyword
pub fn keyword(keyword: &str) -> Self {
let name = format!("{}KeywordSegment", capitalize(keyword));
let name = capitalize(keyword) + "KeywordSegment";
Ref::new(name)
}
}
Expand Down Expand Up @@ -178,7 +181,7 @@ impl Matchable for Ref {
crumbs: Option<Vec<&str>>,
) -> Option<(AHashSet<String>, AHashSet<String>)> {
if let Some(ref c) = crumbs {
if c.contains(&self.reference.as_str()) {
if c.contains(&self.reference.deref()) {
let loop_string = c.join(" -> ");
panic!("Self referential grammar detected: {}", loop_string);
}
Expand Down Expand Up @@ -450,7 +453,7 @@ mod tests {
// Assuming 'generate_test_segments' and 'fresh_ansi_dialect' are implemented
// elsewhere
let ts = generate_test_segments_func(vec!["ABS", "ABSOLUTE"]);
let mut ctx = ParseContext::new(fresh_ansi_dialect());
let mut ctx = ParseContext::new(fresh_ansi_dialect(), <_>::default());

// Assert ABS does not match, due to the exclude
assert!(ni.match_segments(&[ts[0].clone()], &mut ctx).unwrap().matched_segments.is_empty());
Expand All @@ -463,7 +466,7 @@ mod tests {

#[test]
fn test_parser_grammar_nothing() {
let mut ctx = ParseContext::new(fresh_ansi_dialect());
let mut ctx = ParseContext::new(fresh_ansi_dialect(), <_>::default());

assert!(
Nothing::new()
Expand All @@ -488,7 +491,7 @@ mod tests {
(0..2, "bar", true, (0..2).into()),
];

let mut ctx = ParseContext::new(fresh_ansi_dialect());
let mut ctx = ParseContext::new(fresh_ansi_dialect(), <_>::default());
for (segments_slice, matcher_keyword, trim_noncode, result_slice) in cases {
let matchers = vec![
StringParser::new(
Expand Down Expand Up @@ -560,7 +563,7 @@ mod tests {
Box::new(Sequence::new(vec![bs, fs])),
];

let mut ctx = ParseContext::new(fresh_ansi_dialect());
let mut ctx = ParseContext::new(fresh_ansi_dialect(), <_>::default());
// Matching the first element of the list
let (match_result, matcher) =
longest_trimmed_match(&test_segments(), matchers.clone(), &mut ctx, true).unwrap();
Expand Down
95 changes: 95 additions & 0 deletions crates/lib/src/core/parser/grammar/conditional.rs
Original file line number Diff line number Diff line change
@@ -1 +1,96 @@
use crate::core::errors::SQLParseError;
use crate::core::parser::context::ParseContext;
use crate::core::parser::match_result::MatchResult;
use crate::core::parser::matchable::Matchable;
use crate::core::parser::segments::base::{ErasedSegment, Segment};
use crate::core::parser::segments::meta::Indent;
use crate::helpers::ToErasedSegment;

/// Grammar element that yields an indentation meta segment only when the
/// parse context's indentation configuration enables every flag that was
/// switched on for this conditional.
#[derive(Clone, Debug, Hash, PartialEq)]
pub struct Conditional {
/// Meta segment that is cloned into the match result when the conditional is enabled.
meta: Indent,
/// When set, the `indented_joins` config entry must be `true` for this conditional to apply.
indented_joins: bool,
/// When set, the `indented_using_on` config entry must be `true` for this conditional to apply.
indented_using_on: bool,
/// When set, the `indented_on_contents` config entry must be `true` for this conditional to apply.
indented_on_contents: bool,
/// When set, the `indented_then` config entry must be `true` for this conditional to apply.
indented_then: bool,
}

impl Conditional {
    /// Wraps `meta` in a conditional with no flags set.
    ///
    /// With no flags set, `is_enabled` always returns `true`, so the meta
    /// segment is produced regardless of the indentation configuration.
    pub fn new(meta: Indent) -> Self {
        Self {
            indented_then: false,
            indented_on_contents: false,
            indented_using_on: false,
            indented_joins: false,
            meta,
        }
    }

    /// Requires the `indented_joins` config entry to be `true`.
    pub fn indented_joins(self) -> Self {
        Self { indented_joins: true, ..self }
    }

    /// Requires the `indented_using_on` config entry to be `true`.
    pub fn indented_using_on(self) -> Self {
        Self { indented_using_on: true, ..self }
    }

    /// Requires the `indented_on_contents` config entry to be `true`.
    pub fn indented_on_contents(self) -> Self {
        Self { indented_on_contents: true, ..self }
    }

    /// Requires the `indented_then` config entry to be `true`.
    pub fn indented_then(self) -> Self {
        Self { indented_then: true, ..self }
    }

    /// Returns the conditional unchanged.
    ///
    /// NOTE(review): no flag is recorded here, so this builder currently has
    /// no effect on `is_enabled` — confirm whether that is intentional.
    pub fn indented_then_contents(self) -> Self {
        self
    }

    /// Reports whether every flag set on this conditional is also `true` in
    /// the parse context's indentation configuration.
    ///
    /// A config entry that is absent counts as `false`. Flags that were never
    /// set on the conditional are ignored.
    fn is_enabled(&self, parse_context: &mut ParseContext) -> bool {
        // Keys are spelled exactly like the struct fields they belong to.
        let requested = [
            ("indented_joins", self.indented_joins),
            ("indented_using_on", self.indented_using_on),
            ("indented_on_contents", self.indented_on_contents),
            ("indented_then", self.indented_then),
        ];
        let config = &parse_context.indentation_config;

        requested
            .iter()
            .all(|(key, flag)| !*flag || config.get(*key).copied().unwrap_or_default())
    }
}

// No overrides: `Conditional` relies solely on the items `Segment` already provides.
impl Segment for Conditional {}

impl Matchable for Conditional {
    /// Produces the wrapped meta segment without consuming any input.
    ///
    /// * When the conditional is disabled by the indentation configuration,
    ///   the result is an unmatched result built from a copy of `segments`.
    /// * Otherwise the result holds a clone of the meta segment as its only
    ///   matched segment, and all of `segments` as the unmatched remainder.
    ///
    /// This implementation always returns `Ok`.
    fn match_segments(
        &self,
        segments: &[ErasedSegment],
        parse_context: &mut ParseContext,
    ) -> Result<MatchResult, SQLParseError> {
        if !self.is_enabled(parse_context) {
            return Ok(MatchResult::from_unmatched(segments.to_vec()));
        }

        // The leftover `dbg!(self)` debugging call was removed: it wrote to
        // stderr every time an enabled conditional was matched.
        Ok(MatchResult {
            matched_segments: vec![self.meta.clone().to_erased_segment()],
            unmatched_segments: segments.to_vec(),
        })
    }
}
6 changes: 3 additions & 3 deletions crates/lib/src/core/parser/grammar/delimited.rs
Original file line number Diff line number Diff line change
Expand Up @@ -306,7 +306,7 @@ mod tests {
(2.into(), true, false, vec!["bar", ".", "bar", "foo"], 0),
];

let mut ctx = ParseContext::new(fresh_ansi_dialect());
let mut ctx = ParseContext::new(fresh_ansi_dialect(), <_>::default());

for (min_delimiters, allow_gaps, allow_trailing, token_list, match_len) in cases {
let test_segments = generate_test_segments_func(token_list);
Expand Down Expand Up @@ -349,7 +349,7 @@ mod tests {

#[test]
fn test__parser__grammar_anything_bracketed() {
let mut ctx = ParseContext::new(fresh_ansi_dialect());
let mut ctx = ParseContext::new(fresh_ansi_dialect(), <_>::default());
let foo = StringParser::new(
"foo",
|segment| {
Expand Down Expand Up @@ -393,7 +393,7 @@ mod tests {
for (terminators, match_length) in cases {
let _panic = enter_panic(terminators.join(" "));

let mut cx = ParseContext::new(fresh_ansi_dialect());
let mut cx = ParseContext::new(fresh_ansi_dialect(), <_>::default());
let terms = terminators
.iter()
.map(|it| {
Expand Down
2 changes: 1 addition & 1 deletion crates/lib/src/core/parser/grammar/noncode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ mod tests {
#[test]
fn test__parser__grammar_noncode() {
let dialect = fresh_ansi_dialect(); // Assuming this function exists and returns a Dialect
let mut ctx = ParseContext::new(dialect);
let mut ctx = ParseContext::new(dialect, <_>::default());

let matcher = NonCodeMatcher;
let test_segments = test_segments(); // Assuming this function exists and generates test segments
Expand Down
Loading

0 comments on commit 229eaf3

Please sign in to comment.