Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
5432613
ClickHouse dialect: fix keyword-as-identifier, cast args, CREATE TABL…
alexey-milovidov Feb 17, 2026
bf15a4f
Add missing keyword token types to is_keyword() for better identifier…
alexey-milovidov Feb 17, 2026
9e18e9c
ClickHouse: support {name:Type} query parameters in all identifier co…
alexey-milovidov Feb 17, 2026
0e3954f
ClickHouse: support NOT IN without parentheses (e.g., x NOT IN table_…
alexey-milovidov Feb 17, 2026
4a7df92
ClickHouse: handle all ALTER TABLE actions as raw SQL
alexey-milovidov Feb 17, 2026
4b37b2e
ClickHouse: fix GLOBAL JOIN detection (GLOBAL is Var, not a keyword t…
alexey-milovidov Feb 17, 2026
1d59709
ClickHouse: fix dictionary SOURCE parsing, CHECK TABLE, and ALTER TAB…
alexey-milovidov Feb 18, 2026
8ac17a9
ClickHouse: fix EXPLAIN in subqueries and dictionary kind parsing
alexey-milovidov Feb 18, 2026
e9a3a02
ClickHouse: add RENAME/OPTIMIZE/EXISTS/SETTINGS statements, hex liter…
alexey-milovidov Feb 18, 2026
83ae57c
ClickHouse: fix ternary in parens, tuple element access, dictionary H…
alexey-milovidov Feb 18, 2026
074cd17
ClickHouse: fix Array(Type) cast syntax and JSON(subcolumns) type par…
alexey-milovidov Feb 18, 2026
c70c13c
ClickHouse: fix WITH TOTALS without GROUP BY, trailing comma tuples
alexey-milovidov Feb 18, 2026
65a9663
ClickHouse: skip INSERT FORMAT raw data, support zero-param lambdas a…
alexey-milovidov Feb 18, 2026
1485a53
ClickHouse: unary plus, view() table func, JSON ^path, INSERT (*), Or…
alexey-milovidov Feb 18, 2026
1e112a5
ClickHouse: add STATISTICS column modifier, revert Order as keyword i…
alexey-milovidov Feb 18, 2026
f77d769
ClickHouse: USE keyword names, WITH tuple/lambda, GRANT ON db.*, view…
alexey-milovidov Feb 18, 2026
3926cac
ClickHouse: nested columns, NANOSECOND interval, ORDER BY DESC, AS al…
alexey-milovidov Feb 18, 2026
fceff4f
ClickHouse: RENAME TABLE, KILL MUTATION, DETACH, nested tuples, dateD…
alexey-milovidov Feb 18, 2026
ccb8f99
ClickHouse: Order as identifier, braced param in FROM, dotted column …
alexey-milovidov Feb 18, 2026
1f3a791
ClickHouse: add EXISTS/LIMIT BY second LIMIT, CAST tuple type, func(*…
alexey-milovidov Feb 18, 2026
3fdb690
ClickHouse: CHECK without parens, EXTRACT func, column defaults, enum…
alexey-milovidov Feb 18, 2026
c112893
ClickHouse: union as table name, EXPRESSION in dictionaries, REFRESH …
alexey-milovidov Feb 18, 2026
fa4dd79
ClickHouse: minus() as function (Except token), EXPRESSION in column …
alexey-milovidov Feb 18, 2026
c296391
ClickHouse: fix ORDER BY as implicit alias in SELECT, BINARY LARGE OB…
alexey-milovidov Feb 18, 2026
cc9af05
ClickHouse: trailing commas in tuples, FIRST/LAST table aliases, AFTE…
alexey-milovidov Feb 18, 2026
3d8e3dc
ClickHouse: UUID clause in CREATE TABLE/VIEW, skip UUID string value
alexey-milovidov Feb 18, 2026
18777a7
ClickHouse: EXPLAIN QUERY TREE settings, RENAME COLUMN dots, NATIONAL…
alexey-milovidov Feb 18, 2026
71fce84
ClickHouse: SHOW CREATE qualified names, EXISTS double parens, DROP O…
alexey-milovidov Feb 18, 2026
d73f2e3
ClickHouse: APPLY lambdas, trailing commas, EXCEPT strings, TTL WHERE…
alexey-milovidov Feb 18, 2026
d87f430
ClickHouse: zero-arg functions, SHOW SETTINGS, empty USING/PRIMARY KE…
alexey-milovidov Feb 18, 2026
2c572ec
ClickHouse: zero-arg SUM/AVG, EXPLAIN nested parens, INSERT qualified…
alexey-milovidov Feb 18, 2026
d823c48
ClickHouse: DIV keyword, INSERT VALUES without commas, EXCEPT STRICT,…
alexey-milovidov Feb 18, 2026
a09f6fe
ClickHouse: SETTINGS in function calls, IGNORE NULLS postfix, tuple i…
alexey-milovidov Feb 18, 2026
c974c74
ClickHouse: AS alias in function args, SETTINGS in function args, ON …
alexey-milovidov Feb 18, 2026
ee2ab26
ClickHouse: window frame arithmetic, enum NULL, REPLACE/APPLY star mo…
alexey-milovidov Feb 18, 2026
d1823a7
ClickHouse: FORMAT Null, COLLATE in window ORDER BY, IGNORE NULLS, CO…
alexey-milovidov Feb 18, 2026
76ace04
ClickHouse: GROUP BY ALL WITH, EMPTY/CLONE AS, LIFETIME neg, nested c…
alexey-milovidov Feb 18, 2026
0d48c99
ClickHouse: EXPLAIN subquery, window base ref, UNSIGNED/SIGNED types,…
alexey-milovidov Feb 18, 2026
8a4bb3b
ClickHouse: PASTE JOIN, WITH in view body, SHOW CREATE access control…
alexey-milovidov Feb 18, 2026
4687077
ClickHouse: fix keyword-as-identifier in UPDATE/INSERT, RLike identif…
alexey-milovidov Feb 18, 2026
0d32326
ClickHouse: fix SETTINGS clauses, DISTINCT-as-identifier, TIME.N tupl…
alexey-milovidov Feb 18, 2026
c9807d9
ClickHouse: fix empty IN/VALUES, values-as-identifier, floor() args, …
alexey-milovidov Feb 18, 2026
67ce32f
ClickHouse: Unicode whitespace/quotes/minus tokenizer fixes, trailing…
alexey-milovidov Feb 18, 2026
7f97c62
ClickHouse: fix FROM as column, INT() empty parens, Time('UTC'), back…
alexey-milovidov Feb 18, 2026
bc1e5bc
ClickHouse: except/from as identifiers, ALIAS column modifier, traili…
alexey-milovidov Feb 18, 2026
fd6d78d
ClickHouse: // comments, REGEXP as function, EPHEMERAL type after exp…
alexey-milovidov Feb 18, 2026
33ed1d0
ClickHouse: OVERLAY as regular function, CAST with expression type arg
alexey-milovidov Feb 18, 2026
e59c203
ClickHouse: fix GROUPING SETS lookahead, allow ntile() with multiple …
alexey-milovidov Feb 18, 2026
b483e31
ClickHouse: GROUPING as column, double semicolons, INDEX/PROJECTION i…
alexey-milovidov Feb 18, 2026
61e99ca
ClickHouse: fix TIMESTAMP WITH FILL, keyword after dot in identifier …
alexey-milovidov Feb 18, 2026
ca036d1
ClickHouse: PRIMARY KEY expressions, PROJECTION INDEX, LIMIT modulo, …
alexey-milovidov Feb 18, 2026
d5f268e
ClickHouse: countIf inline alias, DROP IF EMPTY, PARTITION expression…
alexey-milovidov Feb 19, 2026
0a41302
test: filter clientError annotations from ClickHouse corpus test
alexey-milovidov Feb 19, 2026
f36380f
ClickHouse: add COLUMNS/star APPLY/EXCEPT/REPLACE column transformer …
alexey-milovidov Feb 19, 2026
96f6eb7
ClickHouse: hex float literals, WITH tuple CTE, nested paren tuple al…
alexey-milovidov Feb 19, 2026
c6c1f7b
ClickHouse: fix pseudocolumn as lambda param, bare INSERT VALUES
alexey-milovidov Feb 19, 2026
51acf74
ClickHouse: comprehensive lambda/keyword-as-identifier support
alexey-milovidov Feb 19, 2026
7e246c1
ClickHouse: USING *, structural keywords as identifiers in expression…
alexey-milovidov Feb 19, 2026
9593eb0
ClickHouse: empty tuple subscript, FORMAT inline data consumption
alexey-milovidov Feb 19, 2026
44ebcd4
ClickHouse: keyword table aliases, EXTRACT as regular function
alexey-milovidov Feb 19, 2026
289f681
ClickHouse: subquery column aliases, from-as-column, GRANT multi-tabl…
alexey-milovidov Feb 19, 2026
5274c85
ClickHouse: REVOKE wildcard/multi-privilege, EXPLAIN nesting depth, M…
alexey-milovidov Feb 19, 2026
850f36a
ClickHouse: lambda EXCEPT comma, alias-in-expr-list operators, REFRES…
alexey-milovidov Feb 19, 2026
12d5850
ClickHouse: WITH FILL parse_or for complex expressions, keyword alias…
alexey-milovidov Feb 19, 2026
6db063c
ClickHouse: dictionary SOURCE STRUCTURE block with space-separated co…
alexey-milovidov Feb 19, 2026
e6e7166
ClickHouse: star expressions with operators (* IS NOT NULL, * AND expr)
alexey-milovidov Feb 19, 2026
16e8f71
ClickHouse: MethodCall COLUMNS EXCEPT, star-in-CASE fuzz test tolerance
alexey-milovidov Feb 19, 2026
f0afeeb
ClickHouse: implicit and explicit aliases in function arguments (CAST…
alexey-milovidov Feb 19, 2026
9eee364
Fix test expectations and compiler warnings
alexey-milovidov Feb 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
196 changes: 196 additions & 0 deletions crates/polyglot-sql/examples/test_clickhouse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
use std::fs;
use std::path::Path;

use polyglot_sql::{parse, DialectType};

/// Marker that ClickHouse test files use to annotate statements that are
/// expected to be rejected by the client-side parser.
const CLIENT_ERROR_MARKER: &str = "clientError";

/// Returns `content` with statements annotated by `-- { clientError ... }` removed.
///
/// The text is split on `;`. For every segment:
/// * if the *following* segment starts with a `--` comment mentioning
///   `clientError`, the segment is the intentionally-invalid statement and is
///   replaced by `/* skipped */`;
/// * otherwise, if the segment itself mentions `clientError`, only the lines
///   after the first annotation line are kept (that is where the next real
///   statement begins);
/// * otherwise the segment is kept verbatim.
///
/// The `;` separators are re-inserted between segments.
fn strip_client_error_statements(content: &str) -> String {
    let parts: Vec<&str> = content.split(';').collect();
    // `split` always yields at least one segment, so this cannot underflow.
    let last = parts.len() - 1;
    let mut result = String::with_capacity(content.len());

    for (i, part) in parts.iter().enumerate() {
        let next_is_client_error = parts.get(i + 1).map_or(false, |next| {
            let next = next.trim_start();
            next.starts_with("--") && next.contains(CLIENT_ERROR_MARKER)
        });

        if next_is_client_error {
            // The SQL before the annotation is the statement to drop.
            result.push_str("/* skipped */");
        } else if part.contains(CLIENT_ERROR_MARKER) {
            // Drop everything up to and including the first annotation line.
            let kept: Vec<&str> = part
                .lines()
                .skip_while(|line| !line.contains(CLIENT_ERROR_MARKER))
                .skip(1)
                .collect();
            result.push_str(&kept.join("\n"));
        } else {
            result.push_str(part);
        }

        if i < last {
            result.push(';');
        }
    }
    result
}

/// Returns `true` if any line of `text` looks like SQL: non-empty, not starting
/// with `--` or `/*`, not a lone `;`, and containing an alphanumeric character.
fn contains_sql(text: &str) -> bool {
    text.lines().any(|line| {
        let t = line.trim();
        !t.is_empty()
            && !t.starts_with("--")
            && !t.starts_with("/*")
            && t != ";"
            && t.chars().any(|c| c.is_alphanumeric())
    })
}

/// Roughly counts statements in `content`: the number of `;`-separated segments
/// that have at least one non-empty line not starting with `--` (minimum 1).
fn estimate_statement_count(content: &str) -> usize {
    content
        .split(';')
        .filter(|segment| {
            segment.trim().lines().any(|line| {
                let t = line.trim();
                !t.is_empty() && !t.starts_with("--")
            })
        })
        .count()
        .max(1)
}

/// Percentage of `ok` out of `total`; `0.0` when `total` is zero so that an
/// empty corpus does not print `NaN%`.
fn percent(ok: usize, total: usize) -> f64 {
    if total == 0 {
        0.0
    } else {
        100.0 * ok as f64 / total as f64
    }
}

/// Parses every `*.sql` file under `../ClickHouse/tests/queries/0_stateless`
/// with the ClickHouse dialect and prints a summary: file and statement counts,
/// success rates, error categories, every error, and the list of failing files.
///
/// # Panics
///
/// Panics if the corpus directory cannot be read.
fn main() {
    let dir = Path::new("../ClickHouse/tests/queries/0_stateless");

    let mut sql_files: Vec<_> = fs::read_dir(dir)
        .expect("Cannot read directory")
        .filter_map(|entry| entry.ok())
        .map(|entry| entry.path())
        .filter(|path| path.extension().map_or(false, |ext| ext == "sql"))
        .collect();

    // Sort so that the report order is stable across runs.
    sql_files.sort();

    let mut total_files = 0usize;
    let mut successful_files = 0usize;
    let mut failed_files = 0usize;
    let mut total_statements = 0usize;
    let mut successful_statements = 0usize;
    let mut failed_statements = 0usize;
    // (file name, first 300 chars of the file, error message)
    let mut errors: Vec<(String, String, String)> = Vec::new();

    for path in &sql_files {
        total_files += 1;
        let content = match fs::read_to_string(path) {
            Ok(c) => c,
            Err(e) => {
                eprintln!("Cannot read {}: {}", path.display(), e);
                failed_files += 1;
                continue;
            }
        };

        let file_name = path
            .file_name()
            .map(|name| name.to_string_lossy().into_owned())
            .unwrap_or_else(|| path.display().to_string());

        // Pre-process: remove statements annotated with -- { clientError ... }.
        // These are intentional syntax error tests that ClickHouse's own parser also rejects.
        let filtered_content = strip_client_error_statements(&content);

        if !contains_sql(&filtered_content) {
            // File contained only clientError statements (or was empty) — count as success
            successful_files += 1;
            total_statements += 1;
            successful_statements += 1;
            continue;
        }

        // Parse the whole file at once (the parser handles multiple statements)
        match parse(&filtered_content, DialectType::ClickHouse) {
            Ok(exprs) => {
                let count = exprs.len().max(1);
                total_statements += count;
                successful_statements += count;
                successful_files += 1;
            }
            Err(e) => {
                // The parser gives no per-statement result on failure, so the
                // whole file's (roughly estimated) statements count as failed.
                let stmt_count = estimate_statement_count(&content);
                total_statements += stmt_count;
                failed_statements += stmt_count;
                failed_files += 1;
                let display_content: String = content.chars().take(300).collect();
                errors.push((file_name, display_content, e.to_string()));
            }
        }
    }

    println!("=== ClickHouse SQL Parsing Test Results ===");
    println!();
    println!(
        "Files: {} total, {} OK, {} with errors",
        total_files, successful_files, failed_files
    );
    println!(
        "Statements: {} total, ~{} OK, ~{} errors",
        total_statements, successful_statements, failed_statements
    );
    println!();
    println!(
        "Success rate (files): {:.1}%",
        percent(successful_files, total_files)
    );
    println!(
        "Success rate (statements): {:.1}%",
        percent(successful_statements, total_statements)
    );
    println!();

    if !errors.is_empty() {
        // Count errors by category
        let mut error_categories: std::collections::HashMap<&str, usize> =
            std::collections::HashMap::new();
        for (_, _, err) in &errors {
            // Normalize error message for grouping: drop the " near [...]" context.
            let key = err.find(" near [").map_or(err.as_str(), |pos| &err[..pos]);
            *error_categories.entry(key).or_insert(0) += 1;
        }
        let mut categories: Vec<_> = error_categories.into_iter().collect();
        // Most frequent first; ties broken by message so the output does not
        // depend on HashMap iteration order.
        categories.sort_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(b.0)));
        println!("=== Error categories ===");
        for (msg, count) in &categories {
            println!(" {:4} {}", count, msg);
        }

        println!();
        println!("=== All errors ===");
        for (i, (file, stmt, err)) in errors.iter().enumerate() {
            println!();
            println!("--- Error #{} in {} ---", i + 1, file);
            println!("SQL: {}", stmt);
            println!("Error: {}", err);
        }

        // Print failing filenames list
        println!();
        println!("=== Failing files ===");
        for (file, _, err) in &errors {
            println!(" {} -> {}", file, err);
        }
    }
}
29 changes: 29 additions & 0 deletions crates/polyglot-sql/examples/test_ternary.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use polyglot_sql::{parse, DialectType};

/// Parses `sql` with the ClickHouse dialect and prints a one-line result
/// tagged with `label`: the statement count on success, the error otherwise.
fn test(label: &str, sql: &str) {
    let report = match parse(sql, DialectType::ClickHouse) {
        Ok(statements) => format!("OK: {} ({} stmts)", label, statements.len()),
        Err(error) => format!("ERR: {} -> {}", label, error),
    };
    println!("{}", report);
}

fn main() {
// Normal EXTRACT
test("e1", "SELECT EXTRACT(DAY FROM toDate('2019-05-05'))");
test("e2", "SELECT EXTRACT(YEAR FROM now())");
// ClickHouse function-style extract
test("e3", "SELECT extract('1234', '123')");
test("e4", "SELECT extract('1234' arg_1, '123' arg_2), arg_1, arg_2");
// Normal CAST
test("c1", "SELECT cast('1234' AS UInt32)");
test("c2", "SELECT cast(x AS DateTime('UTC'))");
// Normal SUBSTRING
test("s1", "SELECT substring('hello' FROM 2 FOR 3)");
test("s2", "SELECT substring('hello', 2, 3)");
// Normal TRIM
test("t1", "SELECT trim(BOTH ' ' FROM ' hello ')");
test("t2", "SELECT trim(' hello ')");
// Normal DATEADD/DATEDIFF
test("d1", "SELECT dateAdd(DAY, 1, now())");
test("d2", "SELECT dateDiff(DAY, now(), now())");
}
1 change: 1 addition & 0 deletions crates/polyglot-sql/src/dialects/bigquery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ impl DialectImpl for BigQueryDialect {
Some(crate::expressions::IntervalUnit::Second) => "SECOND",
Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
None => "DAY",
};
let unit = Expression::Identifier(crate::expressions::Identifier {
Expand Down
9 changes: 7 additions & 2 deletions crates/polyglot-sql/src/dialects/clickhouse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,17 @@ impl DialectImpl for ClickHouseDialect {
// ClickHouse uses double quotes and backticks for identifiers
config.identifiers.insert('"', '"');
config.identifiers.insert('`', '`');
// ClickHouse does NOT support nested comments
config.nested_comments = false;
// ClickHouse supports nested comments
config.nested_comments = true;
// ClickHouse allows identifiers to start with digits
config.identifiers_can_start_with_digit = true;
// ClickHouse uses backslash escaping in strings
config.string_escapes.push('\\');
// ClickHouse supports # as single-line comment
config.hash_comments = true;
// ClickHouse supports 0xDEADBEEF hex integer literals
config.hex_number_strings = true;
config.hex_string_is_integer_type = true;
config
}

Expand Down
2 changes: 2 additions & 0 deletions crates/polyglot-sql/src/dialects/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12938,6 +12938,7 @@ impl Dialect {
crate::expressions::IntervalUnit::Second => "SECOND",
crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
}
}
_ => "",
Expand Down Expand Up @@ -18652,6 +18653,7 @@ impl Dialect {
crate::expressions::IntervalUnit::Second => "SECOND".to_string(),
crate::expressions::IntervalUnit::Millisecond => "MILLISECOND".to_string(),
crate::expressions::IntervalUnit::Microsecond => "MICROSECOND".to_string(),
crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND".to_string(),
}
}

Expand Down
1 change: 1 addition & 0 deletions crates/polyglot-sql/src/dialects/snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ fn interval_unit_to_str(unit: &IntervalUnit) -> String {
IntervalUnit::Second => "SECOND".to_string(),
IntervalUnit::Millisecond => "MILLISECOND".to_string(),
IntervalUnit::Microsecond => "MICROSECOND".to_string(),
IntervalUnit::Nanosecond => "NANOSECOND".to_string(),
}
}

Expand Down
2 changes: 2 additions & 0 deletions crates/polyglot-sql/src/dialects/tsql.rs
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ impl DialectImpl for TSQLDialect {
Some(crate::expressions::IntervalUnit::Second) => "SECOND",
Some(crate::expressions::IntervalUnit::Millisecond) => "MILLISECOND",
Some(crate::expressions::IntervalUnit::Microsecond) => "MICROSECOND",
Some(crate::expressions::IntervalUnit::Nanosecond) => "NANOSECOND",
None => "DAY",
};
let unit = Expression::Identifier(crate::expressions::Identifier {
Expand All @@ -397,6 +398,7 @@ impl DialectImpl for TSQLDialect {
crate::expressions::IntervalUnit::Second => "SECOND",
crate::expressions::IntervalUnit::Millisecond => "MILLISECOND",
crate::expressions::IntervalUnit::Microsecond => "MICROSECOND",
crate::expressions::IntervalUnit::Nanosecond => "NANOSECOND",
};
let unit = Expression::Identifier(crate::expressions::Identifier {
name: unit_str.to_string(),
Expand Down
9 changes: 9 additions & 0 deletions crates/polyglot-sql/src/expressions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3002,6 +3002,8 @@ pub enum JoinKind {
// ClickHouse ARRAY JOIN
Array,
LeftArray,
// ClickHouse PASTE JOIN (positional join)
Paste,
}

impl Default for JoinKind {
Expand Down Expand Up @@ -3825,6 +3827,7 @@ pub enum IntervalUnit {
Second,
Millisecond,
Microsecond,
Nanosecond,
}

/// SQL Command (COMMIT, ROLLBACK, BEGIN, etc.)
Expand Down Expand Up @@ -5900,6 +5903,10 @@ pub enum AlterTableAction {
partition: Expression,
source: Option<Box<Expression>>,
},
/// Raw SQL for dialect-specific ALTER TABLE actions (e.g., ClickHouse UPDATE/DELETE/DETACH/etc.)
Raw {
sql: String,
},
}

/// Actions for ALTER COLUMN
Expand Down Expand Up @@ -8343,6 +8350,8 @@ pub struct WithFill {
#[serde(default)]
pub step: Option<Box<Expression>>,
#[serde(default)]
pub staleness: Option<Box<Expression>>,
#[serde(default)]
pub interpolate: Option<Box<Expression>>,
}

Expand Down
13 changes: 13 additions & 0 deletions crates/polyglot-sql/src/generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3823,6 +3823,7 @@ impl Generator {
}
JoinKind::Array => self.write_keyword("ARRAY JOIN"),
JoinKind::LeftArray => self.write_keyword("LEFT ARRAY JOIN"),
JoinKind::Paste => self.write_keyword("PASTE JOIN"),
}
}

Expand Down Expand Up @@ -8111,6 +8112,9 @@ impl Generator {
self.generate_expression(src)?;
}
}
AlterTableAction::Raw { sql } => {
self.write(sql);
}
}
Ok(())
}
Expand Down Expand Up @@ -14570,6 +14574,8 @@ impl Generator {
(IntervalUnit::Millisecond, true) => self.write_keyword("MILLISECONDS"),
(IntervalUnit::Microsecond, false) => self.write_keyword("MICROSECOND"),
(IntervalUnit::Microsecond, true) => self.write_keyword("MICROSECONDS"),
(IntervalUnit::Nanosecond, false) => self.write_keyword("NANOSECOND"),
(IntervalUnit::Nanosecond, true) => self.write_keyword("NANOSECONDS"),
}
}

Expand Down Expand Up @@ -31169,6 +31175,13 @@ impl Generator {
self.generate_expression(step)?;
}

if let Some(staleness) = &e.staleness {
self.write_space();
self.write_keyword("STALENESS");
self.write_space();
self.generate_expression(staleness)?;
}

if let Some(interpolate) = &e.interpolate {
self.write_space();
self.write_keyword("INTERPOLATE");
Expand Down
Loading
Loading