diff --git a/crates/lib-core/src/dialects/base.rs b/crates/lib-core/src/dialects/base.rs index 8fad02c1f..6996a51a9 100644 --- a/crates/lib-core/src/dialects/base.rs +++ b/crates/lib-core/src/dialects/base.rs @@ -3,7 +3,6 @@ use std::collections::hash_map::Entry; use std::fmt::Debug; use ahash::{AHashMap, AHashSet}; - use crate::dialects::init::DialectKind; use crate::dialects::syntax::SyntaxKind; use crate::helpers::{capitalize, ToMatchable}; diff --git a/crates/lib-core/src/dialects/init.rs b/crates/lib-core/src/dialects/init.rs index 829f7753b..ce344bd81 100644 --- a/crates/lib-core/src/dialects/init.rs +++ b/crates/lib-core/src/dialects/init.rs @@ -22,6 +22,7 @@ pub enum DialectKind { Athena, Bigquery, Clickhouse, + Databricks, Duckdb, Postgres, Redshift, diff --git a/crates/lib-core/src/dialects/syntax.rs b/crates/lib-core/src/dialects/syntax.rs index 5bb5ab8b1..7876d7553 100644 --- a/crates/lib-core/src/dialects/syntax.rs +++ b/crates/lib-core/src/dialects/syntax.rs @@ -359,6 +359,7 @@ pub enum SyntaxKind { CreateStreamStatement, AlterStreamStatement, ShowStatement, + ShowViewsStatement, AlterUserStatement, AlterSessionStatement, AlterSessionSetStatement, @@ -611,7 +612,7 @@ impl SyntaxKind { } #[derive(Clone, PartialEq, Eq, Default)] -pub struct SyntaxSet([u64; 9]); +pub struct SyntaxSet([u64; 10]); impl std::fmt::Debug for SyntaxSet { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -620,7 +621,7 @@ impl std::fmt::Debug for SyntaxSet { } impl SyntaxSet { - pub const EMPTY: SyntaxSet = Self([0; 9]); + pub const EMPTY: SyntaxSet = Self([0; 10]); const SLICE_BITS: u16 = u64::BITS as u16; pub const fn new(kinds: &[SyntaxKind]) -> Self { diff --git a/crates/lib-dialects/Cargo.toml b/crates/lib-dialects/Cargo.toml index b98f7b5a7..0615e5c54 100644 --- a/crates/lib-dialects/Cargo.toml +++ b/crates/lib-dialects/Cargo.toml @@ -21,6 +21,7 @@ default = [ "athena", "bigquery", "clickhouse", + "databricks", "duckdb", "hive", "postgres", @@ -33,6 
+34,7 @@ default = [ athena = [] bigquery = [] clickhouse = [] +databricks = ["sparksql"] duckdb = ["postgres"] hive = [] postgres = [] diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs new file mode 100644 index 000000000..d8aa2a39a --- /dev/null +++ b/crates/lib-dialects/src/databricks.rs @@ -0,0 +1,1093 @@ +use std::collections::HashSet; + +use crate::databricks_keywords::{RESERVED_KEYWORDS, UNRESERVED_KEYWORDS}; +use crate::sparksql; +use sqruff_lib_core::parser::grammar::anyof::AnyNumberOf; +use sqruff_lib_core::parser::grammar::delimited::Delimited; +use sqruff_lib_core::parser::grammar::sequence::Bracketed; +use sqruff_lib_core::parser::matchable::MatchableTrait; +use sqruff_lib_core::parser::segments::meta::MetaSegment; +use sqruff_lib_core::{ + dialects::{base::Dialect, init::DialectKind, syntax::SyntaxKind}, + helpers::{Config, ToMatchable}, + parser::{ + grammar::{anyof::one_of, base::Ref, sequence::Sequence}, + lexer::Matcher, + }, + vec_of_erased, +}; + +pub fn dialect() -> Dialect { + let raw_sparksql = sparksql::dialect(); + + let mut databricks = sparksql::dialect(); + databricks.name = DialectKind::Databricks; + + // What want to translate from Sqlfluff + // databricks_dialect.sets("unreserved_keywords").update(UNRESERVED_KEYWORDS) + // databricks_dialect.sets("unreserved_keywords").update( + // sparksql_dialect.sets("reserved_keywords") + // ) + // databricks_dialect.sets("unreserved_keywords").difference_update(RESERVED_KEYWORDS) + // databricks_dialect.sets("reserved_keywords").clear() + // databricks_dialect.sets("reserved_keywords").update(RESERVED_KEYWORDS) + // databricks_dialect.sets("date_part_function_name").update(["TIMEDIFF"]) + + databricks + .sets_mut("unreserved_keywords") + .extend(UNRESERVED_KEYWORDS); + databricks + .sets_mut("unreserved_keywords") + .extend(raw_sparksql.sets("reserved_keywords")); + databricks + .sets_mut("unreserved_keywords") + .retain(|x| 
!RESERVED_KEYWORDS.contains(x)); + databricks.sets_mut("reserved_keywords").clear(); + databricks + .sets_mut("reserved_keywords") + .extend(RESERVED_KEYWORDS); + // SQLFluff equivalent: `sets("date_part_function_name").update(["TIMEDIFF"])`. + // The set name must be spelt exactly as in the translation notes above. + databricks + .sets_mut("date_part_function_name") + .extend(["TIMEDIFF"]); + + // Named Function Parameters: + // https://docs.databricks.com/en/sql/language-manual/sql-ref-function-invocation.html#named-parameter-invocation + // NOTE(review): the matcher name "right_array" looks like a typo for "right_arrow"; + // confirm nothing looks the matcher up by name before renaming it. + databricks.insert_lexer_matchers( + vec![Matcher::string("right_array", "=>", SyntaxKind::RightArrow)], + "equals", + ); + + // Notebook Cell Delimiter: + // https://learn.microsoft.com/en-us/azure/databricks/notebooks/notebook-export-import#sql-1 + // // databricks.insert_lexer_matchers( + // vec![Match::regex( + // "command", + // r"(\r?\n){2}-- COMMAND ----------(\r?\n)", + // SyntaxKind::Code, + // )], + // "newline", + // ); + + // Databricks Notebook Start: + // Needed to insert "so early" to avoid magic + notebook + // start to be interpreted as inline comment + // databricks.insert_lexer_matchers( + // vec![ + // Matcher::regex( + // "notebook_start", + // r"-- Databricks notebook source(\r?\n){1}", + // SyntaxKind::NotebookStart, + // ), + // Matcher::regex( + // "magic_line", + // r"(-- MAGIC)( [^%]{1})([^\n]*)", + // SyntaxKind::MagicLine, + // ), + // Matcher::regex( + // "magic_start", + // r"(-- MAGIC %)([^\n]{2,})(\r?\n)", + // SyntaxKind::MagicStart, + // ), + // ], + // "inline_comment", + // ); + + databricks.add([ + ( + "CatalogReferenceSegment".into(), + Ref::new("ObjectReferenceSegment").to_matchable().into(), + ), + ( + // SetOwnerGrammar=Sequence( + // Ref.keyword("SET", optional=True), + // "OWNER", + // "TO", + //
Ref("PrincipalIdentifierSegment"), + // ), + "SetOwnerGrammar".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("SET").optional(), + Ref::keyword("OWNER"), + Ref::keyword("TO"), + Ref::new("PrincipalIdentifierSegment"), + ]) + .to_matchable() + .into(), + ), + ( + "PredictiveOptimizationGrammar".into(), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![ + Ref::keyword("ENABLE"), + Ref::keyword("DISABLE"), + Ref::keyword("INHERIT"), + ]), + Ref::keyword("PREDICTIVE"), + Ref::keyword("OPTIMIZATION"), + ]) + .to_matchable() + .into(), + ), + ( + // https://docs.databricks.com/en/sql/language-manual/sql-ref-principal.html + "PrincipalIdentifierSegment".into(), + one_of(vec_of_erased![ + Ref::new("NakedIdentifierSegment"), + Ref::new("BackQuotedIdentifierSegment"), + ]) + .to_matchable() + .into(), + ), + ( + "AlterCatalogStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + Ref::keyword("CATALOG"), + Ref::new("CatalogReferenceSegment"), + one_of(vec_of_erased![ + Ref::new("SetOwnerGrammar"), + Ref::new("SetTagsGrammar"), + Ref::new("UnsetTagsGrammar"), + Ref::new("PredictiveOptimizationGrammar"), + ]), + ]) + .to_matchable() + .into(), + ), + ( + "SetTagsGrammar".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::keyword("TAGS"), + Ref::new("BracketedPropertyListGrammar"), + ]) + .to_matchable() + .into(), + ), + ( + "UnsetTagsGrammar".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("UNSET"), + Ref::keyword("TAGS"), + Ref::new("BracketedPropertyNameListGrammar"), + ]) + .to_matchable() + .into(), + ), + ( + // `DEFAULT <literal | function call>`, as used by `ALTER COLUMN c SET DEFAULT ...` + // and by the optional default clause of `ADD COLUMN(S)`. + "ColumnDefaultGrammar".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("DEFAULT"), + one_of(vec_of_erased![Ref::new("LiteralGrammar"), Ref::new("FunctionSegment")]), + ]) + .to_matchable() + .into(), + ), + ( + "ConstraintOptionGrammar".into(), + Sequence::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("ENABLE"), + Ref::keyword("NOVALIDATE") + ]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ +
Ref::keyword("NOT"), + Ref::keyword("ENFORCED") + ]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![Ref::keyword("DEFERRABLE")]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("INITIALLY"), + Ref::keyword("DEFERRED") + ]) + .config(|config| { config.optional() }), + one_of(vec_of_erased![Ref::keyword("NORELY"), Ref::keyword("RELY"),]) + .config(|config| { config.optional() }), + ]) + .to_matchable() + .into(), + ), + ( + // Every foreign-key option is optional, including `ON DELETE NO ACTION`. + "ForeignKeyOptionGrammar".into(), + Sequence::new(vec_of_erased![ + Sequence::new(vec_of_erased![Ref::keyword("MATCH"), Ref::keyword("FULL"),]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("ON"), + Ref::keyword("UPDATE"), + Ref::keyword("NO"), + Ref::keyword("ACTION"), + ]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("ON"), + Ref::keyword("DELETE"), + Ref::keyword("NO"), + Ref::keyword("ACTION"), + ]) + .config(|config| { config.optional() }), + ]) + .to_matchable() + .into(), + ), + ( + // `DROP { PRIMARY KEY | FOREIGN KEY (col, ...) | CONSTRAINT name }`. + // This grammar is referenced on its own from `ALTER TABLE`, so it has to + // consume the leading `DROP` keyword itself. + "DropConstraintGrammar".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::new("PrimaryKeyGrammar"), + Ref::new("IfExistsGrammar").optional(), + one_of(vec_of_erased![ + Ref::keyword("RESTRICT"), + Ref::keyword("CASCADE"), + ]) + .config(|config| config.optional()), + ]), + Sequence::new(vec_of_erased![ + Ref::new("ForeignKeyGrammar"), + Ref::new("IfExistsGrammar").optional(), + // A foreign key may span several comma-separated columns. + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![Ref::new( + "ColumnReferenceSegment" + )])]), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("CONSTRAINT"), + Ref::new("IfExistsGrammar").optional(), + Ref::new("ObjectReferenceSegment"), + one_of(vec_of_erased![ + Ref::keyword("RESTRICT"), + Ref::keyword("CASCADE"), + ]) + .config(|config| config.optional()), + ]), + ]), + ]) + .to_matchable() + .into(), + ), + ( + "AlterPartitionGrammar".into(), + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![ + AnyNumberOf::new(vec_of_erased![one_of(vec_of_erased![ +
Ref::new("ColumnReferenceSegment"), + Ref::new("SetClauseSegment"), + ]),]) + .config(|config| config.min_times(1)) + ])]) + .to_matchable() + .into(), + ), + ( + "RowFilterClauseGrammar".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("ROW"), + Ref::keyword("FILTER"), + Ref::new("ObjectReferenceSegment"), + Ref::keyword("ON"), + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![one_of( + vec_of_erased![ + Ref::new("ColumnReferenceSegment"), + Ref::new("LiteralGrammar"), + ] + )]) + .config(|config| config.optional())]) + ]) + .to_matchable() + .into(), + ), + // TODO Sort out the following grammar + // ( + // "PropertiesBackTickedIdentifierSegment".into(), + // Matcher::regex( + // "properties_naked_identifier", + // r"`.+`", + // SyntaxKind::PropertiesNakedIdentifier, + // ).to_matchable().into(), + // ), + ( + "LocationWithCredentialGrammar".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("LOCATION"), + Ref::new("QuotedLiteralSegment"), + Sequence::new(vec_of_erased![ + Ref::keyword("WITH"), + Bracketed::new(vec_of_erased![ + Ref::keyword("CREDENTIAL"), + Ref::new("PrincipalIdentifierSegment") + ]), + ]) + .config(|config| { config.optional() }), + ]) + .to_matchable() + .into(), + ), + ( + "ShowVolumesStatement".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("SHOW"), + Ref::keyword("VOLUMES"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN"),]), + Ref::new("DatabaseReferenceSegment"), + ]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE").optional(), + Ref::new("QuotedLiteralSegment"), + ]) + .config(|config| { config.optional() }), + // "VOLUMES", + // Sequence( + // OneOf("FROM", "IN"), + // Ref("DatabaseReferenceSegment"), + // optional=True, + // ), + // Sequence( + // Ref.keyword("LIKE", optional=True), + // Ref("QuotedLiteralSegment"), + // optional=True, + // ), + ]) + .to_matchable() + .into(), + ), + // // 
NotebookStart=TypedParser("notebook_start", CommentSegment, type="notebook_start"), + // // MagicLineGrammar=TypedParser("magic_line", CodeSegment, type="magic_line"), + // // MagicStartGrammar=TypedParser("magic_start", CodeSegment, type="magic_start"), + ( + "VariableNameIdentifierSegment".into(), + one_of(vec_of_erased![ + Ref::new("NakedIdentifierSegment"), + Ref::new("BackQuotedIdentifierSegment"), + ]) + .to_matchable() + .into(), + ), // // VariableNameIdentifierSegment=OneOf( + // // Ref("NakedIdentifierSegment"), + // // Ref("BackQuotedIdentifierSegment"), + // // ), + ]); + + // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-views.html + // Only difference between this and the SparkSQL version: + // - `LIKE` keyword is optional + databricks.replace_grammar( + "ShowViewsStatement".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("SHOW"), + Ref::keyword("VIEWS"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN"),]), + Ref::new("DatabaseReferenceSegment"), + ]) + .config(|config| { + config.optional(); + }), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE").optional(), + Ref::new("QuotedLiteralSegment"), + ]) + .config(|config| { config.optional() }) + ]) + .to_matchable() + .into(), + ); + + let mut show_statements = sparksql::show_statements(); + show_statements.push(Ref::new("ShowVolumesStatement").to_matchable().into()); + databricks.replace_grammar( + "ShowStatement".into(), + one_of(show_statements).to_matchable().into(), + ); + + // An `ALTER DATABASE/SCHEMA` statement. 
+ // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-alter-schema.html + databricks.replace_grammar( + "AlterDatabaseStatementSegment", + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + one_of(vec_of_erased![ + Ref::keyword("DATABASE"), + Ref::keyword("SCHEMA") + ]), + Ref::new("DatabaseReferenceSegment"), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::new("DatabasePropertiesGrammar"), + ]), + Ref::new("SetOwnerGrammar"), + Ref::new("SetTagsGrammar"), + Ref::new("UnsetTagsGrammar"), + Ref::new("PredictiveOptimizationGrammar"), + ]), + ]) + .to_matchable() + .into(), + ); + + // An `ALTER TABLE` statement. + // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-alter-table.html + // match_grammar = Sequence( + // "ALTER", + // "TABLE", + // Ref("TableReferenceSegment"), + // Indent, + // OneOf( + // Sequence( + // "RENAME", + // "TO", + // Ref("TableReferenceSegment"), + // ), + // Sequence( + // "ADD", + // OneOf("COLUMNS", "COLUMN"), + // Indent, + // Bracketed( + // Delimited( + // Sequence( + // Ref("ColumnFieldDefinitionSegment"), + // Ref("ColumnDefaultGrammar", optional=True), + // Ref("CommentGrammar", optional=True), + // Ref("FirstOrAfterGrammar", optional=True), + // Ref("MaskStatementSegment", optional=True), + // ), + // ), + // ), + // Dedent, + // ), + // Sequence( + // OneOf("ALTER", "CHANGE"), + // Ref.keyword("COLUMN", optional=True), + // Ref("ColumnReferenceSegment"), + // OneOf( + // Ref("CommentGrammar"), + // Ref("FirstOrAfterGrammar"), + // Sequence( + // OneOf("SET", "DROP"), + // "NOT", + // "NULL", + // ), + // Sequence( + // "TYPE", + // Ref("DatatypeSegment"), + // ), + // Sequence( + // "SET", + // Ref("ColumnDefaultGrammar"), + // ), + // Sequence( + // "DROP", + // "DEFAULT", + // ), + // Sequence( + // "SYNC", + // "IDENTITY", + // ), + // Sequence( + // "SET", + // Ref("MaskStatementSegment"), + // ), + // Sequence( + // "DROP", + // "MASK", + 
// ), + // Ref("SetTagsGrammar"), + // Ref("UnsetTagsGrammar"), + // ), + // ), + // Sequence( + // "DROP", + // OneOf("COLUMN", "COLUMNS", optional=True), + // Ref("IfExistsGrammar", optional=True), + // OptionallyBracketed( + // Delimited( + // Ref("ColumnReferenceSegment"), + // ), + // ), + // ), + // Sequence( + // "RENAME", + // "COLUMN", + // Ref("ColumnReferenceSegment"), + // "TO", + // Ref("ColumnReferenceSegment"), + // ), + // Sequence( + // "ADD", + // Ref("TableConstraintSegment"), + // ), + // Ref("DropConstraintGrammar"), + // Sequence( + // "DROP", + // "FEATURE", + // Ref("ObjectReferenceSegment"), + // Sequence( + // "TRUNCATE", + // "HISTORY", + // optional=True, + // ), + // ), + // Sequence( + // "ADD", + // Ref("IfNotExistsGrammar", optional=True), + // AnyNumberOf(Ref("AlterPartitionGrammar")), + // ), + // Sequence( + // "DROP", + // Ref("IfExistsGrammar", optional=True), + // AnyNumberOf(Ref("AlterPartitionGrammar")), + // ), + // Sequence( + // Ref("AlterPartitionGrammar"), + // "SET", + // Ref("LocationGrammar"), + // ), + // Sequence( + // Ref("AlterPartitionGrammar"), + // "RENAME", + // "TO", + // Ref("AlterPartitionGrammar"), + // ), + // Sequence( + // "RECOVER", + // "PARTITIONS", + // ), + // Sequence( + // "SET", + // Ref("RowFilterClauseGrammar"), + // ), + // Sequence( + // "DROP", + // "ROW", + // "FILTER", + // ), + // Sequence( + // "SET", + // Ref("TablePropertiesGrammar"), + // ), + // Ref("UnsetTablePropertiesGrammar"), + // Sequence( + // "SET", + // "SERDE", + // Ref("QuotedLiteralSegment"), + // Sequence( + // "WITH", + // "SERDEPROPERTIES", + // Ref("BracketedPropertyListGrammar"), + // optional=True, + // ), + // ), + // Sequence( + // "SET", + // Ref("LocationGrammar"), + // ), + // Ref("SetOwnerGrammar"), + // Sequence( + // Sequence( + // "ALTER", + // "COLUMN", + // Ref("ColumnReferenceSegment"), + // optional=True, + // ), + // Ref("SetTagsGrammar"), + // ), + // Sequence( + // Sequence( + // "ALTER", + // 
"COLUMN", + // Ref("ColumnReferenceSegment"), + // optional=True, + // ), + // Ref("UnsetTagsGrammar"), + // ), + // Ref("ClusterByClauseSegment"), + // Ref("PredictiveOptimizationGrammar"), + // ), + // Dedent, + // ) + databricks.replace_grammar( + "AlterTableStatementSegment", + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + Ref::keyword("TABLE"), + Ref::new("TableReferenceSegment"), + MetaSegment::indent(), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("RENAME"), + Ref::keyword("TO"), + Ref::new("TableReferenceSegment"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("ADD"), + one_of(vec_of_erased![ + Ref::keyword("COLUMNS"), + Ref::keyword("COLUMN") + ]), + MetaSegment::indent(), + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::new("ColumnFieldDefinitionSegment"), + Ref::new("ColumnDefaultGrammar").optional(), + Ref::new("CommentGrammar").optional(), + Ref::new("FirstOrAfterGrammar").optional(), + Ref::new("MaskStatementSegment").optional(), + ]), + ]),]), + MetaSegment::dedent(), + ]), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![ + Ref::keyword("ALTER"), + Ref::keyword("CHANGE") + ]), + Ref::keyword("COLUMN").optional(), + Ref::new("ColumnReferenceSegment"), + one_of(vec_of_erased![ + Ref::new("CommentGrammar"), + Ref::new("FirstOrAfterGrammar"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("SET"), Ref::keyword("DROP")]), + Ref::keyword("NOT"), + Ref::keyword("NULL"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("TYPE"), + Ref::new("DatatypeSegment"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::new("ColumnDefaultGrammar"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + Ref::keyword("DEFAULT"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("SYNC"), + Ref::keyword("IDENTITY"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::new("MaskStatementSegment"), + ]), 
+ Sequence::new(vec_of_erased![Ref::keyword("DROP"), Ref::keyword("MASK"),]), + Ref::new("SetTagsGrammar"), + Ref::new("UnsetTagsGrammar"), + ]), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + one_of(vec_of_erased![ + Ref::keyword("COLUMN"), + Ref::keyword("COLUMNS") + ]) + .config(|config| { config.optional() }), + Ref::new("IfExistsGrammar").optional(), + one_of(vec_of_erased![ + Delimited::new(vec_of_erased![Ref::new("ColumnReferenceSegment")]), + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![Ref::new( + "ColumnReferenceSegment" + )]),]), + ]), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("RENAME"), + Ref::keyword("COLUMN"), + Ref::new("ColumnReferenceSegment"), + Ref::keyword("TO"), + Ref::new("ColumnReferenceSegment"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("ADD"), + Ref::new("TableConstraintSegment"), + ]), + Ref::new("DropConstraintGrammar"), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + Ref::keyword("FEATURE"), + Ref::new("ObjectReferenceSegment"), + Sequence::new(vec_of_erased![ + Ref::keyword("TRUNCATE"), + Ref::keyword("HISTORY"), + ]) + .config(|config| { config.optional() }), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("ADD"), + Ref::new("IfNotExistsGrammar").optional(), + AnyNumberOf::new(vec_of_erased![Ref::new("AlterPartitionGrammar"),]), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + Ref::new("IfExistsGrammar").optional(), + AnyNumberOf::new(vec_of_erased![Ref::new("AlterPartitionGrammar"),]), + ]), + Sequence::new(vec_of_erased![ + Ref::new("AlterPartitionGrammar"), + Ref::keyword("SET"), + Ref::new("LocationGrammar"), + ]), + Sequence::new(vec_of_erased![ + Ref::new("AlterPartitionGrammar"), + Ref::keyword("RENAME"), + Ref::keyword("TO"), + Ref::new("AlterPartitionGrammar"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("RECOVER"), + Ref::keyword("PARTITIONS"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + 
Ref::new("RowFilterClauseGrammar"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + Ref::keyword("ROW"), + Ref::keyword("FILTER"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::new("TablePropertiesGrammar"), + ]), + Ref::new("UnsetTablePropertiesGrammar"), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::keyword("SERDE"), + Ref::new("QuotedLiteralSegment"), + Sequence::new(vec_of_erased![ + Ref::keyword("WITH"), + Ref::keyword("SERDEPROPERTIES"), + Ref::new("BracketedPropertyListGrammar"), + ]) + .config(|config| { config.optional() }), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::new("LocationGrammar"), + ]), + Ref::new("SetOwnerGrammar"), + Sequence::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + Ref::keyword("COLUMN"), + Ref::new("ColumnReferenceSegment"), + ]) + .config(|config| { config.optional() }), + Ref::new("SetTagsGrammar"), + ]), + Sequence::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + Ref::keyword("COLUMN"), + Ref::new("ColumnReferenceSegment"), + ]) + .config(|config| { config.optional() }), + Ref::new("UnsetTagsGrammar"), + ]), + Ref::new("ClusterByClauseSegment"), + Ref::new("PredictiveOptimizationGrammar"), + ]), + MetaSegment::dedent(), + ]) + .to_matchable() + .into(), + ); + + // `COMMENT ON` statement. 
+ // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-comment.html + databricks.add([( + "CommentOnStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("COMMENT"), + Ref::keyword("ON"), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("CATALOG"), + Ref::new("CatalogReferenceSegment"), + ]), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![ + Ref::keyword("DATABASE"), + Ref::keyword("SCHEMA") + ]), + Ref::new("DatabaseReferenceSegment"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("TABLE"), + Ref::new("TableReferenceSegment"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("VOLUME"), + Ref::new("VolumeReferenceSegment"), + ]), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![ + Ref::keyword("CONNECTION"), + Ref::keyword("PROVIDER"), + Ref::keyword("RECIPIENT"), + Ref::keyword("SHARE"), + ]), + Ref::new("ObjectReferenceSegment"), + ]), + ]), + Ref::keyword("IS"), + one_of(vec_of_erased![ + Ref::new("QuotedLiteralSegment"), + Ref::keyword("NULL"), + ]), + ]) + .to_matchable() + .into(), + )]); + + databricks.add([( + "VolumeReferenceSegment".into(), + Ref::new("ObjectReferenceSegment").to_matchable().into(), + )]); + + // An `ALTER VOLUME` statement. + // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-alter-volume.html + databricks.add([( + "AlterVolumeStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + Ref::keyword("VOLUME"), + Ref::new("VolumeReferenceSegment"), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("RENAME"), + Ref::keyword("TO"), + Ref::new("VolumeReferenceSegment"), + ]), + Ref::new("SetOwnerGrammar"), + Ref::new("SetTagsGrammar"), + Ref::new("UnsetTagsGrammar"), + ]), + ]) + .to_matchable() + .into(), + )]); + + // A `CREATE CATALOG` statement. 
+ // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-catalog.html + databricks.add([( + "CreateCatalogStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("CREATE"), + Ref::keyword("CATALOG"), + Ref::new("IfNotExistsGrammar").optional(), + Ref::new("CatalogReferenceSegment"), + Ref::new("CommentGrammar").optional(), + ]) + .to_matchable() + .into(), + )]); + + // A `DROP CATALOG` statement. + // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-drop-catalog.html + databricks.add([( + "DropCatalogStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + Ref::keyword("CATALOG"), + Ref::new("IfExistsGrammar").optional(), + Ref::new("CatalogReferenceSegment"), + Ref::new("DropBehaviorGrammar").optional(), + ]) + .to_matchable() + .into(), + )]); + + // A `SET TIME ZONE` statement. + // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html + databricks.add([( + "SetTimeZoneStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::keyword("TIME"), + Ref::keyword("ZONE"), + one_of(vec_of_erased![ + Ref::keyword("LOCAL"), + Ref::new("QuotedLiteralSegment"), + Ref::new("IntervalExpressionSegment") + ]), + ]) + .to_matchable() + .into(), + )]); + + // A `SET VARIABLE` statement used to set session variables. 
+ // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-set-variable.html + // set var v1=val, v2=val2; + // # set var v1=val, v2=val2; + let kv_pair = Sequence::new(vec_of_erased![Delimited::new(vec_of_erased![ + Ref::new("VariableNameIdentifierSegment"), + Ref::new("EqualsSegment"), + one_of(vec_of_erased![ + Ref::keyword("DEFAULT"), + one_of(vec_of_erased![ + Bracketed::new(vec_of_erased![Ref::new("ExpressionSegment")]), + Ref::new("ExpressionSegment"), + ]), + ]), + ])]); + // set var (v1,v2) = (values(100,200)) + // The left-hand side is a comma-separated list of variable names, hence `Delimited`. + let bracketed_kv_pair = Sequence::new(vec_of_erased![ + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![Ref::new( + "VariableNameIdentifierSegment" + )])]), + Ref::new("EqualsSegment"), + Bracketed::new(vec_of_erased![one_of(vec_of_erased![ + Ref::new("SelectStatementSegment"), + Ref::new("ValuesClauseSegment"), + ]),]), + ]); + databricks.add([( + "SetVariableStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + one_of(vec_of_erased![ + Ref::keyword("VAR"), + Ref::keyword("VARIABLE"), + ]), + one_of(vec_of_erased![kv_pair.clone(), bracketed_kv_pair.clone(),]) + .config(|config| config.allow_gaps = true), + ]) + .to_matchable() + .into(), + )]); + + databricks.add([ + ( + "DatabaseReferenceSegment".into(), + Ref::new("ObjectReferenceSegment").to_matchable().into(), + ) + ]); + // A `USE DATABASE` statement. + // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-usedb.html + databricks.replace_grammar( + "UseDatabaseStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("USE"), + one_of(vec_of_erased![ + Ref::keyword("DATABASE"), + Ref::keyword("SCHEMA") + ]) + .config(|config| { + config.optional(); + },), + Ref::new("DatabaseReferenceSegment"), + ]) + .to_matchable() + .into(), + ); + + // The parameters for a function ie. `(column type COMMENT 'comment')`.
+ databricks.add([( + "FunctionParameterListGrammarWithComments".into(), + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::new("FunctionParameterGrammar"), + AnyNumberOf::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("DEFAULT"), + Ref::new("LiteralGrammar"), + ]) + .config(|config| config.optional()), + Ref::new("CommentClauseSegment").optional(), + ]), + ]), + ])]) + .to_matchable() + .into(), + )]); + + // A `CREATE FUNCTION` statement. + // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-sql-function.html + databricks.add([( + "CreateDatabricksFunctionStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("CREATE"), + Ref::new("OrReplaceGrammar").optional(), + Ref::new("TemporaryGrammar").optional(), + Ref::keyword("FUNCTION"), + Ref::new("IfNotExistsGrammar").optional(), + Ref::new("FunctionNameSegment"), + Ref::new("FunctionParameterListGrammarWithComments"), + Sequence::new(vec_of_erased![ + Ref::keyword("RETURNS"), + one_of(vec_of_erased![ + Ref::new("DatatypeSegment"), + Sequence::new(vec_of_erased![ + Ref::keyword("TABLE"), + Sequence::new(vec_of_erased![ + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::new("ColumnReferenceSegment"), + Ref::new("DatatypeSegment"), + Ref::new("CommentGrammar").optional(), + ]), + ]),]), + ]) + .config(|config| { config.optional() }), + ]), + ]) + .config(|config| { config.optional() }), + ]) + .config(|config| { config.optional() }), + Ref::new("FunctionDefinitionGrammar"), + + ]).to_matchable().into(), + )]); + + // Drop Volume Statement. 
+ // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-drop-volume.html + databricks.add([( + "DropVolumeStatement".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + Ref::keyword("VOLUME"), + Ref::new("IfExistsGrammar").optional(), + Ref::new("VolumeReferenceSegment"), + ]) + .to_matchable() + .into(), + )]); + + // Extend the SparkSQL statement list with the Databricks-only statements defined above. + databricks.replace_grammar( + "StatementSegment", + raw_sparksql + .grammar("StatementSegment") + .match_grammar() + .unwrap() + .copy( + Some(vec_of_erased![ + Ref::new("AlterCatalogStatementSegment"), + Ref::new("DropCatalogStatementSegment"), + Ref::new("AlterVolumeStatementSegment"), + Ref::new("CommentOnStatementSegment"), + Ref::new("CreateCatalogStatementSegment"), + Ref::new("SetVariableStatementSegment"), + Ref::new("SetTimeZoneStatementSegment"), + Ref::new("CreateDatabricksFunctionStatementSegment"), + Ref::new("FunctionParameterListGrammarWithComments"), + Ref::new("DropVolumeStatement"), + ]), + None, + None, + None, + Vec::new(), + false, + ), + ); + + databricks.expand(); + + databricks +} diff --git a/crates/lib-dialects/src/databricks_keywords.rs b/crates/lib-dialects/src/databricks_keywords.rs new file mode 100644 index 000000000..7ce50bdbc --- /dev/null +++ b/crates/lib-dialects/src/databricks_keywords.rs @@ -0,0 +1,47 @@ +/// Databricks reserved keywords, as listed at +/// <https://docs.databricks.com/sql/language-manual/sql-ref-reserved-words.html>. +pub(crate) const RESERVED_KEYWORDS: &[&str] = &[ + "ANTI", + "CROSS", + "EXCEPT", + "FULL", + "INNER", + "INTERSECT", + "JOIN", + "LATERAL", + "LEFT", + "MINUS", + "NATURAL", + "ON", + "RIGHT", + "SEMI", + "UNION", + "USING", +]; + +/// Keywords added to the dialect's `unreserved_keywords` set by `databricks::dialect()`. +pub(crate) const UNRESERVED_KEYWORDS: &[&str] = &[ + "CATALOG", + "COMPENSATION", + "CRON", + "ENFORCED", + "EVOLUTION", + "FEATURE", + "IDENTIFIER", + "MANAGED", + "MASK", + "NORELY", + "OPTIMIZATION", + "OPTIMIZE", + "PREDICTIVE", + "PROVIDER", + "PYTHON", + "RECIPIENT", + "RELY", + "SCHEDULE", +
"SQL", + "TAGS", + "TIMESERIES", + "VOLUME", + "VOLUMES", + "ZORDER", +]; diff --git a/crates/lib-dialects/src/lib.rs b/crates/lib-dialects/src/lib.rs index 01ea46e54..4b2833579 100644 --- a/crates/lib-dialects/src/lib.rs +++ b/crates/lib-dialects/src/lib.rs @@ -15,6 +15,10 @@ mod bigquery_keywords; pub mod clickhouse; #[cfg(feature = "clickhouse")] mod clickhouse_keywords; +#[cfg(feature = "databricks")] +pub mod databricks; +#[cfg(feature = "databricks")] +pub mod databricks_keywords; #[cfg(feature = "duckdb")] pub mod duckdb; #[cfg(feature = "hive")] @@ -54,6 +58,8 @@ pub fn kind_to_dialect(kind: &DialectKind) -> Option { DialectKind::Bigquery => bigquery::dialect(), #[cfg(feature = "clickhouse")] DialectKind::Clickhouse => clickhouse::dialect(), + #[cfg(feature = "databricks")] + DialectKind::Databricks => databricks::dialect(), #[cfg(feature = "duckdb")] DialectKind::Duckdb => duckdb::dialect(), #[cfg(feature = "postgres")] diff --git a/crates/lib-dialects/src/sparksql.rs b/crates/lib-dialects/src/sparksql.rs index a6f3cd8b9..7b8d6274b 100644 --- a/crates/lib-dialects/src/sparksql.rs +++ b/crates/lib-dialects/src/sparksql.rs @@ -10,7 +10,7 @@ use sqruff_lib_core::parser::grammar::conditional::Conditional; use sqruff_lib_core::parser::grammar::delimited::Delimited; use sqruff_lib_core::parser::grammar::sequence::{Bracketed, Sequence}; use sqruff_lib_core::parser::lexer::Matcher; -use sqruff_lib_core::parser::matchable::MatchableTrait; +use sqruff_lib_core::parser::matchable::{Matchable, MatchableTrait}; use sqruff_lib_core::parser::node_matcher::NodeMatcher; use sqruff_lib_core::parser::parsers::{MultiStringParser, RegexParser, StringParser, TypedParser}; use sqruff_lib_core::parser::segments::bracketed::BracketedSegmentMatcher; @@ -2572,24 +2572,28 @@ pub fn dialect() -> Dialect { ), ( "ShowViewsStatement".into(), - Sequence::new(vec_of_erased![ - Ref::keyword("SHOW"), - Ref::keyword("VIEWS"), - Sequence::new(vec_of_erased![ - 
one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]), - Ref::new("DatabaseReferenceSegment") - ]) - .config(|config| { - config.optional(); - }), + NodeMatcher::new( + SyntaxKind::ShowViewsStatement, Sequence::new(vec_of_erased![ - Ref::keyword("LIKE"), - Ref::new("QuotedLiteralSegment") + Ref::keyword("SHOW"), + Ref::keyword("VIEWS"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]), + Ref::new("DatabaseReferenceSegment") + ]) + .config(|config| { + config.optional(); + }), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE"), + Ref::new("QuotedLiteralSegment") + ]) + .config(|config| { + config.optional(); + }) ]) - .config(|config| { - config.optional(); - }) - ]) + .to_matchable(), + ) .to_matchable() .into(), ), @@ -2617,149 +2621,7 @@ pub fn dialect() -> Dialect { "ShowStatement".into(), NodeMatcher::new( SyntaxKind::ShowStatement, - one_of(vec_of_erased![ - Ref::new("ShowViewsStatement"), - Sequence::new(vec_of_erased![ - Ref::keyword("SHOW"), - one_of(vec_of_erased![ - Sequence::new(vec_of_erased![ - Ref::keyword("CREATE"), - Ref::keyword("TABLE"), - Ref::new("TableExpressionSegment"), - Sequence::new(vec_of_erased![ - Ref::keyword("AS"), - Ref::keyword("SERDE") - ]) - .config(|config| { - config.optional(); - }) - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("COLUMNS"), - Ref::keyword("IN"), - Ref::new("TableExpressionSegment"), - Sequence::new(vec_of_erased![ - Ref::keyword("IN"), - Ref::new("DatabaseReferenceSegment") - ]) - .config(|config| { - config.optional(); - }) - ]), - Sequence::new(vec_of_erased![ - one_of(vec_of_erased![ - Ref::keyword("DATABASES"), - Ref::keyword("SCHEMAS") - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("LIKE"), - Ref::new("QuotedLiteralSegment") - ]) - .config(|config| { - config.optional(); - }) - ]), - Sequence::new(vec_of_erased![ - one_of(vec_of_erased![ - Ref::keyword("USER"), - Ref::keyword("SYSTEM"), - Ref::keyword("ALL") - ]) - 
.config(|config| { - config.optional(); - }), - Ref::keyword("FUNCTIONS"), - one_of(vec_of_erased![ - Sequence::new(vec_of_erased![ - Ref::new("DatabaseReferenceSegment"), - Ref::new("DotSegment"), - Ref::new("FunctionNameSegment") - ]) - .config(|config| { - config.disallow_gaps(); - config.optional(); - }), - Ref::new("FunctionNameSegment").optional(), - Sequence::new(vec_of_erased![ - Ref::keyword("LIKE"), - Ref::new("QuotedLiteralSegment") - ]) - .config(|config| { - config.optional(); - }) - ]) - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("PARTITIONS"), - Ref::new("TableReferenceSegment"), - Ref::new("PartitionSpecGrammar").optional() - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("TABLE"), - Ref::keyword("EXTENDED"), - Sequence::new(vec_of_erased![ - one_of(vec_of_erased![ - Ref::keyword("IN"), - Ref::keyword("FROM") - ]), - Ref::new("DatabaseReferenceSegment") - ]) - .config(|config| { - config.optional(); - }), - Ref::keyword("LIKE"), - Ref::new("QuotedLiteralSegment"), - Ref::new("PartitionSpecGrammar").optional() - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("TABLES"), - Sequence::new(vec_of_erased![ - one_of(vec_of_erased![ - Ref::keyword("FROM"), - Ref::keyword("IN") - ]), - Ref::new("DatabaseReferenceSegment") - ]) - .config(|config| { - config.optional(); - }), - Sequence::new(vec_of_erased![ - Ref::keyword("LIKE"), - Ref::new("QuotedLiteralSegment") - ]) - .config(|config| { - config.optional(); - }) - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("TBLPROPERTIES"), - Ref::new("TableReferenceSegment"), - Ref::new("BracketedPropertyNameListGrammar").optional() - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("VIEWS"), - Sequence::new(vec_of_erased![ - one_of(vec_of_erased![ - Ref::keyword("FROM"), - Ref::keyword("IN") - ]), - Ref::new("DatabaseReferenceSegment") - ]) - .config(|config| { - config.optional(); - }), - Sequence::new(vec_of_erased![ - Ref::keyword("LIKE"), - Ref::new("QuotedLiteralSegment") - ]) - 
.config(|config| { - config.optional(); - }) - ]) - ]) - ]) - ]) - .to_matchable(), + one_of(show_statements()).to_matchable(), ) .to_matchable() .into(), @@ -3545,3 +3407,141 @@ pub fn dialect() -> Dialect { sparksql_dialect.expand(); sparksql_dialect } + +/// Builds the alternatives matched by the SparkSQL `ShowStatement` segment; `dialect()` wraps the result in `one_of`. +/// +/// The first alternative is a reference to `ShowViewsStatement`; the second is the `SHOW` keyword followed by one of the remaining forms (`CREATE TABLE`, `COLUMNS`, `DATABASES`/`SCHEMAS`, `FUNCTIONS`, `PARTITIONS`, `TABLE EXTENDED`, `TABLES`, `TBLPROPERTIES`, `VIEWS`). +/// NOTE(review): presumably `pub` so dialects built on SparkSQL can reuse the list - confirm against callers. +pub fn show_statements() -> Vec<Matchable> { + vec_of_erased![ + Ref::new("ShowViewsStatement"), + Sequence::new(vec_of_erased![ + Ref::keyword("SHOW"), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("CREATE"), + Ref::keyword("TABLE"), + Ref::new("TableExpressionSegment"), + Sequence::new(vec_of_erased![Ref::keyword("AS"), Ref::keyword("SERDE")]) + .config(|config| { + config.optional(); + }) + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("COLUMNS"), + Ref::keyword("IN"), + Ref::new("TableExpressionSegment"), + Sequence::new(vec_of_erased![ + Ref::keyword("IN"), + Ref::new("DatabaseReferenceSegment") + ]) + .config(|config| { + config.optional(); + }) + ]), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![ + Ref::keyword("DATABASES"), + Ref::keyword("SCHEMAS") + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE"), + Ref::new("QuotedLiteralSegment") + ]) + .config(|config| { + config.optional(); + }) + ]), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![ + Ref::keyword("USER"), + Ref::keyword("SYSTEM"), + Ref::keyword("ALL") + ]) + .config(|config| { + config.optional(); + }), + Ref::keyword("FUNCTIONS"), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::new("DatabaseReferenceSegment"), + Ref::new("DotSegment"), + Ref::new("FunctionNameSegment") + ]) + .config(|config| { + config.disallow_gaps(); + config.optional(); + }), + Ref::new("FunctionNameSegment").optional(), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE"), + Ref::new("QuotedLiteralSegment") + ]) + .config(|config| { + config.optional(); + }) + ]) + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("PARTITIONS"), + Ref::new("TableReferenceSegment"), +
Ref::new("PartitionSpecGrammar").optional() + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("TABLE"), + Ref::keyword("EXTENDED"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("IN"), Ref::keyword("FROM")]), + Ref::new("DatabaseReferenceSegment") + ]) + .config(|config| { + config.optional(); + }), + Ref::keyword("LIKE"), + Ref::new("QuotedLiteralSegment"), + Ref::new("PartitionSpecGrammar").optional() + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("TABLES"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]), + Ref::new("DatabaseReferenceSegment") + ]) + .config(|config| { + config.optional(); + }), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE"), + Ref::new("QuotedLiteralSegment") + ]) + .config(|config| { + config.optional(); + }) + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("TBLPROPERTIES"), + Ref::new("TableReferenceSegment"), + Ref::new("BracketedPropertyNameListGrammar").optional() + ]), + // NOTE(review): same grammar as `ShowViewsStatement` (the first alternative above); looks redundant - confirm before removing. + Sequence::new(vec_of_erased![ + Ref::keyword("VIEWS"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]), + Ref::new("DatabaseReferenceSegment") + ]) + .config(|config| { + config.optional(); + }), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE"), + Ref::new("QuotedLiteralSegment") + ]) + .config(|config| { + config.optional(); + }) + ]) + ]) + ]) + ] +} diff --git a/crates/lib-dialects/src/sparksql_keywords.rs b/crates/lib-dialects/src/sparksql_keywords.rs index 849e2f7ba..eb2b42f58 100644 --- a/crates/lib-dialects/src/sparksql_keywords.rs +++ b/crates/lib-dialects/src/sparksql_keywords.rs @@ -270,6 +270,8 @@ pub(crate) const UNRESERVED_KEYWORDS: &[&str] = &[ "UPDATE", "USE", "VALUES", + "VAR", + "VARIABLE", "VIEW", "VIEWS", "WRITE", diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/.sqlfluff b/crates/lib-dialects/test/fixtures/dialects/databricks/.sqlfluff new file mode 100644 index 000000000..5aae42e07
--- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/.sqlfluff @@ -0,0 +1,2 @@ +[sqlfluff] +dialect = databricks diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.sql new file mode 100644 index 000000000..816d7a7bd --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.sql @@ -0,0 +1,18 @@ +-- Transfer ownership of the catalog to another user +ALTER CATALOG some_cat OWNER TO `alf@melmak.et`; +ALTER CATALOG some_cat OWNER TO my_group; + +-- SET is allowed as an optional keyword +ALTER CATALOG some_cat SET OWNER TO `alf@melmak.et`; +ALTER CATALOG some_cat SET OWNER TO my_group; + +-- Set and unset catalog tags +ALTER CATALOG some_cat SET TAGS ('tag1'='value1'); +ALTER CATALOG some_cat SET TAGS ('tag2'='value2', 'tag3'='value3'); +ALTER CATALOG some_cat UNSET TAGS ('tag1'); +ALTER CATALOG some_cat UNSET TAGS ('tag2', 'tag3'); + +-- Enable/Inherit/Disable Predictive Optimization +ALTER CATALOG some_cat ENABLE PREDICTIVE OPTIMIZATION; +ALTER CATALOG some_cat INHERIT PREDICTIVE OPTIMIZATION; +ALTER CATALOG some_cat DISABLE PREDICTIVE OPTIMIZATION; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.yml new file mode 100644 index 000000000..076f64d0d --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.yml @@ -0,0 +1,133 @@ +file: +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: 
CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag1''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value1''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag2''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value2''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''tag3''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value3''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: UNSET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag1''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: UNSET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag2''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''tag3''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - 
keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: ENABLE + - keyword: PREDICTIVE + - keyword: OPTIMIZATION +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: INHERIT + - keyword: PREDICTIVE + - keyword: OPTIMIZATION +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: DISABLE + - keyword: PREDICTIVE + - keyword: OPTIMIZATION +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.sql new file mode 100644 index 000000000..7351e47f6 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.sql @@ -0,0 +1,27 @@ +-- Transfer ownership of the schema to another user +ALTER SCHEMA some_cat OWNER TO `alf@melmak.et`; +ALTER SCHEMA some_cat OWNER TO my_group; + +-- SET is allowed as an optional keyword +ALTER SCHEMA some_cat SET OWNER TO `alf@melmak.et`; +ALTER SCHEMA some_cat SET OWNER TO my_group; + +-- DATABASE IS ALLOWED INSTEAD OF SCHEMA +ALTER DATABASE some_cat OWNER TO `alf@melmak.et`; +ALTER DATABASE some_cat SET OWNER TO `alf@melmak.et`; +ALTER DATABASE some_cat OWNER TO my_group; +ALTER DATABASE some_cat SET OWNER TO my_group; + +-- Set and unset schema tags +ALTER SCHEMA some_cat SET TAGS ('tag1'='value1'); +ALTER DATABASE some_cat SET TAGS ('tag2'='value2', 'tag3'='value3'); +ALTER DATABASE some_cat UNSET TAGS ('tag1'); +ALTER SCHEMA some_cat UNSET TAGS ('tag2', 'tag3'); + +-- Enable/Inherit/Disable Predictive Optimization +ALTER SCHEMA some_cat ENABLE PREDICTIVE OPTIMIZATION; +ALTER DATABASE some_cat INHERIT PREDICTIVE OPTIMIZATION; +ALTER SCHEMA some_cat DISABLE PREDICTIVE OPTIMIZATION; + +-- -- Add some schema properties +ALTER SCHEMA some_cat SET DBPROPERTIES 
('Edited-by'='John Doe', 'Edit-date'='2020-01-01'); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml new file mode 100644 index 000000000..70da4cddb --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml @@ -0,0 +1,209 @@ +file: +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - object_reference: + - naked_identifier: some_cat + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - object_reference: + - naked_identifier: some_cat + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - object_reference: + - naked_identifier: some_cat + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - object_reference: + - naked_identifier: some_cat + - keyword: 
OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag1''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value1''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag2''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value2''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''tag3''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value3''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - object_reference: + - naked_identifier: some_cat + - keyword: UNSET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag1''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - object_reference: + - naked_identifier: some_cat + - keyword: UNSET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag2''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''tag3''' + - end_bracket: 
) +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - object_reference: + - naked_identifier: some_cat + - keyword: ENABLE + - keyword: PREDICTIVE + - keyword: OPTIMIZATION +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - object_reference: + - naked_identifier: some_cat + - keyword: INHERIT + - keyword: PREDICTIVE + - keyword: OPTIMIZATION +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - object_reference: + - naked_identifier: some_cat + - keyword: DISABLE + - keyword: PREDICTIVE + - keyword: OPTIMIZATION +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: DBPROPERTIES + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''Edited-by''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''John Doe''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''Edit-date''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''2020-01-01''' + - end_bracket: ) +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.sql new file mode 100644 index 000000000..4399cdac9 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.sql @@ -0,0 +1,95 @@ +-- ALTER TABLE examples from Databricks documentation +-- https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-alter-table.html + +ALTER TABLE Student RENAME TO StudentInfo; + +ALTER TABLE default.StudentInfo PARTITION (age='10') RENAME TO PARTITION (age='15'); + +ALTER TABLE StudentInfo ADD columns (LastName string, DOB timestamp); + 
+ALTER TABLE StudentInfo DROP COLUMN (DOB); + +ALTER TABLE StudentInfo DROP COLUMNS IF EXISTS (LastName, DOB); + +ALTER TABLE StudentInfo ADD IF NOT EXISTS PARTITION (age=18); + +ALTER TABLE StudentInfo DROP IF EXISTS PARTITION (age=18); + +ALTER TABLE StudentInfo ADD IF NOT EXISTS PARTITION (age=18) PARTITION (age=20); + +ALTER TABLE StudentInfo RECOVER PARTITIONS; + +ALTER TABLE StudentInfo ALTER COLUMN name COMMENT "new comment"; + +ALTER TABLE StudentInfo RENAME COLUMN name TO FirstName; + +-- Change the file Location +ALTER TABLE dbx.tab1 PARTITION (a='1', b='2') SET LOCATION '/path/to/part/ways'; + +-- SET SERDE/ SERDE Properties (DBR only) +ALTER TABLE test_tab SET SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'; + +ALTER TABLE dbx.tab1 SET SERDE 'org.apache.hadoop' WITH SERDEPROPERTIES ('k' = 'v', 'kay' = 'vee'); + +-- SET TABLE PROPERTIES +ALTER TABLE dbx.tab1 SET TBLPROPERTIES ('winner' = 'loser'); + +-- DROP TABLE PROPERTIES +ALTER TABLE dbx.tab1 UNSET TBLPROPERTIES ('winner'); + +-- Drop the "deletion vectors" from a Delta table +ALTER TABLE my_table DROP FEATURE deletionVectors; + +-- 24 hours later +ALTER TABLE my_table DROP FEATURE deletionVectors TRUNCATE HISTORY; + +-- Applies three tags to the table named `test`. +ALTER TABLE test SET TAGS ('tag1' = 'val1', 'tag2' = 'val2', 'tag3' = 'val3'); + +-- Removes three tags from the table named `test`. +ALTER TABLE test UNSET TAGS ('tag1', 'tag2', 'tag3'); + +-- Applies three tags to table `main.schema1.test` column `col1`. +ALTER TABLE main.schema1.test ALTER COLUMN col1 SET TAGS ('tag1' = 'val1', 'tag2' = 'val2', 'tag3' = 'val3'); + +-- Removes three tags from table `main.schema1.test` column `col1`. 
+ALTER TABLE main.schema1.test ALTER COLUMN col1 UNSET TAGS ('tag1', 'tag2', 'tag3'); + +-- Enables predictive optimization for my_table +ALTER TABLE my_table ENABLE PREDICTIVE OPTIMIZATION; + +ALTER TABLE sales SET ROW FILTER us_filter ON (); + +ALTER TABLE sales SET ROW FILTER us_filter ON (region); + +ALTER TABLE sales DROP ROW FILTER; + +ALTER TABLE users ALTER COLUMN ssn SET MASK ssn_mask; + +ALTER TABLE users ALTER COLUMN ssn SET MASK ssn_mask USING COLUMNS (ssn_value); + +ALTER TABLE users ALTER COLUMN ssn DROP MASK; + +ALTER TABLE persons ADD CONSTRAINT persons_pk PRIMARY KEY(first_name, last_name); + +ALTER TABLE pets ADD CONSTRAINT pets_persons_fk + FOREIGN KEY(owner_first_name, owner_last_name) REFERENCES persons + NOT ENFORCED RELY; + +ALTER TABLE pets ADD CONSTRAINT pets_name_not_cute_chk CHECK (length(name) < 20); + +ALTER TABLE pets DROP CONSTRAINT pets_name_not_cute_chk; + +ALTER TABLE persons DROP CONSTRAINT persons_pk RESTRICT; + +ALTER TABLE pets DROP FOREIGN KEY IF EXISTS (owner_first_name, owner_last_name); + +ALTER TABLE persons DROP PRIMARY KEY CASCADE; + +ALTER TABLE rocks DROP COLUMN rock; + +ALTER TABLE rocks DROP COLUMN rock, loc; + +ALTER TABLE rocks DROP COLUMN IF EXISTS rock, loc; + +ALTER TABLE rocks DROP COLUMN IF EXISTS (rock, loc); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.yml new file mode 100644 index 000000000..84d9de44a --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.yml @@ -0,0 +1,500 @@ +file: +- statement: + - alter_table_statement: + - keyword: ALTER + - keyword: TABLE + - table_reference: + - naked_identifier: Student + - keyword: RENAME + - keyword: TO + - table_reference: + - naked_identifier: StudentInfo +- statement_terminator: ; +- file: + - word: ALTER + - word: TABLE + - word: default + - dot: . 
+ - word: StudentInfo + - word: PARTITION + - start_bracket: ( + - word: age + - raw_comparison_operator: = + - single_quote: '''10''' + - end_bracket: ) + - word: RENAME + - word: TO + - word: PARTITION + - start_bracket: ( + - word: age + - raw_comparison_operator: = + - single_quote: '''15''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: ADD + - word: columns + - start_bracket: ( + - word: LastName + - word: string + - comma: ',' + - word: DOB + - word: timestamp + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: DROP + - word: COLUMN + - start_bracket: ( + - word: DOB + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: DROP + - word: COLUMNS + - word: IF + - word: EXISTS + - start_bracket: ( + - word: LastName + - comma: ',' + - word: DOB + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: ADD + - word: IF + - word: NOT + - word: EXISTS + - word: PARTITION + - start_bracket: ( + - word: age + - raw_comparison_operator: = + - numeric_literal: '18' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: DROP + - word: IF + - word: EXISTS + - word: PARTITION + - start_bracket: ( + - word: age + - raw_comparison_operator: = + - numeric_literal: '18' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: ADD + - word: IF + - word: NOT + - word: EXISTS + - word: PARTITION + - start_bracket: ( + - word: age + - raw_comparison_operator: = + - numeric_literal: '18' + - end_bracket: ) + - word: PARTITION + - start_bracket: ( + - word: age + - raw_comparison_operator: = + - numeric_literal: '20' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: RECOVER + - word: PARTITIONS + - semicolon: ; + - word: ALTER + - word: TABLE + - word: 
StudentInfo + - word: ALTER + - word: COLUMN + - word: name + - word: COMMENT + - double_quote: '"new comment"' + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: RENAME + - word: COLUMN + - word: name + - word: TO + - word: FirstName + - semicolon: ; + - word: ALTER + - word: TABLE + - word: dbx + - dot: . + - word: tab1 + - word: PARTITION + - start_bracket: ( + - word: a + - raw_comparison_operator: = + - single_quote: '''1''' + - comma: ',' + - word: b + - raw_comparison_operator: = + - single_quote: '''2''' + - end_bracket: ) + - word: SET + - word: LOCATION + - single_quote: '''/path/to/part/ways''' + - semicolon: ; + - word: ALTER + - word: TABLE + - word: test_tab + - word: SET + - word: SERDE + - single_quote: '''org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe''' + - semicolon: ; + - word: ALTER + - word: TABLE + - word: dbx + - dot: . + - word: tab1 + - word: SET + - word: SERDE + - single_quote: '''org.apache.hadoop''' + - word: WITH + - word: SERDEPROPERTIES + - start_bracket: ( + - single_quote: '''k''' + - raw_comparison_operator: = + - single_quote: '''v''' + - comma: ',' + - single_quote: '''kay''' + - raw_comparison_operator: = + - single_quote: '''vee''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: dbx + - dot: . + - word: tab1 + - word: SET + - word: TBLPROPERTIES + - start_bracket: ( + - single_quote: '''winner''' + - raw_comparison_operator: = + - single_quote: '''loser''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: dbx + - dot: . 
+ - word: tab1 + - word: UNSET + - word: TBLPROPERTIES + - start_bracket: ( + - single_quote: '''winner''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: my_table + - word: DROP + - word: FEATURE + - word: deletionVectors + - semicolon: ; + - word: ALTER + - word: TABLE + - word: my_table + - word: DROP + - word: FEATURE + - word: deletionVectors + - word: TRUNCATE + - word: HISTORY + - semicolon: ; + - word: ALTER + - word: TABLE + - word: test + - word: SET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - raw_comparison_operator: = + - single_quote: '''val1''' + - comma: ',' + - single_quote: '''tag2''' + - raw_comparison_operator: = + - single_quote: '''val2''' + - comma: ',' + - single_quote: '''tag3''' + - raw_comparison_operator: = + - single_quote: '''val3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: test + - word: UNSET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - comma: ',' + - single_quote: '''tag2''' + - comma: ',' + - single_quote: '''tag3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: main + - dot: . + - word: schema1 + - dot: . + - word: test + - word: ALTER + - word: COLUMN + - word: col1 + - word: SET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - raw_comparison_operator: = + - single_quote: '''val1''' + - comma: ',' + - single_quote: '''tag2''' + - raw_comparison_operator: = + - single_quote: '''val2''' + - comma: ',' + - single_quote: '''tag3''' + - raw_comparison_operator: = + - single_quote: '''val3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: main + - dot: . + - word: schema1 + - dot: . 
+ - word: test + - word: ALTER + - word: COLUMN + - word: col1 + - word: UNSET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - comma: ',' + - single_quote: '''tag2''' + - comma: ',' + - single_quote: '''tag3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: my_table + - word: ENABLE + - word: PREDICTIVE + - word: OPTIMIZATION + - semicolon: ; + - word: ALTER + - word: TABLE + - word: sales + - word: SET + - word: ROW + - word: FILTER + - word: us_filter + - word: ON + - start_bracket: ( + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: sales + - word: SET + - word: ROW + - word: FILTER + - word: us_filter + - word: ON + - start_bracket: ( + - word: region + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: sales + - word: DROP + - word: ROW + - word: FILTER + - semicolon: ; + - word: ALTER + - word: TABLE + - word: users + - word: ALTER + - word: COLUMN + - word: ssn + - word: SET + - word: MASK + - word: ssn_mask + - semicolon: ; + - word: ALTER + - word: TABLE + - word: users + - word: ALTER + - word: COLUMN + - word: ssn + - word: SET + - word: MASK + - word: ssn_mask + - word: USING + - word: COLUMNS + - start_bracket: ( + - word: ssn_value + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: users + - word: ALTER + - word: COLUMN + - word: ssn + - word: DROP + - word: MASK + - semicolon: ; + - word: ALTER + - word: TABLE + - word: persons + - word: ADD + - word: CONSTRAINT + - word: persons_pk + - word: PRIMARY + - word: KEY + - start_bracket: ( + - word: first_name + - comma: ',' + - word: last_name + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: pets + - word: ADD + - word: CONSTRAINT + - word: pets_persons_fk + - word: FOREIGN + - word: KEY + - start_bracket: ( + - word: owner_first_name + - comma: ',' + - word: owner_last_name + - end_bracket: ) + - word: REFERENCES + - word: persons + - word: NOT 
+ - word: ENFORCED + - word: RELY + - semicolon: ; + - word: ALTER + - word: TABLE + - word: pets + - word: ADD + - word: CONSTRAINT + - word: pets_name_not_cute_chk + - word: CHECK + - start_bracket: ( + - word: length + - start_bracket: ( + - word: name + - end_bracket: ) + - raw_comparison_operator: < + - numeric_literal: '20' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: pets + - word: DROP + - word: CONSTRAINT + - word: pets_name_not_cute_chk + - semicolon: ; + - word: ALTER + - word: TABLE + - word: persons + - word: DROP + - word: CONSTRAINT + - word: persons_pk + - word: RESTRICT + - semicolon: ; + - word: ALTER + - word: TABLE + - word: pets + - word: DROP + - word: FOREIGN + - word: KEY + - word: IF + - word: EXISTS + - start_bracket: ( + - word: owner_first_name + - comma: ',' + - word: owner_last_name + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: persons + - word: DROP + - word: PRIMARY + - word: KEY + - word: CASCADE + - semicolon: ; + - word: ALTER + - word: TABLE + - word: rocks + - word: DROP + - word: COLUMN + - word: rock + - semicolon: ; + - word: ALTER + - word: TABLE + - word: rocks + - word: DROP + - word: COLUMN + - word: rock + - comma: ',' + - word: loc + - semicolon: ; + - word: ALTER + - word: TABLE + - word: rocks + - word: DROP + - word: COLUMN + - word: IF + - word: EXISTS + - word: rock + - comma: ',' + - word: loc + - semicolon: ; + - word: ALTER + - word: TABLE + - word: rocks + - word: DROP + - word: COLUMN + - word: IF + - word: EXISTS + - start_bracket: ( + - word: rock + - comma: ',' + - word: loc + - end_bracket: ) + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.sql new file mode 100644 index 000000000..a4c57db08 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.sql @@ -0,0 +1,34 @@ +-- ALTER TABLE examples from 
Databricks documentation +-- https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-alter-view.html + +ALTER VIEW tempsc1.v1 RENAME TO tempsc1.v2; + +ALTER VIEW IDENTIFIER('tempsc1.v1') RENAME TO IDENTIFIER('tempsc1.v2'); + +ALTER VIEW tempsc1.v2 SET TBLPROPERTIES ('created.by.user' = "John", 'created.date' = '01-01-2001' ); + +ALTER VIEW tempsc1.v2 UNSET TBLPROPERTIES (`created`.`by`.`user`, created.date); + +ALTER VIEW tempsc1.v2 AS SELECT * FROM tempsc1.v1; + +ALTER VIEW v1 OWNER TO `alf@melmak.et`; + +ALTER VIEW v1 SET OWNER TO `alf@melmak.et`; + +ALTER VIEW v1 WITH SCHEMA BINDING; +ALTER VIEW v1 WITH SCHEMA COMPENSATION; +ALTER VIEW v1 WITH SCHEMA TYPE EVOLUTION; +ALTER VIEW v1 WITH SCHEMA EVOLUTION; + +ALTER MATERIALIZED VIEW my_mv + ADD SCHEDULE CRON '0 0 0 * * ? *' AT TIME ZONE 'America/Los_Angeles'; + +ALTER MATERIALIZED VIEW my_mv + ALTER SCHEDULE CRON '0 0/15 * * * ? *'; + +ALTER MATERIALIZED VIEW my_mv + DROP SCHEDULE; + +ALTER VIEW test SET TAGS ('tag1' = 'val1', 'tag2' = 'val2', 'tag3' = 'val3'); + +ALTER VIEW test UNSET TAGS ('tag1', 'tag2', 'tag3'); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.yml new file mode 100644 index 000000000..3503139a0 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.yml @@ -0,0 +1,184 @@ +file: +- statement: + - alter_view_statement: + - keyword: ALTER + - keyword: VIEW + - table_reference: + - naked_identifier: tempsc1 + - dot: . + - naked_identifier: v1 + - keyword: RENAME + - keyword: TO + - table_reference: + - naked_identifier: tempsc1 + - dot: . 
+ - naked_identifier: v2 +- statement_terminator: ; +- file: + - word: ALTER + - word: VIEW + - word: IDENTIFIER + - start_bracket: ( + - single_quote: '''tempsc1.v1''' + - end_bracket: ) + - word: RENAME + - word: TO + - word: IDENTIFIER + - start_bracket: ( + - single_quote: '''tempsc1.v2''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: VIEW + - word: tempsc1 + - dot: . + - word: v2 + - word: SET + - word: TBLPROPERTIES + - start_bracket: ( + - single_quote: '''created.by.user''' + - raw_comparison_operator: = + - double_quote: '"John"' + - comma: ',' + - single_quote: '''created.date''' + - raw_comparison_operator: = + - single_quote: '''01-01-2001''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: VIEW + - word: tempsc1 + - dot: . + - word: v2 + - word: UNSET + - word: TBLPROPERTIES + - start_bracket: ( + - back_quote: '`created`' + - dot: . + - back_quote: '`by`' + - dot: . + - back_quote: '`user`' + - comma: ',' + - word: created + - dot: . + - word: date + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: VIEW + - word: tempsc1 + - dot: . + - word: v2 + - word: AS + - word: SELECT + - star: '*' + - word: FROM + - word: tempsc1 + - dot: . 
+ - word: v1 + - semicolon: ; + - word: ALTER + - word: VIEW + - word: v1 + - word: OWNER + - word: TO + - back_quote: '`alf@melmak.et`' + - semicolon: ; + - word: ALTER + - word: VIEW + - word: v1 + - word: SET + - word: OWNER + - word: TO + - back_quote: '`alf@melmak.et`' + - semicolon: ; + - word: ALTER + - word: VIEW + - word: v1 + - word: WITH + - word: SCHEMA + - word: BINDING + - semicolon: ; + - word: ALTER + - word: VIEW + - word: v1 + - word: WITH + - word: SCHEMA + - word: COMPENSATION + - semicolon: ; + - word: ALTER + - word: VIEW + - word: v1 + - word: WITH + - word: SCHEMA + - word: TYPE + - word: EVOLUTION + - semicolon: ; + - word: ALTER + - word: VIEW + - word: v1 + - word: WITH + - word: SCHEMA + - word: EVOLUTION + - semicolon: ; + - word: ALTER + - word: MATERIALIZED + - word: VIEW + - word: my_mv + - word: ADD + - word: SCHEDULE + - word: CRON + - single_quote: '''0 0 0 * * ? *''' + - word: AT + - word: TIME + - word: ZONE + - single_quote: '''America/Los_Angeles''' + - semicolon: ; + - word: ALTER + - word: MATERIALIZED + - word: VIEW + - word: my_mv + - word: ALTER + - word: SCHEDULE + - word: CRON + - single_quote: '''0 0/15 * * * ? 
*''' + - semicolon: ; + - word: ALTER + - word: MATERIALIZED + - word: VIEW + - word: my_mv + - word: DROP + - word: SCHEDULE + - semicolon: ; + - word: ALTER + - word: VIEW + - word: test + - word: SET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - raw_comparison_operator: = + - single_quote: '''val1''' + - comma: ',' + - single_quote: '''tag2''' + - raw_comparison_operator: = + - single_quote: '''val2''' + - comma: ',' + - single_quote: '''tag3''' + - raw_comparison_operator: = + - single_quote: '''val3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: VIEW + - word: test + - word: UNSET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - comma: ',' + - single_quote: '''tag2''' + - comma: ',' + - single_quote: '''tag3''' + - end_bracket: ) + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.sql new file mode 100644 index 000000000..417e74ee1 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.sql @@ -0,0 +1,16 @@ +-- Rename a volume +ALTER VOLUME some_vol RENAME TO some_new_vol; + +-- Transfer ownership of the volume to another user +ALTER VOLUME some_vol OWNER TO `alf@melmak.et`; +ALTER VOLUME some_vol OWNER TO my_group; + +-- SET is allowed as an optional keyword +ALTER VOLUME some_vol SET OWNER TO `alf@melmak.et`; +ALTER VOLUME some_vol SET OWNER TO my_group; + +-- Set and unset volume tags +ALTER VOLUME some_vol SET TAGS ('tag1'='value1'); +ALTER VOLUME some_vol SET TAGS ('tag2'='value2', 'tag3'='value3'); +ALTER VOLUME some_vol UNSET TAGS ('tag1'); +ALTER VOLUME some_vol UNSET TAGS ('tag2', 'tag3'); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.yml new file mode 100644 index 000000000..ad6ab7d6e --- /dev/null +++ 
b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.yml @@ -0,0 +1,116 @@ +file: +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: RENAME + - keyword: TO + - object_reference: + - naked_identifier: some_new_vol +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: SET + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: SET + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: SET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag1''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value1''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: SET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag2''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value2''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''tag3''' + - comparison_operator: + - 
raw_comparison_operator: = + - quoted_literal: '''value3''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: UNSET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag1''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: UNSET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag2''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''tag3''' + - end_bracket: ) +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.sql new file mode 100644 index 000000000..ad6185285 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.sql @@ -0,0 +1,12 @@ +-- Databricks notebook source + +SELECT COL1 FROM TABLE1 + +-- COMMAND ---------- + +SELECT COL2 FROM TABLE2 + +-- COMMAND ---------- + +SELECT COL3 FROM TABLE3; +SELECT COL4 FROM TABLE4; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.yml new file mode 100644 index 000000000..96c7c58ca --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.yml @@ -0,0 +1,40 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: COL1 + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: TABLE1 + - unparsable: + - word: SELECT + - word: COL2 + - word: FROM + - 
word: TABLE2 + - word: SELECT + - word: COL3 + - word: FROM + - word: TABLE3 +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: COL4 + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: TABLE4 +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.sql new file mode 100644 index 000000000..3b91abfa8 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.sql @@ -0,0 +1,19 @@ +COMMENT ON CATALOG my_catalog IS 'This is my catalog'; + +COMMENT ON CONNECTION mysql_connection IS 'this is a mysql connection'; + +COMMENT ON SCHEMA my_schema IS 'This is my schema'; + +COMMENT ON DATABASE my_other_schema IS 'This is my other schema'; + +COMMENT ON TABLE my_table IS 'This is my table'; + +COMMENT ON TABLE my_table IS NULL; + +COMMENT ON SHARE my_share IS 'A good share'; + +COMMENT ON RECIPIENT my_recipient IS 'A good recipient'; + +COMMENT ON PROVIDER my_provider IS 'A good provider'; + +COMMENT ON VOLUME my_volume IS 'Huge volume'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml new file mode 100644 index 000000000..9aa2ef3b8 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml @@ -0,0 +1,91 @@ +file: +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: CATALOG + - object_reference: + - naked_identifier: my_catalog + - keyword: IS + - quoted_literal: '''This is my catalog''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: CONNECTION + - object_reference: + - naked_identifier: mysql_connection + - keyword: IS + - 
quoted_literal: '''this is a mysql connection''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: SCHEMA + - object_reference: + - naked_identifier: my_schema + - keyword: IS + - quoted_literal: '''This is my schema''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: DATABASE + - object_reference: + - naked_identifier: my_other_schema + - keyword: IS + - quoted_literal: '''This is my other schema''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: TABLE + - table_reference: + - naked_identifier: my_table + - keyword: IS + - quoted_literal: '''This is my table''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: TABLE + - table_reference: + - naked_identifier: my_table + - keyword: IS + - keyword: 'NULL' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: SHARE + - object_reference: + - naked_identifier: my_share + - keyword: IS + - quoted_literal: '''A good share''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: RECIPIENT + - object_reference: + - naked_identifier: my_recipient + - keyword: IS + - quoted_literal: '''A good recipient''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: PROVIDER + - object_reference: + - naked_identifier: my_provider + - keyword: IS + - quoted_literal: '''A good provider''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: VOLUME + - object_reference: + - naked_identifier: my_volume + - keyword: IS + - quoted_literal: '''Huge volume''' +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.sql new file mode 100644 index 000000000..04803bbec --- /dev/null +++ 
b/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.sql @@ -0,0 +1,9 @@ +-- Create catalog `customer_cat`. +-- This throws exception if catalog with name customer_cat already exists. +CREATE CATALOG customer_cat; + +-- Create catalog `customer_cat` only if catalog with same name doesn't exist. +CREATE CATALOG IF NOT EXISTS customer_cat; + +-- Create catalog `customer_cat` only if catalog with same name doesn't exist, with a comment. +CREATE CATALOG IF NOT EXISTS customer_cat COMMENT 'This is customer catalog'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.yml new file mode 100644 index 000000000..bd6cb1070 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.yml @@ -0,0 +1,27 @@ +file: +- statement: + - keyword: CREATE + - keyword: CATALOG + - object_reference: + - naked_identifier: customer_cat +- statement_terminator: ; +- statement: + - keyword: CREATE + - keyword: CATALOG + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - object_reference: + - naked_identifier: customer_cat +- statement_terminator: ; +- statement: + - keyword: CREATE + - keyword: CATALOG + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - object_reference: + - naked_identifier: customer_cat + - keyword: COMMENT + - quoted_literal: '''This is customer catalog''' +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.sql new file mode 100644 index 000000000..819f8efe9 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.sql @@ -0,0 +1,30 @@ +-- Create database with all optional syntax +CREATE DATABASE IF NOT EXISTS database_name +COMMENT "database_comment" +LOCATION "root/database_directory" +WITH DBPROPERTIES ( "property_name" = "property_value"); + +-- 
Create schema with all optional syntax +CREATE SCHEMA IF NOT EXISTS database_name +COMMENT "database_comment" +LOCATION "root/database_directory" +WITH DBPROPERTIES ( "property_name" = "property_value" ); + +-- Create database `customer_db`. +CREATE DATABASE customer_db; + +-- Create database `customer_db` only if database with same name doesn't exist. +CREATE DATABASE IF NOT EXISTS customer_db; + +-- `Comments`,`Specific Location` and `Database properties`. +CREATE DATABASE IF NOT EXISTS customer_db +COMMENT 'This is customer database' LOCATION '/user' +WITH DBPROPERTIES ("ID" = "001", "Name" = 'John'); + +-- Create `inventory_db` Database +CREATE DATABASE inventory_db +COMMENT 'This database is used to maintain Inventory'; + +-- Create schema with a managed location +CREATE SCHEMA IF NOT EXISTS database_name +MANAGED LOCATION "s3://root_database_bucket/" diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.yml new file mode 100644 index 000000000..89458acc9 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.yml @@ -0,0 +1,118 @@ +file: +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: DATABASE + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - object_reference: + - naked_identifier: database_name + - keyword: COMMENT + - quoted_literal: '"database_comment"' + - keyword: LOCATION + - quoted_literal: '"root/database_directory"' + - keyword: WITH + - keyword: DBPROPERTIES + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '"property_name"' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '"property_value"' + - end_bracket: ) +- statement_terminator: ; +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: SCHEMA + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - object_reference: + - 
naked_identifier: database_name + - keyword: COMMENT + - quoted_literal: '"database_comment"' + - keyword: LOCATION + - quoted_literal: '"root/database_directory"' + - keyword: WITH + - keyword: DBPROPERTIES + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '"property_name"' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '"property_value"' + - end_bracket: ) +- statement_terminator: ; +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: DATABASE + - object_reference: + - naked_identifier: customer_db +- statement_terminator: ; +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: DATABASE + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - object_reference: + - naked_identifier: customer_db +- statement_terminator: ; +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: DATABASE + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - object_reference: + - naked_identifier: customer_db + - keyword: COMMENT + - quoted_literal: '''This is customer database''' + - keyword: LOCATION + - quoted_literal: '''/user''' + - keyword: WITH + - keyword: DBPROPERTIES + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '"ID"' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '"001"' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '"Name"' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''John''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: DATABASE + - object_reference: + - naked_identifier: inventory_db + - keyword: COMMENT + - quoted_literal: '''This database is used to maintain Inventory''' +- statement_terminator: ; +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: SCHEMA + - keyword: IF + - keyword: NOT + - keyword: 
EXISTS + - object_reference: + - naked_identifier: database_name +- file: + - word: MANAGED + - word: LOCATION + - double_quote: '"s3://root_database_bucket/"' diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_function.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/create_function.sql new file mode 100644 index 000000000..b84a5e451 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_function.sql @@ -0,0 +1,106 @@ +-- Create FUNCTION with all optional syntax +CREATE OR REPLACE TEMPORARY FUNCTION IF NOT EXISTS +function_name AS "class_name" USING FILE "resource_locations"; + +-- Create a permanent function called `simple_udf`. +CREATE FUNCTION simple_udf AS 'SimpleUdf' +USING JAR '/tmp/SimpleUdf.jar'; + +-- Created a temporary function. +CREATE TEMPORARY FUNCTION simple_temp_udf AS 'SimpleUdf' +USING JAR '/tmp/SimpleUdf.jar'; + +-- Replace the implementation of `simple_udf` +CREATE OR REPLACE FUNCTION simple_udf AS 'SimpleUdfR' +USING JAR '/tmp/SimpleUdfR.jar'; + +-- Create a permanent function `test_avg` +CREATE FUNCTION test_avg +AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage'; + +---- Create Temporary function `test_avg` +CREATE TEMPORARY FUNCTION test_avg +AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage'; + +-- Create a temporary function with no parameter +CREATE TEMPORARY FUNCTION hello() +RETURNS STRING RETURN 'Hello World!'; + +-- Create a temporary function with no parameter. +CREATE OR REPLACE TEMPORARY FUNCTION function_name() +RETURNS TIMESTAMP LANGUAGE SQL +RETURN SELECT MAX(time) AS time FROM my_table; + +-- Create a permanent function with parameters +CREATE FUNCTION area(x DOUBLE, y DOUBLE) +RETURNS DOUBLE +RETURN x * y; + +-- Compose SQL functions. 
+CREATE FUNCTION square(x DOUBLE) +RETURNS DOUBLE +RETURN area(x, x); + +-- Create a CTE function +CREATE FUNCTION cte_function(x INT) +RETURNS string +LANGUAGE SQL +RETURN +WITH cte AS (SELECT x AS y) +SELECT * FROM cte; + +-- Create a non-deterministic function +CREATE FUNCTION roll_dice() + RETURNS INT + NOT DETERMINISTIC + CONTAINS SQL + COMMENT 'Roll a single 6 sided die' + RETURN (rand() * 6)::INT + 1; + + +-- Create a non-deterministic function with parameters and defaults +CREATE FUNCTION roll_dice(num_dice INT DEFAULT 1 COMMENT 'number of dice to roll (Default: 1)', + num_sides INT DEFAULT 6 COMMENT 'number of sides per die (Default: 6)') + RETURNS INT + NOT DETERMINISTIC + CONTAINS SQL + COMMENT 'Roll a number of n-sided dice' + RETURN aggregate(sequence(1, roll_dice.num_dice, 1), + 0, + (acc, x) -> (rand() * roll_dice.num_sides)::int, + acc -> acc + roll_dice.num_dice); + +-- Create Python functions +CREATE FUNCTION main.default.greet(s STRING) + RETURNS STRING + LANGUAGE PYTHON + AS $$ + def greet(name): + return "Hello " + name + "!" 
+ + return greet(s) if s else None + $$; + +-- Created Table Valued Function simple +CREATE FUNCTION return_table() +RETURNS TABLE +RETURN +SELECT time FROM my_table +; + +-- Created Table Valued Function with column spec + comment +CREATE FUNCTION return_table() +RETURNS TABLE (col_a string, col_b string comment "asdf") +RETURN +SELECT col_a, col_b FROM my_table +; + + +-- backticked identifier +create or replace function `catalog`.`schema`.`name` ( + param int +) +returns int +return +select param +; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_function.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/create_function.yml new file mode 100644 index 000000000..c85997cc3 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_function.yml @@ -0,0 +1,341 @@ +file: +- statement: + - create_function_statement: + - keyword: CREATE + - keyword: OR + - keyword: REPLACE + - keyword: TEMPORARY + - keyword: FUNCTION + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - function_name_identifier: function_name + - keyword: AS + - quoted_literal: '"class_name"' + - keyword: USING + - file_keyword: FILE + - quoted_literal: '"resource_locations"' +- statement_terminator: ; +- statement: + - create_function_statement: + - keyword: CREATE + - keyword: FUNCTION + - function_name_identifier: simple_udf + - keyword: AS + - quoted_literal: '''SimpleUdf''' + - keyword: USING + - file_keyword: JAR + - quoted_literal: '''/tmp/SimpleUdf.jar''' +- statement_terminator: ; +- statement: + - create_function_statement: + - keyword: CREATE + - keyword: TEMPORARY + - keyword: FUNCTION + - function_name_identifier: simple_temp_udf + - keyword: AS + - quoted_literal: '''SimpleUdf''' + - keyword: USING + - file_keyword: JAR + - quoted_literal: '''/tmp/SimpleUdf.jar''' +- statement_terminator: ; +- statement: + - create_function_statement: + - keyword: CREATE + - keyword: OR + - keyword: REPLACE + - keyword: FUNCTION + - 
function_name_identifier: simple_udf + - keyword: AS + - quoted_literal: '''SimpleUdfR''' + - keyword: USING + - file_keyword: JAR + - quoted_literal: '''/tmp/SimpleUdfR.jar''' +- statement_terminator: ; +- statement: + - create_function_statement: + - keyword: CREATE + - keyword: FUNCTION + - function_name_identifier: test_avg + - keyword: AS + - quoted_literal: '''org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage''' +- statement_terminator: ; +- statement: + - create_function_statement: + - keyword: CREATE + - keyword: TEMPORARY + - keyword: FUNCTION + - function_name_identifier: test_avg + - keyword: AS + - quoted_literal: '''org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage''' +- statement_terminator: ; +- file: + - word: CREATE + - word: TEMPORARY + - word: FUNCTION + - word: hello + - start_bracket: ( + - end_bracket: ) + - word: RETURNS + - word: STRING + - word: RETURN + - single_quote: '''Hello World!''' + - semicolon: ; + - word: CREATE + - word: OR + - word: REPLACE + - word: TEMPORARY + - word: FUNCTION + - word: function_name + - start_bracket: ( + - end_bracket: ) + - word: RETURNS + - word: TIMESTAMP + - word: LANGUAGE + - word: SQL + - word: RETURN + - word: SELECT + - word: MAX + - start_bracket: ( + - word: time + - end_bracket: ) + - word: AS + - word: time + - word: FROM + - word: my_table + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: area + - start_bracket: ( + - word: x + - word: DOUBLE + - comma: ',' + - word: y + - word: DOUBLE + - end_bracket: ) + - word: RETURNS + - word: DOUBLE + - word: RETURN + - word: x + - star: '*' + - word: y + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: square + - start_bracket: ( + - word: x + - word: DOUBLE + - end_bracket: ) + - word: RETURNS + - word: DOUBLE + - word: RETURN + - word: area + - start_bracket: ( + - word: x + - comma: ',' + - word: x + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: cte_function + - start_bracket: ( + 
- word: x + - word: INT + - end_bracket: ) + - word: RETURNS + - word: string + - word: LANGUAGE + - word: SQL + - word: RETURN + - word: WITH + - word: cte + - word: AS + - start_bracket: ( + - word: SELECT + - word: x + - word: AS + - word: y + - end_bracket: ) + - word: SELECT + - star: '*' + - word: FROM + - word: cte + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: roll_dice + - start_bracket: ( + - end_bracket: ) + - word: RETURNS + - word: INT + - word: NOT + - word: DETERMINISTIC + - word: CONTAINS + - word: SQL + - word: COMMENT + - single_quote: '''Roll a single 6 sided die''' + - word: RETURN + - start_bracket: ( + - word: rand + - start_bracket: ( + - end_bracket: ) + - star: '*' + - numeric_literal: '6' + - end_bracket: ) + - casting_operator: '::' + - word: INT + - plus: + + - numeric_literal: '1' + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: roll_dice + - start_bracket: ( + - word: num_dice + - word: INT + - word: DEFAULT + - numeric_literal: '1' + - word: COMMENT + - single_quote: '''number of dice to roll (Default: 1)''' + - comma: ',' + - word: num_sides + - word: INT + - word: DEFAULT + - numeric_literal: '6' + - word: COMMENT + - single_quote: '''number of sides per die (Default: 6)''' + - end_bracket: ) + - word: RETURNS + - word: INT + - word: NOT + - word: DETERMINISTIC + - word: CONTAINS + - word: SQL + - word: COMMENT + - single_quote: '''Roll a number of n-sided dice''' + - word: RETURN + - word: aggregate + - start_bracket: ( + - word: sequence + - start_bracket: ( + - numeric_literal: '1' + - comma: ',' + - word: roll_dice + - dot: . + - word: num_dice + - comma: ',' + - numeric_literal: '1' + - end_bracket: ) + - comma: ',' + - numeric_literal: '0' + - comma: ',' + - start_bracket: ( + - word: acc + - comma: ',' + - word: x + - end_bracket: ) + - right_arrow: -> + - start_bracket: ( + - word: rand + - start_bracket: ( + - end_bracket: ) + - star: '*' + - word: roll_dice + - dot: . 
+ - word: num_sides + - end_bracket: ) + - casting_operator: '::' + - word: int + - comma: ',' + - word: acc + - right_arrow: -> + - word: acc + - plus: + + - word: roll_dice + - dot: . + - word: num_dice + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: main + - dot: . + - word: default + - dot: . + - word: greet + - start_bracket: ( + - word: s + - word: STRING + - end_bracket: ) + - word: RETURNS + - word: STRING + - word: LANGUAGE + - word: PYTHON + - word: AS + - dollar_quote: |- + $$ + def greet(name): + return "Hello " + name + "!" + + return greet(s) if s else None + $$ + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: return_table + - start_bracket: ( + - end_bracket: ) + - word: RETURNS + - word: TABLE + - word: RETURN + - word: SELECT + - word: time + - word: FROM + - word: my_table + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: return_table + - start_bracket: ( + - end_bracket: ) + - word: RETURNS + - word: TABLE + - start_bracket: ( + - word: col_a + - word: string + - comma: ',' + - word: col_b + - word: string + - word: comment + - double_quote: '"asdf"' + - end_bracket: ) + - word: RETURN + - word: SELECT + - word: col_a + - comma: ',' + - word: col_b + - word: FROM + - word: my_table + - semicolon: ; + - word: create + - word: or + - word: replace + - word: function + - back_quote: '`catalog`' + - dot: . + - back_quote: '`schema`' + - dot: . 
+ - back_quote: '`name`' + - start_bracket: ( + - word: param + - word: int + - end_bracket: ) + - word: returns + - word: int + - word: return + - word: select + - word: param + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_table.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/create_table.sql new file mode 100644 index 000000000..342959dbf --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_table.sql @@ -0,0 +1,80 @@ +CREATE TABLE tablename +( + id_column INT, + othercolumn STRING, + generated_always_as_expression DATE GENERATED ALWAYS AS (CAST(birth_date AS DATE)), + generated_by_default BIGINT GENERATED BY DEFAULT AS IDENTITY, + generated_always BIGINT GENERATED ALWAYS AS IDENTITY, + generated_column_start_with BIGINT GENERATED ALWAYS AS IDENTITY (START WITH 10), + generated_column_increment_by BIGINT GENERATED ALWAYS AS IDENTITY (INCREMENT BY 5), + generated_column_start_with_increment_by BIGINT GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 5) +) +USING DELTA +LOCATION "s3://someplace" +CLUSTER BY (id_column); +OPTIMIZE tablename; + + +OPTIMIZE tablename +WHERE date >= current_timestamp() - INTERVAL 1 day +ZORDER BY (eventType, eventTime); + + +-- Creates a Delta table +CREATE TABLE student (id INT, name STRING, age INT); + +-- Use data from another table +CREATE TABLE student_copy AS SELECT * FROM student; + +-- Creates a CSV table from an external directory +CREATE TABLE student USING CSV LOCATION '/path/to/csv_files'; + +-- Specify table comment and properties +CREATE TABLE student (id INT, name STRING, age INT) + COMMENT 'this is a comment' + TBLPROPERTIES ('foo'='bar'); + +-- Specify table comment and properties with different clauses order +CREATE TABLE student (id INT, name STRING, age INT) + TBLPROPERTIES ('foo'='bar') + COMMENT 'this is a comment'; + +-- Create partitioned table +CREATE TABLE student (id INT, name STRING, age INT) + PARTITIONED BY (age); + 
+-- Create a table with a generated column +CREATE TABLE rectangles(a INT, b INT, + area INT GENERATED ALWAYS AS (a * b)); + +-- Create a table with a primary key +CREATE TABLE rectangles(a INT, b INT PRIMARY KEY); + +-- Create a table with a not null primary key +CREATE TABLE rectangles(a INT NOT NULL, b INT NOT NULL PRIMARY KEY); + +-- Create a table with a foreign key relation +CREATE OR REPLACE TABLE TABLE1 ( + DATE_VALUE DATE NOT NULL + CONSTRAINT DATE_CONSTRAINT + FOREIGN KEY REFERENCES TABLE2 +); + +-- Create a table with a column with default value +CREATE TABLE student (id INT, name STRING DEFAULT 'bobby tables', age INT); + +-- Create a table with non nullable column with default value +CREATE TABLE student (id INT, name STRING NOT NULL DEFAULT 'bobby tables', age INT); + +-- Create a table with a default timestamp +CREATE TABLE clock ( + which_time TIMESTAMP DEFAULT current_timestamp() +); + +-- Create a table with mixing default value and constraints +CREATE TABLE clock ( + which_time TIMESTAMP CONSTRAINT clock_pk PRIMARY KEY DEFAULT current_timestamp() NOT NULL +); + +-- Creates a table using identifier +CREATE TABLE IDENTIFIER('student') (id INT, name STRING, age INT); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_table.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/create_table.yml new file mode 100644 index 000000000..6f937ada9 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_table.yml @@ -0,0 +1,362 @@ +file: +- statement: + - create_table_statement: + - keyword: CREATE + - keyword: TABLE + - table_reference: + - naked_identifier: tablename +- file: + - start_bracket: ( + - word: id_column + - word: INT + - comma: ',' + - word: othercolumn + - word: STRING + - comma: ',' + - word: generated_always_as_expression + - word: DATE + - word: GENERATED + - word: ALWAYS + - word: AS + - start_bracket: ( + - word: CAST + - start_bracket: ( + - word: birth_date + - word: AS + - word: 
DATE + - end_bracket: ) + - end_bracket: ) + - comma: ',' + - word: generated_by_default + - word: BIGINT + - word: GENERATED + - word: BY + - word: DEFAULT + - word: AS + - word: IDENTITY + - comma: ',' + - word: generated_always + - word: BIGINT + - word: GENERATED + - word: ALWAYS + - word: AS + - word: IDENTITY + - comma: ',' + - word: generated_column_start_with + - word: BIGINT + - word: GENERATED + - word: ALWAYS + - word: AS + - word: IDENTITY + - start_bracket: ( + - word: START + - word: WITH + - numeric_literal: '10' + - end_bracket: ) + - comma: ',' + - word: generated_column_increment_by + - word: BIGINT + - word: GENERATED + - word: ALWAYS + - word: AS + - word: IDENTITY + - start_bracket: ( + - word: INCREMENT + - word: BY + - numeric_literal: '5' + - end_bracket: ) + - comma: ',' + - word: generated_column_start_with_increment_by + - word: BIGINT + - word: GENERATED + - word: ALWAYS + - word: AS + - word: IDENTITY + - start_bracket: ( + - word: START + - word: WITH + - numeric_literal: '10' + - word: INCREMENT + - word: BY + - numeric_literal: '5' + - end_bracket: ) + - end_bracket: ) + - word: USING + - word: DELTA + - word: LOCATION + - double_quote: '"s3://someplace"' + - word: CLUSTER + - word: BY + - start_bracket: ( + - word: id_column + - end_bracket: ) + - semicolon: ; + - word: OPTIMIZE + - word: tablename + - semicolon: ; + - word: OPTIMIZE + - word: tablename + - word: WHERE + - word: date + - raw_comparison_operator: '>' + - raw_comparison_operator: = + - word: current_timestamp + - start_bracket: ( + - end_bracket: ) + - minus: '-' + - word: INTERVAL + - numeric_literal: '1' + - word: day + - word: ZORDER + - word: BY + - start_bracket: ( + - word: eventType + - comma: ',' + - word: eventTime + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - 
semicolon: ; + - word: CREATE + - word: TABLE + - word: student_copy + - word: AS + - word: SELECT + - star: '*' + - word: FROM + - word: student + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - word: USING + - word: CSV + - word: LOCATION + - single_quote: '''/path/to/csv_files''' + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - word: COMMENT + - single_quote: '''this is a comment''' + - word: TBLPROPERTIES + - start_bracket: ( + - single_quote: '''foo''' + - raw_comparison_operator: = + - single_quote: '''bar''' + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - word: TBLPROPERTIES + - start_bracket: ( + - single_quote: '''foo''' + - raw_comparison_operator: = + - single_quote: '''bar''' + - end_bracket: ) + - word: COMMENT + - single_quote: '''this is a comment''' + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - word: PARTITIONED + - word: BY + - start_bracket: ( + - word: age + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: rectangles + - start_bracket: ( + - word: a + - word: INT + - comma: ',' + - word: b + - word: INT + - comma: ',' + - word: area + - word: INT + - word: GENERATED + - word: ALWAYS + - word: AS + - start_bracket: ( + - word: a + - star: '*' + - word: b + - end_bracket: ) + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: rectangles + - start_bracket: ( + - word: a + - word: INT + - comma: ',' + - word: b + - word: 
INT + - word: PRIMARY + - word: KEY + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: rectangles + - start_bracket: ( + - word: a + - word: INT + - word: NOT + - word: 'NULL' + - comma: ',' + - word: b + - word: INT + - word: NOT + - word: 'NULL' + - word: PRIMARY + - word: KEY + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: OR + - word: REPLACE + - word: TABLE + - word: TABLE1 + - start_bracket: ( + - word: DATE_VALUE + - word: DATE + - word: NOT + - word: 'NULL' + - word: CONSTRAINT + - word: DATE_CONSTRAINT + - word: FOREIGN + - word: KEY + - word: REFERENCES + - word: TABLE2 + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - word: DEFAULT + - single_quote: '''bobby tables''' + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - word: NOT + - word: 'NULL' + - word: DEFAULT + - single_quote: '''bobby tables''' + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: clock + - start_bracket: ( + - word: which_time + - word: TIMESTAMP + - word: DEFAULT + - word: current_timestamp + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: clock + - start_bracket: ( + - word: which_time + - word: TIMESTAMP + - word: CONSTRAINT + - word: clock_pk + - word: PRIMARY + - word: KEY + - word: DEFAULT + - word: current_timestamp + - start_bracket: ( + - end_bracket: ) + - word: NOT + - word: 'NULL' + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: IDENTIFIER + - start_bracket: ( + - single_quote: '''student''' + - end_bracket: ) + - start_bracket: ( + - word: id + - word: INT 
+ - comma: ',' + - word: name + - word: STRING + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.sql new file mode 100644 index 000000000..99f9939a9 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.sql @@ -0,0 +1,27 @@ +-- Create volume `customer_vol`. +-- This throws exception if volume with name customer_vol already exists. +CREATE VOLUME customer_vol; + +-- Create volume `customer_vol` only if volume with same name doesn't exist. +CREATE VOLUME IF NOT EXISTS customer_vol; + +-- Create volume `customer_vol` only if volume with same name doesn't exist, +-- with a comment. +CREATE VOLUME IF NOT EXISTS customer_vol COMMENT 'This is customer volume'; + +-- Create external volume `customer_vol_external` +-- This throws exception if volume with name customer_vol_external +-- already exists. +CREATE EXTERNAL VOLUME customer_vol_external +LOCATION 's3://s3-path/'; + +-- Create external volume `customer_vol_external` +-- only if volume with same name doesn't exist, with a location. +CREATE EXTERNAL VOLUME IF NOT EXISTS customer_vol_external +LOCATION 's3://s3-path/'; + +-- Create external volume `customer_vol_external` +-- only if volume with same name doesn't exist, with a location and a comment. 
+CREATE EXTERNAL VOLUME IF NOT EXISTS customer_vol_external +LOCATION 's3://s3-path/' +COMMENT 'This is customer volume'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.yml new file mode 100644 index 000000000..4c6328f14 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.yml @@ -0,0 +1,51 @@ +file: +- unparsable: + - word: CREATE + - word: VOLUME + - word: customer_vol + - semicolon: ; + - word: CREATE + - word: VOLUME + - word: IF + - word: NOT + - word: EXISTS + - word: customer_vol + - semicolon: ; + - word: CREATE + - word: VOLUME + - word: IF + - word: NOT + - word: EXISTS + - word: customer_vol + - word: COMMENT + - single_quote: '''This is customer volume''' + - semicolon: ; + - word: CREATE + - word: EXTERNAL + - word: VOLUME + - word: customer_vol_external + - word: LOCATION + - single_quote: '''s3://s3-path/''' + - semicolon: ; + - word: CREATE + - word: EXTERNAL + - word: VOLUME + - word: IF + - word: NOT + - word: EXISTS + - word: customer_vol_external + - word: LOCATION + - single_quote: '''s3://s3-path/''' + - semicolon: ; + - word: CREATE + - word: EXTERNAL + - word: VOLUME + - word: IF + - word: NOT + - word: EXISTS + - word: customer_vol_external + - word: LOCATION + - single_quote: '''s3://s3-path/''' + - word: COMMENT + - single_quote: '''This is customer volume''' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.sql new file mode 100644 index 000000000..f74d12812 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.sql @@ -0,0 +1,19 @@ +SELECT + my_table.a, + other_table.b +FROM my_table +LEFT JOIN other_table + ON DATEDIFF(SECOND, my_table.timestamp_a, other_table.timestamp_b) > 1; + +SELECT + DATE_ADD(MICROSECOND, 5, start_dt) AS 
date_add_micro, + DATE_DIFF(MILLISECOND, start_dt, end_dt) AS datediff_milli, + DATEADD(MINUTE, 5, start_dt) AS dateadd_min, + DATEDIFF(HOUR, start_dt, end_dt) AS datediff_hr, + TIMEDIFF(DAY, start_dt, end_dt) AS timediff_day, + TIMESTAMPADD(DAYOFYEAR, 5, start_dt) AS ts_add_day_of_yr, + TIMESTAMPDIFF(WEEK, start_dt, end_dt) AS ts_diff_week, + DATE_ADD(MONTH, 5, start_dt) AS date_add_month, + DATE_ADD(QUARTER, 5, start_dt) AS date_add_quarter, + DATE_ADD(YEAR, 5, start_dt) AS date_add_year +FROM my_table; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.yml new file mode 100644 index 000000000..ee04fd25e --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.yml @@ -0,0 +1,281 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: my_table + - dot: . + - naked_identifier: a + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: other_table + - dot: . + - naked_identifier: b + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: my_table + - join_clause: + - keyword: LEFT + - keyword: JOIN + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: other_table + - join_on_condition: + - keyword: ON + - expression: + - function: + - function_name: + - function_name_identifier: DATEDIFF + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: SECOND + - comma: ',' + - expression: + - column_reference: + - naked_identifier: my_table + - dot: . + - naked_identifier: timestamp_a + - comma: ',' + - expression: + - column_reference: + - naked_identifier: other_table + - dot: . 
+ - naked_identifier: timestamp_b + - end_bracket: ) + - comparison_operator: + - raw_comparison_operator: '>' + - numeric_literal: '1' +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - function: + - function_name: + - function_name_identifier: DATE_ADD + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: MICROSECOND + - comma: ',' + - expression: + - numeric_literal: '5' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: date_add_micro + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: DATE_DIFF + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: MILLISECOND + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - comma: ',' + - expression: + - column_reference: + - naked_identifier: end_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: datediff_milli + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: DATEADD + - bracketed: + - start_bracket: ( + - date_part: MINUTE + - comma: ',' + - expression: + - numeric_literal: '5' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: dateadd_min + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: DATEDIFF + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: HOUR + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - comma: ',' + - expression: + - column_reference: + - naked_identifier: end_dt + - end_bracket: ) + - alias_expression: + - keyword: 
AS + - naked_identifier: datediff_hr + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: TIMEDIFF + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: DAY + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - comma: ',' + - expression: + - column_reference: + - naked_identifier: end_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: timediff_day + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: TIMESTAMPADD + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: DAYOFYEAR + - comma: ',' + - expression: + - numeric_literal: '5' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: ts_add_day_of_yr + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: TIMESTAMPDIFF + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: WEEK + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - comma: ',' + - expression: + - column_reference: + - naked_identifier: end_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: ts_diff_week + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: DATE_ADD + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: MONTH + - comma: ',' + - expression: + - numeric_literal: '5' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: date_add_month + - comma: ',' + - select_clause_element: + - function: + - function_name: + - 
function_name_identifier: DATE_ADD + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: QUARTER + - comma: ',' + - expression: + - numeric_literal: '5' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: date_add_quarter + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: DATE_ADD + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: YEAR + - comma: ',' + - expression: + - numeric_literal: '5' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: date_add_year + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: my_table +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.sql new file mode 100644 index 000000000..dc45ce325 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.sql @@ -0,0 +1,7 @@ +DECLARE var; +DECLARE OR REPLACE var; +DECLARE OR REPLACE VARIABLE var; +DECLARE var INT DEFAULT 5; +DECLARE var INT = 5; +DECLARE var = 5; +DECLARE var DEFAULT 5; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.yml new file mode 100644 index 000000000..88adf1290 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.yml @@ -0,0 +1,38 @@ +file: +- unparsable: + - word: DECLARE + - word: var + - semicolon: ; + - word: DECLARE + - word: OR + - 
word: REPLACE + - word: var + - semicolon: ; + - word: DECLARE + - word: OR + - word: REPLACE + - word: VARIABLE + - word: var + - semicolon: ; + - word: DECLARE + - word: var + - word: INT + - word: DEFAULT + - numeric_literal: '5' + - semicolon: ; + - word: DECLARE + - word: var + - word: INT + - raw_comparison_operator: = + - numeric_literal: '5' + - semicolon: ; + - word: DECLARE + - word: var + - raw_comparison_operator: = + - numeric_literal: '5' + - semicolon: ; + - word: DECLARE + - word: var + - word: DEFAULT + - numeric_literal: '5' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.sql new file mode 100644 index 000000000..4fcd7d849 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.sql @@ -0,0 +1,2 @@ +-- Describe the volume +DESCRIBE VOLUME VACCINE_VOLUME; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.yml new file mode 100644 index 000000000..73a07faa2 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.yml @@ -0,0 +1,8 @@ +file: +- statement: + - describe_statement: + - keyword: DESCRIBE + - table_reference: + - naked_identifier: VOLUME + - naked_identifier: VACCINE_VOLUME +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.sql new file mode 100644 index 000000000..a93a3fe0b --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.sql @@ -0,0 +1,5 @@ +-- Drop the catalog and its schemas +DROP CATALOG vaccine CASCADE; + +-- Drop the catalog using IF EXISTS and only if it is empty.
+DROP CATALOG IF EXISTS vaccine RESTRICT; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.yml new file mode 100644 index 000000000..ee3e98eb2 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.yml @@ -0,0 +1,17 @@ +file: +- statement: + - keyword: DROP + - keyword: CATALOG + - object_reference: + - naked_identifier: vaccine + - keyword: CASCADE +- statement_terminator: ; +- statement: + - keyword: DROP + - keyword: CATALOG + - keyword: IF + - keyword: EXISTS + - object_reference: + - naked_identifier: vaccine + - keyword: RESTRICT +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.sql new file mode 100644 index 000000000..cd3ebede1 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.sql @@ -0,0 +1,5 @@ +-- Drop the volume +DROP VOLUME vaccine_volume; + +-- Drop the volume using IF EXISTS. 
+DROP VOLUME IF EXISTS vaccine_volume; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.yml new file mode 100644 index 000000000..b21d0c759 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.yml @@ -0,0 +1,15 @@ +file: +- statement: + - keyword: DROP + - keyword: VOLUME + - object_reference: + - naked_identifier: vaccine_volume +- statement_terminator: ; +- statement: + - keyword: DROP + - keyword: VOLUME + - keyword: IF + - keyword: EXISTS + - object_reference: + - naked_identifier: vaccine_volume +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.sql new file mode 100644 index 000000000..9fafcadb0 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.sql @@ -0,0 +1,24 @@ +-- Databricks notebook source +-- MAGIC %md +-- MAGIC # Dummy Notebook + +-- COMMAND ---------- + +-- DBTITLE 1,Select Data + +SELECT x FROM y + +-- COMMAND ---------- + +-- MAGIC %python +-- MAGIC foo = 'bar' +-- MAGIC print(foo) + +-- COMMAND ---------- + +SELECT a FROM b; + +-- COMMAND ---------- + +-- MAGIC %sh +-- MAGIC echo heloworld diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.yml new file mode 100644 index 000000000..5b1e62bc2 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.yml @@ -0,0 +1,21 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: x + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: y + - unparsable: + - word: SELECT + - word: a + - word: FROM + - 
word: b +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.sql new file mode 100644 index 000000000..d59646f11 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.sql @@ -0,0 +1,7 @@ +--https://docs.databricks.com/en/sql/language-manual/sql-ref-function-invocation.html#named-parameter-invocation + +select my_function(arg1 => 3, arg2 => 4) from dual; + +select my_function(3, arg2 => 4) from dual; + +select my_function(arg1 => 3, 4) from dual; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.yml new file mode 100644 index 000000000..f3ce07dbc --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.yml @@ -0,0 +1,85 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - function: + - function_name: + - function_name_identifier: my_function + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: arg1 + - unparsable: + - right_arrow: => + - numeric_literal: '3' + - comma: ',' + - word: arg2 + - right_arrow: => + - numeric_literal: '4' + - end_bracket: ) + - from_clause: + - keyword: from + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dual +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - function: + - function_name: + - function_name_identifier: my_function + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '3' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: arg2 + - unparsable: + - right_arrow: => + - numeric_literal: '4' + - end_bracket: ) + - from_clause: + - 
keyword: from + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dual +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - function: + - function_name: + - function_name_identifier: my_function + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: arg1 + - unparsable: + - right_arrow: => + - numeric_literal: '3' + - comma: ',' + - numeric_literal: '4' + - end_bracket: ) + - from_clause: + - keyword: from + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dual +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/pivot.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/pivot.sql new file mode 100644 index 000000000..6d4df77d7 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/pivot.sql @@ -0,0 +1,38 @@ +-- Examples from https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-pivot.html + +-- A very basic PIVOT +-- Given a table with sales by quarter, return a table that returns sales across quarters per year. +SELECT year, region, q1, q2, q3, q4 +FROM sales +PIVOT (sum(sales) AS sales + FOR quarter + IN (1 AS q1, 2 AS q2, 3 AS q3, 4 AS q4)); + +-- Also PIVOT on region +SELECT year, q1_east, q1_west, q2_east, q2_west, q3_east, q3_west, q4_east, q4_west +FROM sales +PIVOT (sum(sales) AS sales + FOR (quarter, region) + IN ((1, 'east') AS q1_east, (1, 'west') AS q1_west, (2, 'east') AS q2_east, (2, 'west') AS q2_west, + (3, 'east') AS q3_east, (3, 'west') AS q3_west, (4, 'east') AS q4_east, (4, 'west') AS q4_west)); + +-- To aggregate across regions the column must be removed from the input. 
+SELECT year, q1, q2, q3, q4 +FROM (SELECT year, quarter, sales FROM sales) AS s +PIVOT (sum(sales) AS sales + FOR quarter + IN (1 AS q1, 2 AS q2, 3 AS q3, 4 AS q4)); + +-- A PIVOT with multiple aggregations +SELECT year, q1_total, q1_avg, q2_total, q2_avg, q3_total, q3_avg, q4_total, q4_avg + FROM (SELECT year, quarter, sales FROM sales) AS s + PIVOT (sum(sales) AS total, avg(sales) AS avg + FOR quarter + IN (1 AS q1, 2 AS q2, 3 AS q3, 4 AS q4)); + +-- A PIVOT with anonymous columns +SELECT year, region, q1, q2, q3, q4 +FROM sales +PIVOT (sum(sales) + FOR quarter + IN (1 AS q1, 2 AS q2, 3 AS q3, 4 AS q4)); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/pivot.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/pivot.yml new file mode 100644 index 000000000..a6b1fcdd3 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/pivot.yml @@ -0,0 +1,563 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: region + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4 + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: sales + - pivot_clause: + - keyword: PIVOT + - bracketed: + - start_bracket: ( + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sales + 
- keyword: FOR + - naked_identifier: quarter + - keyword: IN + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - alias_expression: + - keyword: AS + - naked_identifier: q1 + - comma: ',' + - expression: + - numeric_literal: '2' + - alias_expression: + - keyword: AS + - naked_identifier: q2 + - comma: ',' + - expression: + - numeric_literal: '3' + - alias_expression: + - keyword: AS + - naked_identifier: q3 + - comma: ',' + - expression: + - numeric_literal: '4' + - alias_expression: + - keyword: AS + - naked_identifier: q4 + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1_east + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1_west + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2_east + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2_west + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3_east + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3_west + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4_east + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4_west + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: sales + - pivot_clause: + - keyword: PIVOT + - bracketed: + - start_bracket: ( + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - 
naked_identifier: sales + - keyword: FOR + - bracketed: + - start_bracket: ( + - naked_identifier: quarter + - comma: ',' + - naked_identifier: region + - end_bracket: ) + - keyword: IN + - bracketed: + - start_bracket: ( + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - comma: ',' + - expression: + - quoted_literal: '''east''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q1_east + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - comma: ',' + - expression: + - quoted_literal: '''west''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q1_west + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '2' + - comma: ',' + - expression: + - quoted_literal: '''east''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q2_east + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '2' + - comma: ',' + - expression: + - quoted_literal: '''west''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q2_west + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '3' + - comma: ',' + - expression: + - quoted_literal: '''east''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q3_east + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '3' + - comma: ',' + - expression: + - quoted_literal: '''west''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q3_west + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '4' + - comma: ',' + - expression: + - quoted_literal: '''east''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q4_east + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '4' + - comma: 
',' + - expression: + - quoted_literal: '''west''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q4_west + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4 + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - bracketed: + - start_bracket: ( + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: quarter + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: sales + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: s + - pivot_clause: + - keyword: PIVOT + - bracketed: + - start_bracket: ( + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sales + - keyword: FOR + - naked_identifier: quarter + - keyword: IN + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - alias_expression: + - keyword: AS + - naked_identifier: q1 + - comma: ',' + - expression: + - numeric_literal: '2' 
+ - alias_expression: + - keyword: AS + - naked_identifier: q2 + - comma: ',' + - expression: + - numeric_literal: '3' + - alias_expression: + - keyword: AS + - naked_identifier: q3 + - comma: ',' + - expression: + - numeric_literal: '4' + - alias_expression: + - keyword: AS + - naked_identifier: q4 + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1_total + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1_avg + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2_total + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2_avg + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3_total + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3_avg + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4_total + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4_avg + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - bracketed: + - start_bracket: ( + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: quarter + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: sales + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: s + - pivot_clause: + - 
keyword: PIVOT + - bracketed: + - start_bracket: ( + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: total + - comma: ',' + - function: + - function_name: + - function_name_identifier: avg + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: avg + - keyword: FOR + - naked_identifier: quarter + - keyword: IN + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - alias_expression: + - keyword: AS + - naked_identifier: q1 + - comma: ',' + - expression: + - numeric_literal: '2' + - alias_expression: + - keyword: AS + - naked_identifier: q2 + - comma: ',' + - expression: + - numeric_literal: '3' + - alias_expression: + - keyword: AS + - naked_identifier: q3 + - comma: ',' + - expression: + - numeric_literal: '4' + - alias_expression: + - keyword: AS + - naked_identifier: q4 + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: region + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4 + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: sales + - pivot_clause: + - keyword: PIVOT + - 
bracketed: + - start_bracket: ( + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: sales + - end_bracket: ) + - keyword: FOR + - naked_identifier: quarter + - keyword: IN + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - alias_expression: + - keyword: AS + - naked_identifier: q1 + - comma: ',' + - expression: + - numeric_literal: '2' + - alias_expression: + - keyword: AS + - naked_identifier: q2 + - comma: ',' + - expression: + - numeric_literal: '3' + - alias_expression: + - keyword: AS + - naked_identifier: q3 + - comma: ',' + - expression: + - numeric_literal: '4' + - alias_expression: + - keyword: AS + - naked_identifier: q4 + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/select.sql new file mode 100644 index 000000000..8d8cb0ef5 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select.sql @@ -0,0 +1,11 @@ +select * +from shopify_cz.order +; + +SELECT * +FROM IDENTIFIER('table_name') +; + +SELECT * +FROM IDENTIFIER('schema_name' || '.table_name') +; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/select.yml new file mode 100644 index 000000000..6274f7491 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select.yml @@ -0,0 +1,68 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - wildcard_expression: + - wildcard_identifier: + - star: '*' + - from_clause: + - keyword: from + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: shopify_cz + - unparsable: + - dot: . 
+ - word: order +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - wildcard_expression: + - wildcard_identifier: + - star: '*' + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - function: + - function_name: + - function_name_identifier: IDENTIFIER + - bracketed: + - start_bracket: ( + - expression: + - quoted_literal: '''table_name''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - wildcard_expression: + - wildcard_identifier: + - star: '*' + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - function: + - function_name: + - function_name_identifier: IDENTIFIER + - bracketed: + - start_bracket: ( + - expression: + - quoted_literal: '''schema_name''' + - binary_operator: + - pipe: '|' + - pipe: '|' + - quoted_literal: '''.table_name''' + - end_bracket: ) +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.sql new file mode 100644 index 000000000..ab7643cb4 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.sql @@ -0,0 +1,114 @@ +SELECT + id, + name, + age, + class, + address, + c_age, + d_age +FROM person + LATERAL VIEW EXPLODE(ARRAY(30, 60)) tbl_name AS c_age + LATERAL VIEW EXPLODE(ARRAY(40, 80)) AS d_age; + +SELECT + c_age, + COUNT(*) AS record_count +FROM person + LATERAL VIEW EXPLODE(ARRAY(30, 60)) AS c_age + LATERAL VIEW EXPLODE(ARRAY(40, 80)) AS d_age +GROUP BY c_age; + +SELECT + id, + name, + age, + class, + address, + c_age, + d_age +FROM person + LATERAL VIEW EXPLODE(ARRAY()) tbl_name AS c_age; + +SELECT + id, + name, + age, + class, + address, + time, + c_age 
+FROM person + LATERAL VIEW OUTER EXPLODE(ARRAY()) tbl_name AS c_age; + +SELECT + id, + name, + age, + class, + address, + time, + c_age +FROM person + LATERAL VIEW OUTER EXPLODE(ARRAY()) tbl_name c_age; + +SELECT + id, + name, + age, + class, + address, + time, + c_age +FROM person + LATERAL VIEW OUTER EXPLODE(ARRAY()) c_age; + +SELECT + person.id, + exploded_people.name, + exploded_people.age, + exploded_people.state +FROM person + LATERAL VIEW INLINE(array_of_structs) exploded_people AS name, age, state; + +SELECT + p.id, + exploded_people.name, + exploded_people.age, + exploded_people.state +FROM person AS p + LATERAL VIEW INLINE(array_of_structs) exploded_people AS name, age, state; + +SELECT + p.id, + exploded_people.name, + exploded_people.age, + exploded_people.state +FROM person AS p + LATERAL VIEW INLINE(array_of_structs) exploded_people; + +SELECT + p.id, + exploded_people.name, + exploded_people.age, + exploded_people.state +FROM person AS p + LATERAL VIEW INLINE(array_of_structs) exploded_people name, age, state; + +SELECT + p.id, + exploded_people.name, + exploded_people.age, + exploded_people.state +FROM person AS p + LATERAL VIEW INLINE(array_of_structs) AS name, age, state; + +SELECT + t1.column1, + CAST(GET_JSON_OBJECT(things, '$.percentage') AS DECIMAL(16, 8) + ) AS ptc +FROM table1 AS t1 +LEFT JOIN table2 AS t2 + ON + c.column1 = p.column1 + AND t2.type = 'SOMETHING' + LATERAL VIEW OUTER EXPLODE(t2.column2) AS things; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.yml new file mode 100644 index 000000000..13f693550 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.yml @@ -0,0 +1,769 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - 
select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: class + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: address + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: c_age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: d_age + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '30' + - comma: ',' + - expression: + - numeric_literal: '60' + - end_bracket: ) + - end_bracket: ) + - naked_identifier: tbl_name + - keyword: AS + - naked_identifier: c_age + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '40' + - comma: ',' + - expression: + - numeric_literal: '80' + - end_bracket: ) + - end_bracket: ) + - keyword: AS + - naked_identifier: d_age +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: c_age + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: COUNT + - bracketed: + - start_bracket: ( + - 
star: '*' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: record_count + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '30' + - comma: ',' + - expression: + - numeric_literal: '60' + - end_bracket: ) + - end_bracket: ) + - keyword: AS + - naked_identifier: c_age + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '40' + - comma: ',' + - expression: + - numeric_literal: '80' + - end_bracket: ) + - end_bracket: ) + - keyword: AS + - naked_identifier: d_age + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: c_age +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: class + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: address + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: c_age + - comma: ',' + - select_clause_element: + - 
column_reference: + - naked_identifier: d_age + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - naked_identifier: tbl_name + - keyword: AS + - naked_identifier: c_age +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: class + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: address + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: time + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: c_age + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - keyword: OUTER + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - naked_identifier: tbl_name + - keyword: AS + - naked_identifier: c_age +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + 
- keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: class + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: address + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: time + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: c_age + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - keyword: OUTER + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - naked_identifier: tbl_name + - naked_identifier: c_age +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: class + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: address + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: time + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: c_age + - from_clause: + - keyword: FROM + - from_expression: + - 
from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - keyword: OUTER + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - naked_identifier: c_age +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: person + - dot: . + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: state + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: INLINE + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: array_of_structs + - end_bracket: ) + - naked_identifier: exploded_people + - keyword: AS + - naked_identifier: name + - comma: ',' + - naked_identifier: age + - comma: ',' + - naked_identifier: state +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: p + - dot: . 
+ - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: state + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - alias_expression: + - keyword: AS + - naked_identifier: p + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: INLINE + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: array_of_structs + - end_bracket: ) + - naked_identifier: exploded_people + - keyword: AS + - naked_identifier: name + - comma: ',' + - naked_identifier: age + - comma: ',' + - naked_identifier: state +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: p + - dot: . + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . 
+ - naked_identifier: state + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - alias_expression: + - keyword: AS + - naked_identifier: p + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: INLINE + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: array_of_structs + - end_bracket: ) + - naked_identifier: exploded_people +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: p + - dot: . + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . 
+ - naked_identifier: state + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - alias_expression: + - keyword: AS + - naked_identifier: p + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: INLINE + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: array_of_structs + - end_bracket: ) + - naked_identifier: exploded_people + - naked_identifier: name + - comma: ',' + - naked_identifier: age + - comma: ',' + - naked_identifier: state +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: p + - dot: . + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . 
+ - naked_identifier: state + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - alias_expression: + - keyword: AS + - naked_identifier: p + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: INLINE + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: array_of_structs + - end_bracket: ) + - keyword: AS + - naked_identifier: name + - comma: ',' + - naked_identifier: age + - comma: ',' + - naked_identifier: state +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: t1 + - dot: . + - naked_identifier: column1 + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: CAST + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: GET_JSON_OBJECT + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: things + - comma: ',' + - expression: + - quoted_literal: '''$.percentage''' + - end_bracket: ) + - keyword: AS + - data_type: + - primitive_type: + - keyword: DECIMAL + - bracketed_arguments: + - bracketed: + - start_bracket: ( + - numeric_literal: '16' + - comma: ',' + - numeric_literal: '8' + - end_bracket: ) + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: ptc + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: table1 + - alias_expression: + - keyword: AS + - naked_identifier: t1 + - join_clause: + - keyword: LEFT + - keyword: JOIN + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: table2 + - alias_expression: + - keyword: 
AS + - naked_identifier: t2 + - join_on_condition: + - keyword: ON + - expression: + - column_reference: + - naked_identifier: c + - dot: . + - naked_identifier: column1 + - comparison_operator: + - raw_comparison_operator: = + - column_reference: + - naked_identifier: p + - dot: . + - naked_identifier: column1 + - binary_operator: AND + - column_reference: + - naked_identifier: t2 + - dot: . + - naked_identifier: type + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''SOMETHING''' + - unparsable: + - word: LATERAL + - word: VIEW + - word: OUTER + - word: EXPLODE + - start_bracket: ( + - word: t2 + - dot: . + - word: column2 + - end_bracket: ) + - word: AS + - word: things +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.sql new file mode 100644 index 000000000..70c8590b5 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.sql @@ -0,0 +1,140 @@ +-- Sum of quantity per dealership. Group by `id`. +SELECT + id, + sum(quantity) AS sum_quantity +FROM dealer GROUP BY id ORDER BY id; + +-- Use column position in GROUP by clause. +SELECT + id, + sum(quantity) AS sum_quantity +FROM dealer GROUP BY 1 ORDER BY 1; + +-- Multiple aggregations. +-- 1. Sum of quantity per dealership. +-- 2. Max quantity per dealership. +SELECT + id, + sum(quantity) AS sum_quantity, + max(quantity) AS max_quantity +FROM dealer GROUP BY id ORDER BY id; + +-- Count the number of distinct dealer cities per car_model. +SELECT + car_model, + count(DISTINCT city) AS count_distinct_city +FROM dealer GROUP BY car_model; + +-- Sum of only 'Honda Civic' and 'Honda CRV' quantities per dealership. 
+SELECT + id, + sum(quantity) FILTER ( + WHERE car_model IN ('Honda Civic', 'Honda CRV') + ) AS `sum(quantity)` FROM dealer +GROUP BY id ORDER BY id; + +-- Aggregations using multiple sets of grouping columns in a single statement. +-- Following performs aggregations based on four sets of grouping columns. +-- 1. city, car_model +-- 2. city +-- 3. car_model +-- 4. Empty grouping set. Returns quantities for all city and car models. +SELECT + city, + car_model, + sum(quantity) AS sum_quantity +FROM dealer +GROUP BY GROUPING SETS ((city, car_model), (city), (car_model), ()) +ORDER BY city; + +SELECT + city, + car_model, + sum(quantity) AS sum_quantity +FROM dealer +GROUP BY city, car_model GROUPING SETS ((city, car_model), (city), (car_model), ()) +ORDER BY city; + +SELECT + city, + car_model, + sum(quantity) AS sum_quantity +FROM dealer +GROUP BY city, car_model, GROUPING SETS ((city, car_model), (city), (car_model), ()) +ORDER BY city; + +-- Group by processing with `ROLLUP` clause. +-- Equivalent GROUP BY GROUPING SETS ((city, car_model), (city), ()) +SELECT + city, + car_model, + sum(quantity) AS sum_quantity +FROM dealer +GROUP BY city, car_model WITH ROLLUP +ORDER BY city, car_model; + +-- Group by processing with `CUBE` clause. 
+-- Equivalent GROUP BY: +-- GROUPING SETS ((city, car_model), (city), (car_model), ()) +SELECT + city, + car_model, + sum(quantity) AS sum_quantity +FROM dealer +GROUP BY city, car_model WITH CUBE +ORDER BY city, car_model; + +-- Select the first row in column age +-- Implicit GROUP BY +SELECT first(age) FROM person; + +-- Implicit GROUP BY +SELECT + first(age IGNORE NULLS) AS first_age, + last(id) AS last_id, + sum(id) AS sum_id +FROM person; + +-- CUBE within GROUP BY clause +SELECT + name, + age, + count(*) AS record_count +FROM people +GROUP BY cube(name, age); + +-- CUBE within GROUP BY clause with single clause on newline +SELECT + name, + count(*) AS record_count +FROM people +GROUP BY cube( + name +); + +-- CUBE within GROUP BY clause with multiple clauses on newline +SELECT + name, + age, + count(*) AS record_count +FROM people +GROUP BY cube( + name, + age +); + +-- ROLLUP within GROUP BY clause +SELECT + name, + age, + count(*) AS record_count +FROM people +GROUP BY rollup(name, age); + +-- GROUP BY ALL +SELECT + name, + age, + count(*) AS record_count +FROM people +GROUP BY ALL; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.yml new file mode 100644 index 000000000..83688b339 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.yml @@ -0,0 +1,857 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - 
table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: id + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: id +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - numeric_literal: '1' + - orderby_clause: + - keyword: ORDER + - keyword: BY + - numeric_literal: '1' +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: max + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: max_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - 
naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: id + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: id +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: car_model + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: count + - bracketed: + - start_bracket: ( + - keyword: DISTINCT + - expression: + - column_reference: + - naked_identifier: city + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: count_distinct_city + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: car_model +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - keyword: FILTER + - bracketed: + - start_bracket: ( + - keyword: WHERE + - expression: + - column_reference: + - naked_identifier: car_model + - keyword: IN + - bracketed: + - start_bracket: ( + - quoted_literal: '''Honda Civic''' + - comma: ',' + - quoted_literal: '''Honda CRV''' + - end_bracket: ) + - end_bracket: ) + - alias_expression: + - keyword: AS + - quoted_identifier: '`sum(quantity)`' + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: 
+ - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: id + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: id +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: city + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: car_model + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - grouping_sets_clause: + - keyword: GROUPING + - keyword: SETS + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - expression: + - bracketed: + - start_bracket: ( + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: city + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: car_model + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: city +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: city + - 
comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: car_model + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - grouping_sets_clause: + - keyword: GROUPING + - keyword: SETS + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - expression: + - bracketed: + - start_bracket: ( + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: city + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: car_model + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: city +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: city + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: car_model + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: 
+ - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - comma: ',' + - grouping_sets_clause: + - keyword: GROUPING + - keyword: SETS + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - expression: + - bracketed: + - start_bracket: ( + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: city + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: car_model + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: city +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: city + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: car_model + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - 
table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - with_cube_rollup_clause: + - keyword: WITH + - keyword: ROLLUP + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: city + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: car_model + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - with_cube_rollup_clause: + - keyword: WITH + - keyword: CUBE + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - function: + - function_name: + - function_name_identifier: first + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: age + - end_bracket: ) + - from_clause: + - keyword: FROM + - 
from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - function: + - function_name: + - function_name_identifier: first + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: age + - keyword: IGNORE + - keyword: NULLS + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: first_age + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: last + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: id + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: last_id + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: id + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_id + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: count + - bracketed: + - start_bracket: ( + - star: '*' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: record_count + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: people + - groupby_clause: + - 
keyword: GROUP + - keyword: BY + - cube_rollup_clause: + - function_name: + - function_name_identifier: cube + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - column_reference: + - naked_identifier: name + - comma: ',' + - column_reference: + - naked_identifier: age + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: count + - bracketed: + - start_bracket: ( + - star: '*' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: record_count + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: people + - groupby_clause: + - keyword: GROUP + - keyword: BY + - cube_rollup_clause: + - function_name: + - function_name_identifier: cube + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - column_reference: + - naked_identifier: name + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: count + - bracketed: + - start_bracket: ( + - star: '*' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: record_count + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: people + - groupby_clause: + - keyword: GROUP + - keyword: BY + - cube_rollup_clause: + - function_name: + - function_name_identifier: cube + - bracketed: + - 
start_bracket: ( + - grouping_expression_list: + - column_reference: + - naked_identifier: name + - comma: ',' + - column_reference: + - naked_identifier: age + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: count + - bracketed: + - start_bracket: ( + - star: '*' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: record_count + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: people + - groupby_clause: + - keyword: GROUP + - keyword: BY + - cube_rollup_clause: + - function_name: + - function_name_identifier: rollup + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - column_reference: + - naked_identifier: name + - comma: ',' + - column_reference: + - naked_identifier: age + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: count + - bracketed: + - start_bracket: ( + - star: '*' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: record_count + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: people + - unparsable: + - word: GROUP + - word: BY + - word: ALL +- statement_terminator: ; diff --git 
a/crates/lib-dialects/test/fixtures/dialects/databricks/select_window.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/select_window.sql new file mode 100644 index 000000000..d413ea74c --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select_window.sql @@ -0,0 +1,9 @@ +select +lag(test) +over (ORDER BY test) +from schema.test_table; + +select +lag(test) +over (PARTITION BY test ORDER BY test) +from schema.test_table; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select_window.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/select_window.yml new file mode 100644 index 000000000..4b4636964 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select_window.yml @@ -0,0 +1,77 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - function: + - function_name: + - function_name_identifier: lag + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: test + - end_bracket: ) + - over_clause: + - keyword: over + - bracketed: + - start_bracket: ( + - window_specification: + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: test + - end_bracket: ) + - from_clause: + - keyword: from + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: schema + - dot: . 
+ - naked_identifier: test_table +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - function: + - function_name: + - function_name_identifier: lag + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: test + - end_bracket: ) + - over_clause: + - keyword: over + - bracketed: + - start_bracket: ( + - window_specification: + - partitionby_clause: + - keyword: PARTITION + - keyword: BY + - expression: + - column_reference: + - naked_identifier: test + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: test + - end_bracket: ) + - from_clause: + - keyword: from + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: schema + - dot: . + - naked_identifier: test_table +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.sql new file mode 100644 index 000000000..176ce3086 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.sql @@ -0,0 +1,5 @@ +SET TIME ZONE LOCAL; +SET TIME ZONE 'America/Los_Angeles'; +SET TIME ZONE '+08:00'; +SET TIME ZONE INTERVAL 1 HOUR 30 MINUTES; +SET TIME ZONE INTERVAL '08:30:00' HOUR TO SECOND; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.yml new file mode 100644 index 000000000..c3af4484c --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.yml @@ -0,0 +1,44 @@ +file: +- statement: + - keyword: SET + - keyword: TIME + - keyword: ZONE + - keyword: LOCAL +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: TIME + - keyword: ZONE + - quoted_literal: '''America/Los_Angeles''' +- statement_terminator: ; +- 
statement: + - keyword: SET + - keyword: TIME + - keyword: ZONE + - quoted_literal: '''+08:00''' +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: TIME + - keyword: ZONE + - interval_expression: + - keyword: INTERVAL + - interval_literal: + - numeric_literal: '1' + - date_part: HOUR + - interval_literal: + - numeric_literal: '30' + - date_part: MINUTES +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: TIME + - keyword: ZONE + - interval_expression: + - keyword: INTERVAL + - interval_literal: + - signed_quoted_literal: '''08:30:00''' + - date_part: HOUR + - keyword: TO + - date_part: SECOND +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.sql new file mode 100644 index 000000000..8142fbbdc --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.sql @@ -0,0 +1,20 @@ +-- simple assignment +SET VAR var1 = 5; + +-- A complex expression assignment +SET VARIABLE var1 = (SELECT max(c1) FROM VALUES(1), (2) AS t(c1)); + +-- resetting the variable to DEFAULT (set in declare) +SET VAR var1 = DEFAULT; + +-- A multi variable assignment +SET VAR (var1, var2, var3) = (VALUES(100,'x123',DEFAULT)); + +-- escpaed function name +SET VARIABLE `foo` = select 'bar'; + +-- function call +set var tz = current_timezone(); + +-- set multiple vars in one statement +set var x1 = 12, x2 = 'helloworld'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.yml new file mode 100644 index 000000000..14a053790 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.yml @@ -0,0 +1,154 @@ +file: +- statement: + - keyword: SET + - keyword: VAR + - expression: + - column_reference: + - naked_identifier: var1 + - comparison_operator: + - raw_comparison_operator: = + - 
numeric_literal: '5' +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: VARIABLE + - expression: + - column_reference: + - naked_identifier: var1 + - comparison_operator: + - raw_comparison_operator: = + - bracketed: + - start_bracket: ( + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - function: + - function_name: + - function_name_identifier: max + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: c1 + - end_bracket: ) + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - values_clause: + - keyword: VALUES + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - end_bracket: ) + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '2' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: t + - bracketed: + - start_bracket: ( + - identifier_list: + - naked_identifier: c1 + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: VAR + - expression: + - column_reference: + - naked_identifier: var1 + - comparison_operator: + - raw_comparison_operator: = + - column_reference: + - naked_identifier: DEFAULT +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: VAR + - expression: + - bracketed: + - start_bracket: ( + - column_reference: + - naked_identifier: var1 + - comma: ',' + - column_reference: + - naked_identifier: var2 + - comma: ',' + - column_reference: + - naked_identifier: var3 + - end_bracket: ) + - comparison_operator: + - raw_comparison_operator: = + - bracketed: + - start_bracket: ( + - values_clause: + - keyword: VALUES + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '100' + - comma: ',' + - expression: + - quoted_literal: '''x123''' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: DEFAULT + - 
end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: VARIABLE + - expression: + - column_reference: + - quoted_identifier: '`foo`' + - comparison_operator: + - raw_comparison_operator: = + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - quoted_literal: '''bar''' +- statement_terminator: ; +- statement: + - keyword: set + - keyword: var + - expression: + - column_reference: + - naked_identifier: tz + - comparison_operator: + - raw_comparison_operator: = + - function: + - function_name: + - function_name_identifier: current_timezone + - bracketed: + - start_bracket: ( + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: set + - keyword: var + - expression: + - column_reference: + - naked_identifier: x1 + - comparison_operator: + - raw_comparison_operator: = + - numeric_literal: '12' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: x2 + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''helloworld''' +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.sql new file mode 100644 index 000000000..103a1209b --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.sql @@ -0,0 +1,21 @@ +-- Lists all databases +SHOW DATABASES; + +-- List all databases from userdb catalog +SHOW DATABASES FROM userdb; + +-- List all databases in userdb catalog +SHOW DATABASES IN userdb; + +-- List all databases from default catalog matching the pattern `sam*` +SHOW DATABASES FROM default LIKE 'sam*'; + +-- List all databases from default catalog matching the pattern `sam*` +-- without LIKE keyword +SHOW DATABASES FROM default 'sam*'; + +-- List all databases matching the pattern `sam*|suj` without LIKE keyword +SHOW DATABASES 'sam*|suj'; + +-- Lists all databases. 
Keywords SCHEMAS and DATABASES are interchangeable. +SHOW SCHEMAS; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.yml new file mode 100644 index 000000000..825b492d0 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.yml @@ -0,0 +1,39 @@ +file: +- statement: + - show_statement: + - keyword: SHOW + - keyword: DATABASES +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: DATABASES +- file: + - word: FROM + - word: userdb + - semicolon: ; + - word: SHOW + - word: DATABASES + - word: IN + - word: userdb + - semicolon: ; + - word: SHOW + - word: DATABASES + - word: FROM + - word: default + - word: LIKE + - single_quote: '''sam*''' + - semicolon: ; + - word: SHOW + - word: DATABASES + - word: FROM + - word: default + - single_quote: '''sam*''' + - semicolon: ; + - word: SHOW + - word: DATABASES + - single_quote: '''sam*|suj''' + - semicolon: ; + - word: SHOW + - word: SCHEMAS + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.sql new file mode 100644 index 000000000..c632024b3 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.sql @@ -0,0 +1,54 @@ +-- Unfiltered list of all functions; +SHOW FUNCTIONS; + +-- List a system function `trim` by searching both user defined and system +-- defined functions. +SHOW FUNCTIONS trim; + +SHOW ALL FUNCTIONS trim; + +-- List a system function `concat` by searching system defined functions. +SHOW SYSTEM FUNCTIONS concat; + +-- List a user function `concat_user` by searching user defined functions. +SHOW USER FUNCTIONS concat_user; + +-- List a qualified function `max` from database `salesdb`. 
+SHOW SYSTEM FUNCTIONS salesdb.max; + +-- List all functions starting with `t` +SHOW FUNCTIONS LIKE 't*'; + +-- List all functions starting with `t` without LIKE keyword +SHOW FUNCTIONS 't*'; + +-- List all user functions starting with `t` +SHOW USER FUNCTIONS LIKE 't*'; + +-- List all user functions starting with `t` without LIKE keyword +SHOW USER FUNCTIONS 't*'; + +-- List all functions starting with `yea` or `windo` +SHOW FUNCTIONS LIKE 'yea*|windo*'; + +-- Use normal regex pattern to list function names that has 4 characters +-- with `t` as the starting character. +SHOW FUNCTIONS LIKE 't[a-z][a-z][a-z]'; + +-- List all functions from default schema +SHOW FUNCTIONS FROM default; + +-- List all user functions from default schema +SHOW USER FUNCTIONS FROM default; + +-- List all functions from default schema starting with `t` +SHOW FUNCTIONS FROM default LIKE 't*'; + +-- List all functions from default schema starting with `t` without LIKE keyword +SHOW FUNCTIONS FROM default 't*'; + +-- List all user functions from default schema starting with `t` +SHOW USER FUNCTIONS FROM default LIKE 't*'; + +-- List all user functions from default schema starting with `t` without LIKE keyword +SHOW USER FUNCTIONS FROM default 't*'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.yml new file mode 100644 index 000000000..ecb017261 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.yml @@ -0,0 +1,100 @@ +file: +- unparsable: + - word: SHOW + - word: FUNCTIONS + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: trim + - semicolon: ; + - word: SHOW + - word: ALL + - word: FUNCTIONS + - word: trim + - semicolon: ; + - word: SHOW + - word: SYSTEM + - word: FUNCTIONS + - word: concat + - semicolon: ; + - word: SHOW + - word: USER + - word: FUNCTIONS + - word: concat_user + - semicolon: ; + - word: SHOW + - word: SYSTEM + - word: 
FUNCTIONS + - word: salesdb + - dot: . + - word: max + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: LIKE + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: USER + - word: FUNCTIONS + - word: LIKE + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: USER + - word: FUNCTIONS + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: LIKE + - single_quote: '''yea*|windo*''' + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: LIKE + - single_quote: '''t[a-z][a-z][a-z]''' + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: FROM + - word: default + - semicolon: ; + - word: SHOW + - word: USER + - word: FUNCTIONS + - word: FROM + - word: default + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: FROM + - word: default + - word: LIKE + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: FROM + - word: default + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: USER + - word: FUNCTIONS + - word: FROM + - word: default + - word: LIKE + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: USER + - word: FUNCTIONS + - word: FROM + - word: default + - single_quote: '''t*''' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.sql new file mode 100644 index 000000000..987531a16 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.sql @@ -0,0 +1,18 @@ +-- List all tables in default database +SHOW TABLES; + +-- List all tables from userdb database +SHOW TABLES FROM userdb; + +-- List all tables in userdb database +SHOW TABLES IN userdb; + +-- List all tables from default database matching the pattern `sam*` +SHOW TABLES FROM default LIKE 'sam*'; + +-- List all tables from default 
database matching the pattern `sam*` +-- without LIKE keyword +SHOW TABLES FROM default 'sam*'; + +-- List all tables matching the pattern `sam*|suj` without LIKE keyword +SHOW TABLES 'sam*|suj'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.yml new file mode 100644 index 000000000..c6c4b70c1 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.yml @@ -0,0 +1,46 @@ +file: +- statement: + - show_statement: + - keyword: SHOW + - keyword: TABLES +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: TABLES + - keyword: FROM + - object_reference: + - naked_identifier: userdb +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: TABLES + - keyword: IN + - object_reference: + - naked_identifier: userdb +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: TABLES + - keyword: FROM + - object_reference: + - naked_identifier: default + - keyword: LIKE + - quoted_literal: '''sam*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: TABLES + - keyword: FROM + - object_reference: + - naked_identifier: default +- file: + - single_quote: '''sam*''' + - semicolon: ; + - word: SHOW + - word: TABLES + - single_quote: '''sam*|suj''' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.sql new file mode 100644 index 000000000..b4d370fb3 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.sql @@ -0,0 +1,23 @@ +-- List all views in default database +SHOW VIEWS; + +-- List all views from userdb database +SHOW VIEWS FROM userdb; + +-- List all views in global temp view database +SHOW VIEWS IN global_temp; + +-- List all views from default database matching 
the pattern `sam*` +SHOW VIEWS FROM default LIKE 'sam*'; + +-- List all views from the current database +-- matching the pattern `sam|suj|temp*` +SHOW VIEWS LIKE 'sam|suj|temp*'; + +-- List all views from default database matching the pattern `sam*` +-- without LIKE keyword +SHOW VIEWS FROM default 'sam*'; + +-- List all views from the current database +-- matching the pattern `sam|suj|temp*` without LIKE keyword +SHOW VIEWS 'sam|suj|temp*'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml new file mode 100644 index 000000000..22e563a44 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml @@ -0,0 +1,61 @@ +file: +- statement: + - show_statement: + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS +- statement_terminator: ; +- statement: + - show_statement: + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: FROM + - object_reference: + - naked_identifier: userdb +- statement_terminator: ; +- statement: + - show_statement: + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: IN + - object_reference: + - naked_identifier: global_temp +- statement_terminator: ; +- statement: + - show_statement: + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: FROM + - object_reference: + - naked_identifier: default + - keyword: LIKE + - quoted_literal: '''sam*''' +- statement_terminator: ; +- statement: + - show_statement: + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: LIKE + - quoted_literal: '''sam|suj|temp*''' +- statement_terminator: ; +- statement: + - show_statement: + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: FROM + - object_reference: + - naked_identifier: default + - quoted_literal: '''sam*''' +- statement_terminator: ; +- statement: + - show_statement: + - show_views_statement: + - keyword: 
SHOW + - keyword: VIEWS + - quoted_literal: '''sam|suj|temp*''' +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.sql new file mode 100644 index 000000000..bae412208 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.sql @@ -0,0 +1,17 @@ +SHOW VOLUMES; + +SHOW VOLUMES IN sampledb; + +SHOW VOLUMES FROM sampledb; + +SHOW VOLUMES LIKE 'regex*'; + +SHOW VOLUMES 'regex*'; + +SHOW VOLUMES IN sampledb LIKE 'regex*'; + +SHOW VOLUMES IN sampledb 'regex*'; + +SHOW VOLUMES FROM sampledb LIKE 'regex*'; + +SHOW VOLUMES FROM sampledb 'regex*'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml new file mode 100644 index 000000000..5e2f6d32f --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml @@ -0,0 +1,73 @@ +file: +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: IN + - object_reference: + - naked_identifier: sampledb +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: FROM + - object_reference: + - naked_identifier: sampledb +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: LIKE + - quoted_literal: '''regex*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - quoted_literal: '''regex*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: IN + - object_reference: + - naked_identifier: sampledb + - keyword: LIKE + - quoted_literal: '''regex*''' +- statement_terminator: ; +- statement: + - show_statement: 
+ - keyword: SHOW + - keyword: VOLUMES + - keyword: IN + - object_reference: + - naked_identifier: sampledb + - quoted_literal: '''regex*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: FROM + - object_reference: + - naked_identifier: sampledb + - keyword: LIKE + - quoted_literal: '''regex*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: FROM + - object_reference: + - naked_identifier: sampledb + - quoted_literal: '''regex*''' +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.sql new file mode 100644 index 000000000..21e892c5b --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.sql @@ -0,0 +1,10 @@ +SELECT * + FROM sales UNPIVOT INCLUDE NULLS + (sales FOR quarter IN (q1 AS `Jan-Mar`, + q2 AS `Apr-Jun`, + q3 AS `Jul-Sep`, + sales.q4 AS `Oct-Dec`)); + +SELECT * + FROM oncall UNPIVOT ((name, email, phone) FOR precedence IN ((name1, email1, phone1) AS primary, + (name2, email2, phone2) AS secondary)); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.yml new file mode 100644 index 000000000..18b854bd0 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.yml @@ -0,0 +1,99 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - wildcard_expression: + - wildcard_identifier: + - star: '*' + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: sales + - alias_expression: + - naked_identifier: UNPIVOT + - unparsable: + - word: INCLUDE + - word: NULLS + - start_bracket: ( + - word: sales + - word: FOR + - word: quarter + - 
word: IN + - start_bracket: ( + - word: q1 + - word: AS + - back_quote: '`Jan-Mar`' + - comma: ',' + - word: q2 + - word: AS + - back_quote: '`Apr-Jun`' + - comma: ',' + - word: q3 + - word: AS + - back_quote: '`Jul-Sep`' + - comma: ',' + - word: sales + - dot: . + - word: q4 + - word: AS + - back_quote: '`Oct-Dec`' + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - wildcard_expression: + - wildcard_identifier: + - star: '*' + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: oncall + - alias_expression: + - naked_identifier: UNPIVOT + - unparsable: + - start_bracket: ( + - start_bracket: ( + - word: name + - comma: ',' + - word: email + - comma: ',' + - word: phone + - end_bracket: ) + - word: FOR + - word: precedence + - word: IN + - start_bracket: ( + - start_bracket: ( + - word: name1 + - comma: ',' + - word: email1 + - comma: ',' + - word: phone1 + - end_bracket: ) + - word: AS + - word: primary + - comma: ',' + - start_bracket: ( + - word: name2 + - comma: ',' + - word: email2 + - comma: ',' + - word: phone2 + - end_bracket: ) + - word: AS + - word: secondary + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.sql new file mode 100644 index 000000000..39b5afcfd --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.sql @@ -0,0 +1,11 @@ +USE CATALOG catalog_name; + +-- Use the 'hive_metastore' . 
+USE CATALOG hive_metastore; + +USE CATALOG 'hive_metastore'; + +-- Use the 'some_catalog' +USE CATALOG `some_catalog`; + +USE CATALOG some_cat; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.yml new file mode 100644 index 000000000..b8580b68c --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.yml @@ -0,0 +1,25 @@ +file: +- statement: + - use_statement: + - keyword: USE + - object_reference: + - naked_identifier: CATALOG +- file: + - word: catalog_name + - semicolon: ; + - word: USE + - word: CATALOG + - word: hive_metastore + - semicolon: ; + - word: USE + - word: CATALOG + - single_quote: '''hive_metastore''' + - semicolon: ; + - word: USE + - word: CATALOG + - back_quote: '`some_catalog`' + - semicolon: ; + - word: USE + - word: CATALOG + - word: some_cat + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.sql new file mode 100644 index 000000000..2bc4e4cc1 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.sql @@ -0,0 +1,18 @@ +USE database_name; + +-- Use the 'userdb' +USE userdb; + +-- Use the 'userdb1' +USE userdb1; + +-- Keywords SCHEMA and DATABASE are interchangeable. 
+USE DATABASE database_name; + +USE SCHEMA database_name; + +USE IDENTIFIER('database_name'); + +USE DATABASE IDENTIFIER('database_name'); + +USE SCHEMA IDENTIFIER('database_name'); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.yml new file mode 100644 index 000000000..508979b44 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.yml @@ -0,0 +1,57 @@ +file: +- statement: + - use_statement: + - keyword: USE + - object_reference: + - naked_identifier: database_name +- statement_terminator: ; +- statement: + - use_statement: + - keyword: USE + - object_reference: + - naked_identifier: userdb +- statement_terminator: ; +- statement: + - use_statement: + - keyword: USE + - object_reference: + - naked_identifier: userdb1 +- statement_terminator: ; +- statement: + - use_database_statement: + - keyword: USE + - keyword: DATABASE + - object_reference: + - naked_identifier: database_name +- statement_terminator: ; +- statement: + - use_database_statement: + - keyword: USE + - keyword: SCHEMA + - object_reference: + - naked_identifier: database_name +- statement_terminator: ; +- statement: + - use_statement: + - keyword: USE + - object_reference: + - naked_identifier: IDENTIFIER +- file: + - start_bracket: ( + - single_quote: '''database_name''' + - end_bracket: ) + - semicolon: ; + - word: USE + - word: DATABASE + - word: IDENTIFIER + - start_bracket: ( + - single_quote: '''database_name''' + - end_bracket: ) + - semicolon: ; + - word: USE + - word: SCHEMA + - word: IDENTIFIER + - start_bracket: ( + - single_quote: '''database_name''' + - end_bracket: ) + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/sparksql/show_views.yml b/crates/lib-dialects/test/fixtures/dialects/sparksql/show_views.yml index 829773aee..e350eb1af 100644 --- a/crates/lib-dialects/test/fixtures/dialects/sparksql/show_views.yml +++ 
b/crates/lib-dialects/test/fixtures/dialects/sparksql/show_views.yml @@ -1,39 +1,44 @@ file: - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: FROM - - database_reference: - - naked_identifier: userdb + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: FROM + - database_reference: + - naked_identifier: userdb - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: IN - - database_reference: - - naked_identifier: global_temp + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: IN + - database_reference: + - naked_identifier: global_temp - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: FROM - - database_reference: - - naked_identifier: default - - keyword: LIKE - - quoted_literal: '''sam*''' + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: FROM + - database_reference: + - naked_identifier: default + - keyword: LIKE + - quoted_literal: '''sam*''' - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: LIKE - - quoted_literal: '''sam|suj|temp*''' + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: LIKE + - quoted_literal: '''sam|suj|temp*''' - statement_terminator: ;