From 5ba349a3c0253be2282a1daf23ae7e3cf7f80f66 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Thu, 19 Dec 2024 12:21:45 +0100 Subject: [PATCH 01/19] refactor: move show statements to node matcher --- crates/lib-core/src/dialects/syntax.rs | 5 +- crates/lib-dialects/src/sparksql.rs | 38 +++++++------- .../fixtures/dialects/sparksql/show_views.yml | 51 ++++++++++--------- 3 files changed, 51 insertions(+), 43 deletions(-) diff --git a/crates/lib-core/src/dialects/syntax.rs b/crates/lib-core/src/dialects/syntax.rs index 5bb5ab8b1..7876d7553 100644 --- a/crates/lib-core/src/dialects/syntax.rs +++ b/crates/lib-core/src/dialects/syntax.rs @@ -359,6 +359,7 @@ pub enum SyntaxKind { CreateStreamStatement, AlterStreamStatement, ShowStatement, + ShowViewsStatement, AlterUserStatement, AlterSessionStatement, AlterSessionSetStatement, @@ -611,7 +612,7 @@ impl SyntaxKind { } #[derive(Clone, PartialEq, Eq, Default)] -pub struct SyntaxSet([u64; 9]); +pub struct SyntaxSet([u64; 10]); impl std::fmt::Debug for SyntaxSet { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -620,7 +621,7 @@ impl std::fmt::Debug for SyntaxSet { } impl SyntaxSet { - pub const EMPTY: SyntaxSet = Self([0; 9]); + pub const EMPTY: SyntaxSet = Self([0; 10]); const SLICE_BITS: u16 = u64::BITS as u16; pub const fn new(kinds: &[SyntaxKind]) -> Self { diff --git a/crates/lib-dialects/src/sparksql.rs b/crates/lib-dialects/src/sparksql.rs index a6f3cd8b9..7ad9022d2 100644 --- a/crates/lib-dialects/src/sparksql.rs +++ b/crates/lib-dialects/src/sparksql.rs @@ -2572,26 +2572,28 @@ pub fn dialect() -> Dialect { ), ( "ShowViewsStatement".into(), - Sequence::new(vec_of_erased![ - Ref::keyword("SHOW"), - Ref::keyword("VIEWS"), - Sequence::new(vec_of_erased![ - one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]), - Ref::new("DatabaseReferenceSegment") - ]) - .config(|config| { - config.optional(); - }), + NodeMatcher::new( + 
SyntaxKind::ShowViewsStatement, Sequence::new(vec_of_erased![ - Ref::keyword("LIKE"), - Ref::new("QuotedLiteralSegment") + Ref::keyword("SHOW"), + Ref::keyword("VIEWS"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]), + Ref::new("DatabaseReferenceSegment") + ]) + .config(|config| { + config.optional(); + }), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE"), + Ref::new("QuotedLiteralSegment") + ]) + .config(|config| { + config.optional(); + }) ]) - .config(|config| { - config.optional(); - }) - ]) - .to_matchable() - .into(), + .to_matchable(), + ).to_matchable().into(), ), ( "SetStatementSegment".into(), diff --git a/crates/lib-dialects/test/fixtures/dialects/sparksql/show_views.yml b/crates/lib-dialects/test/fixtures/dialects/sparksql/show_views.yml index 829773aee..e350eb1af 100644 --- a/crates/lib-dialects/test/fixtures/dialects/sparksql/show_views.yml +++ b/crates/lib-dialects/test/fixtures/dialects/sparksql/show_views.yml @@ -1,39 +1,44 @@ file: - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: FROM - - database_reference: - - naked_identifier: userdb + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: FROM + - database_reference: + - naked_identifier: userdb - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: IN - - database_reference: - - naked_identifier: global_temp + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: IN + - database_reference: + - naked_identifier: global_temp - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: FROM - - database_reference: - - naked_identifier: default - - keyword: LIKE - - quoted_literal: '''sam*''' + - show_views_statement: 
+ - keyword: SHOW + - keyword: VIEWS + - keyword: FROM + - database_reference: + - naked_identifier: default + - keyword: LIKE + - quoted_literal: '''sam*''' - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: LIKE - - quoted_literal: '''sam|suj|temp*''' + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: LIKE + - quoted_literal: '''sam|suj|temp*''' - statement_terminator: ; From 05ad57f6b79261df217f3f3484e507ddb204c511 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Tue, 17 Dec 2024 11:11:22 +0100 Subject: [PATCH 02/19] temp working on things --- crates/lib-core/src/dialects/init.rs | 1 + crates/lib-dialects/Cargo.toml | 2 + crates/lib-dialects/src/databricks.rs | 10 ++ .../lib-dialects/src/databricks_keywords.rs | 47 ++++++ crates/lib-dialects/src/lib.rs | 6 + .../test/fixtures/dialects/databricks/.sqruff | 2 + .../dialects/databricks/alter_catalog.sql | 18 +++ .../dialects/databricks/alter_database.sql | 27 ++++ .../dialects/databricks/alter_table.sql | 95 ++++++++++++ .../dialects/databricks/alter_view.sql | 34 +++++ .../dialects/databricks/alter_volume.sql | 16 ++ .../databricks/command_terminator.sql | 12 ++ .../dialects/databricks/comment_on.sql | 19 +++ .../dialects/databricks/create_catalog.sql | 9 ++ .../dialects/databricks/create_database.sql | 30 ++++ .../dialects/databricks/create_function.sql | 106 +++++++++++++ .../dialects/databricks/create_table.sql | 80 ++++++++++ .../dialects/databricks/create_volume.sql | 27 ++++ .../dialects/databricks/date_functions.sql | 19 +++ .../declare_or_replace_variable.sql | 7 + .../dialects/databricks/describe_volume.sql | 2 + .../dialects/databricks/drop_catalog.sql | 5 + .../dialects/databricks/drop_volume.sql | 5 + .../dialects/databricks/magic_line.sql | 24 +++ .../dialects/databricks/named_argument.sql | 7 + .../fixtures/dialects/databricks/pivot.sql | 38 +++++ 
.../fixtures/dialects/databricks/select.sql | 11 ++ .../databricks/select_from_lateral_view.sql | 114 ++++++++++++++ .../dialects/databricks/select_group_by.sql | 140 ++++++++++++++++++ .../dialects/databricks/select_window.sql | 9 ++ .../dialects/databricks/set_time_zone.sql | 5 + .../dialects/databricks/set_variable.sql | 20 +++ .../dialects/databricks/show_databases.sql | 21 +++ .../dialects/databricks/show_functions.sql | 54 +++++++ .../dialects/databricks/show_tables.sql | 18 +++ .../dialects/databricks/show_views.sql | 23 +++ .../dialects/databricks/show_volumes.sql | 17 +++ .../fixtures/dialects/databricks/unpivot.sql | 10 ++ .../dialects/databricks/use_catalog.sql | 11 ++ .../dialects/databricks/use_database.sql | 18 +++ 40 files changed, 1119 insertions(+) create mode 100644 crates/lib-dialects/src/databricks.rs create mode 100644 crates/lib-dialects/src/databricks_keywords.rs create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/.sqruff create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/create_database.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/create_function.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/create_table.sql create mode 
100644 crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/pivot.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/select.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/select_window.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/show_views.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.sql create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.sql create mode 
100644 crates/lib-dialects/test/fixtures/dialects/databricks/use_database.sql diff --git a/crates/lib-core/src/dialects/init.rs b/crates/lib-core/src/dialects/init.rs index 829f7753b..ce344bd81 100644 --- a/crates/lib-core/src/dialects/init.rs +++ b/crates/lib-core/src/dialects/init.rs @@ -22,6 +22,7 @@ pub enum DialectKind { Athena, Bigquery, Clickhouse, + Databricks, Duckdb, Postgres, Redshift, diff --git a/crates/lib-dialects/Cargo.toml b/crates/lib-dialects/Cargo.toml index b98f7b5a7..0615e5c54 100644 --- a/crates/lib-dialects/Cargo.toml +++ b/crates/lib-dialects/Cargo.toml @@ -21,6 +21,7 @@ default = [ "athena", "bigquery", "clickhouse", + "databricks", "duckdb", "hive", "postgres", @@ -33,6 +34,7 @@ default = [ athena = [] bigquery = [] clickhouse = [] +databricks = ["sparksql"] duckdb = ["postgres"] hive = [] postgres = [] diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs new file mode 100644 index 000000000..e097253e9 --- /dev/null +++ b/crates/lib-dialects/src/databricks.rs @@ -0,0 +1,10 @@ +use sqruff_lib_core::dialects::{base::Dialect, init::DialectKind}; + +pub fn dialect() -> Dialect { + let sparksql = crate::sparksql::dialect(); + + let mut databricks = sparksql; + databricks.name = DialectKind::Databricks; + + return databricks; +} diff --git a/crates/lib-dialects/src/databricks_keywords.rs b/crates/lib-dialects/src/databricks_keywords.rs new file mode 100644 index 000000000..7ce50bdbc --- /dev/null +++ b/crates/lib-dialects/src/databricks_keywords.rs @@ -0,0 +1,47 @@ +/// Databricks reserved keywords, which can be found at https://docs.databricks.com/sql/language-manual/sql-ref-reserved-words.html + +pub(crate) const RESERVED_KEYWORDS: &[&'static str] = &[ + "ANTI", + "CROSS", + "EXCEPT", + "FULL", + "INNER", + "INTERSECT", + "JOIN", + "LATERAL", + "LEFT", + "MINUS", + "NATURAL", + "ON", + "RIGHT", + "SEMI", + "UNION", + "USING", +]; + +pub(crate) const UNRESERVED_KEYWORDS: &[&'static str] = &[ + "CATALOG", + 
"COMPENSATION", + "CRON", + "ENFORCED", + "EVOLUTION", + "FEATURE", + "IDENTIFIER", + "MANAGED", + "MASK", + "NORELY", + "OPTIMIZATION", + "OPTIMIZE", + "PREDICTIVE", + "PROVIDER", + "PYTHON", + "RECIPIENT", + "RELY", + "SCHEDULE", + "SQL", + "TAGS", + "TIMESERIES", + "VOLUME", + "VOLUMES", + "ZORDER", +]; diff --git a/crates/lib-dialects/src/lib.rs b/crates/lib-dialects/src/lib.rs index 01ea46e54..4b2833579 100644 --- a/crates/lib-dialects/src/lib.rs +++ b/crates/lib-dialects/src/lib.rs @@ -15,6 +15,10 @@ mod bigquery_keywords; pub mod clickhouse; #[cfg(feature = "clickhouse")] mod clickhouse_keywords; +#[cfg(feature = "databricks")] +pub mod databricks; +#[cfg(feature = "databricks")] +pub mod databricks_keywords; #[cfg(feature = "duckdb")] pub mod duckdb; #[cfg(feature = "hive")] @@ -54,6 +58,8 @@ pub fn kind_to_dialect(kind: &DialectKind) -> Option { DialectKind::Bigquery => bigquery::dialect(), #[cfg(feature = "clickhouse")] DialectKind::Clickhouse => clickhouse::dialect(), + #[cfg(feature = "databricks")] + DialectKind::Databricks => databricks::dialect(), #[cfg(feature = "duckdb")] DialectKind::Duckdb => duckdb::dialect(), #[cfg(feature = "postgres")] diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/.sqruff b/crates/lib-dialects/test/fixtures/dialects/databricks/.sqruff new file mode 100644 index 000000000..5aae42e07 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/.sqruff @@ -0,0 +1,2 @@ +[sqlfluff] +dialect = databricks diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.sql new file mode 100644 index 000000000..816d7a7bd --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.sql @@ -0,0 +1,18 @@ +-- Transfer ownership of the catalog to another user +ALTER CATALOG some_cat OWNER TO `alf@melmak.et`; +ALTER CATALOG some_cat OWNER TO my_group; + +-- SET is allowed as an optional 
keyword +ALTER CATALOG some_cat SET OWNER TO `alf@melmak.et`; +ALTER CATALOG some_cat SET OWNER TO my_group; + +-- Set and unset catalog tags +ALTER CATALOG some_cat SET TAGS ('tag1'='value1'); +ALTER CATALOG some_cat SET TAGS ('tag2'='value2', 'tag3'='value3'); +ALTER CATALOG some_cat UNSET TAGS ('tag1'); +ALTER CATALOG some_cat UNSET TAGS ('tag2', 'tag3'); + +-- Enable/Inherit/Disable Predictive Optimization +ALTER CATALOG some_cat ENABLE PREDICTIVE OPTIMIZATION; +ALTER CATALOG some_cat INHERIT PREDICTIVE OPTIMIZATION; +ALTER CATALOG some_cat DISABLE PREDICTIVE OPTIMIZATION; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.sql new file mode 100644 index 000000000..7351e47f6 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.sql @@ -0,0 +1,27 @@ +-- Transfer ownership of the schema to another user +ALTER SCHEMA some_cat OWNER TO `alf@melmak.et`; +ALTER SCHEMA some_cat OWNER TO my_group; + +-- SET is allowed as an optional keyword +ALTER SCHEMA some_cat SET OWNER TO `alf@melmak.et`; +ALTER SCHEMA some_cat SET OWNER TO my_group; + +-- DATABASE IS ALLOWED INSTEAD OF SCHEMA +ALTER DATABASE some_cat OWNER TO `alf@melmak.et`; +ALTER DATABASE some_cat SET OWNER TO `alf@melmak.et`; +ALTER DATABASE some_cat OWNER TO my_group; +ALTER DATABASE some_cat SET OWNER TO my_group; + +-- Set and unset schema tags +ALTER SCHEMA some_cat SET TAGS ('tag1'='value1'); +ALTER DATABASE some_cat SET TAGS ('tag2'='value2', 'tag3'='value3'); +ALTER DATABASE some_cat UNSET TAGS ('tag1'); +ALTER SCHEMA some_cat UNSET TAGS ('tag2', 'tag3'); + +-- Enable/Inherit/Disable Predictive Optimization +ALTER SCHEMA some_cat ENABLE PREDICTIVE OPTIMIZATION; +ALTER DATABASE some_cat INHERIT PREDICTIVE OPTIMIZATION; +ALTER SCHEMA some_cat DISABLE PREDICTIVE OPTIMIZATION; + +-- -- Add some schema properties +ALTER SCHEMA some_cat SET DBPROPERTIES ('Edited-by'='John 
Doe', 'Edit-date'='2020-01-01'); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.sql new file mode 100644 index 000000000..4399cdac9 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.sql @@ -0,0 +1,95 @@ +-- ALTER TABLE examples from Databricks documentation +-- https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-alter-table.html + +ALTER TABLE Student RENAME TO StudentInfo; + +ALTER TABLE default.StudentInfo PARTITION (age='10') RENAME TO PARTITION (age='15'); + +ALTER TABLE StudentInfo ADD columns (LastName string, DOB timestamp); + +ALTER TABLE StudentInfo DROP COLUMN (DOB); + +ALTER TABLE StudentInfo DROP COLUMNS IF EXISTS (LastName, DOB); + +ALTER TABLE StudentInfo ADD IF NOT EXISTS PARTITION (age=18); + +ALTER TABLE StudentInfo DROP IF EXISTS PARTITION (age=18); + +ALTER TABLE StudentInfo ADD IF NOT EXISTS PARTITION (age=18) PARTITION (age=20); + +ALTER TABLE StudentInfo RECOVER PARTITIONS; + +ALTER TABLE StudentInfo ALTER COLUMN name COMMENT "new comment"; + +ALTER TABLE StudentInfo RENAME COLUMN name TO FirstName; + +-- Change the file Location +ALTER TABLE dbx.tab1 PARTITION (a='1', b='2') SET LOCATION '/path/to/part/ways'; + +-- SET SERDE/ SERDE Properties (DBR only) +ALTER TABLE test_tab SET SERDE 'org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe'; + +ALTER TABLE dbx.tab1 SET SERDE 'org.apache.hadoop' WITH SERDEPROPERTIES ('k' = 'v', 'kay' = 'vee'); + +-- SET TABLE PROPERTIES +ALTER TABLE dbx.tab1 SET TBLPROPERTIES ('winner' = 'loser'); + +-- DROP TABLE PROPERTIES +ALTER TABLE dbx.tab1 UNSET TBLPROPERTIES ('winner'); + +-- Drop the "deletion vectors" from a Delta table +ALTER TABLE my_table DROP FEATURE deletionVectors; + +-- 24 hours later +ALTER TABLE my_table DROP FEATURE deletionVectors TRUNCATE HISTORY; + +-- Applies three tags to the table named `test`. 
+ALTER TABLE test SET TAGS ('tag1' = 'val1', 'tag2' = 'val2', 'tag3' = 'val3'); + +-- Removes three tags from the table named `test`. +ALTER TABLE test UNSET TAGS ('tag1', 'tag2', 'tag3'); + +-- Applies three tags to table `main.schema1.test` column `col1`. +ALTER TABLE main.schema1.test ALTER COLUMN col1 SET TAGS ('tag1' = 'val1', 'tag2' = 'val2', 'tag3' = 'val3'); + +-- Removes three tags from table `main.schema1.test` column `col1`. +ALTER TABLE main.schema1.test ALTER COLUMN col1 UNSET TAGS ('tag1', 'tag2', 'tag3'); + +-- Enables predictive optimization for my_table +ALTER TABLE my_table ENABLE PREDICTIVE OPTIMIZATION; + +ALTER TABLE sales SET ROW FILTER us_filter ON (); + +ALTER TABLE sales SET ROW FILTER us_filter ON (region); + +ALTER TABLE sales DROP ROW FILTER; + +ALTER TABLE users ALTER COLUMN ssn SET MASK ssn_mask; + +ALTER TABLE users ALTER COLUMN ssn SET MASK ssn_mask USING COLUMNS (ssn_value); + +ALTER TABLE users ALTER COLUMN ssn DROP MASK; + +ALTER TABLE persons ADD CONSTRAINT persons_pk PRIMARY KEY(first_name, last_name); + +ALTER TABLE pets ADD CONSTRAINT pets_persons_fk + FOREIGN KEY(owner_first_name, owner_last_name) REFERENCES persons + NOT ENFORCED RELY; + +ALTER TABLE pets ADD CONSTRAINT pets_name_not_cute_chk CHECK (length(name) < 20); + +ALTER TABLE pets DROP CONSTRAINT pets_name_not_cute_chk; + +ALTER TABLE persons DROP CONSTRAINT persons_pk RESTRICT; + +ALTER TABLE pets DROP FOREIGN KEY IF EXISTS (owner_first_name, owner_last_name); + +ALTER TABLE persons DROP PRIMARY KEY CASCADE; + +ALTER TABLE rocks DROP COLUMN rock; + +ALTER TABLE rocks DROP COLUMN rock, loc; + +ALTER TABLE rocks DROP COLUMN IF EXISTS rock, loc; + +ALTER TABLE rocks DROP COLUMN IF EXISTS (rock, loc); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.sql new file mode 100644 index 000000000..a4c57db08 --- /dev/null +++ 
b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.sql @@ -0,0 +1,34 @@ +-- ALTER VIEW examples from Databricks documentation +-- https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-alter-view.html + +ALTER VIEW tempsc1.v1 RENAME TO tempsc1.v2; + +ALTER VIEW IDENTIFIER('tempsc1.v1') RENAME TO IDENTIFIER('tempsc1.v2'); + +ALTER VIEW tempsc1.v2 SET TBLPROPERTIES ('created.by.user' = "John", 'created.date' = '01-01-2001' ); + +ALTER VIEW tempsc1.v2 UNSET TBLPROPERTIES (`created`.`by`.`user`, created.date); + +ALTER VIEW tempsc1.v2 AS SELECT * FROM tempsc1.v1; + +ALTER VIEW v1 OWNER TO `alf@melmak.et`; + +ALTER VIEW v1 SET OWNER TO `alf@melmak.et`; + +ALTER VIEW v1 WITH SCHEMA BINDING; +ALTER VIEW v1 WITH SCHEMA COMPENSATION; +ALTER VIEW v1 WITH SCHEMA TYPE EVOLUTION; +ALTER VIEW v1 WITH SCHEMA EVOLUTION; + +ALTER MATERIALIZED VIEW my_mv + ADD SCHEDULE CRON '0 0 0 * * ? *' AT TIME ZONE 'America/Los_Angeles'; + +ALTER MATERIALIZED VIEW my_mv + ALTER SCHEDULE CRON '0 0/15 * * * ? 
*'; + +ALTER MATERIALIZED VIEW my_mv + DROP SCHEDULE; + +ALTER VIEW test SET TAGS ('tag1' = 'val1', 'tag2' = 'val2', 'tag3' = 'val3'); + +ALTER VIEW test UNSET TAGS ('tag1', 'tag2', 'tag3'); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.sql new file mode 100644 index 000000000..417e74ee1 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.sql @@ -0,0 +1,16 @@ +-- Rename a volume +ALTER VOLUME some_vol RENAME TO some_new_vol; + +-- Transfer ownership of the volume to another user +ALTER VOLUME some_vol OWNER TO `alf@melmak.et`; +ALTER VOLUME some_vol OWNER TO my_group; + +-- SET is allowed as an optional keyword +ALTER VOLUME some_vol SET OWNER TO `alf@melmak.et`; +ALTER VOLUME some_vol SET OWNER TO my_group; + +-- Set and unset volume tags +ALTER VOLUME some_vol SET TAGS ('tag1'='value1'); +ALTER VOLUME some_vol SET TAGS ('tag2'='value2', 'tag3'='value3'); +ALTER VOLUME some_vol UNSET TAGS ('tag1'); +ALTER VOLUME some_vol UNSET TAGS ('tag2', 'tag3'); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.sql new file mode 100644 index 000000000..ad6185285 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.sql @@ -0,0 +1,12 @@ +-- Databricks notebook source + +SELECT COL1 FROM TABLE1 + +-- COMMAND ---------- + +SELECT COL2 FROM TABLE2 + +-- COMMAND ---------- + +SELECT COL3 FROM TABLE3; +SELECT COL4 FROM TABLE4; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.sql new file mode 100644 index 000000000..3b91abfa8 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.sql @@ -0,0 +1,19 @@ +COMMENT ON CATALOG my_catalog IS 'This is my catalog'; + 
+COMMENT ON CONNECTION mysql_connection IS 'this is a mysql connection'; + +COMMENT ON SCHEMA my_schema IS 'This is my schema'; + +COMMENT ON DATABASE my_other_schema IS 'This is my other schema'; + +COMMENT ON TABLE my_table IS 'This is my table'; + +COMMENT ON TABLE my_table IS NULL; + +COMMENT ON SHARE my_share IS 'A good share'; + +COMMENT ON RECIPIENT my_recipient IS 'A good recipient'; + +COMMENT ON PROVIDER my_provider IS 'A good provider'; + +COMMENT ON VOLUME my_volume IS 'Huge volume'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.sql new file mode 100644 index 000000000..04803bbec --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.sql @@ -0,0 +1,9 @@ +-- Create catalog `customer_cat`. +-- This throws exception if catalog with name customer_cat already exists. +CREATE CATALOG customer_cat; + +-- Create catalog `customer_cat` only if catalog with same name doesn't exist. +CREATE CATALOG IF NOT EXISTS customer_cat; + +-- Create catalog `customer_cat` only if catalog with same name doesn't exist, with a comment. 
+CREATE CATALOG IF NOT EXISTS customer_cat COMMENT 'This is customer catalog'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.sql new file mode 100644 index 000000000..819f8efe9 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.sql @@ -0,0 +1,30 @@ +-- Create database with all optional syntax +CREATE DATABASE IF NOT EXISTS database_name +COMMENT "database_comment" +LOCATION "root/database_directory" +WITH DBPROPERTIES ( "property_name" = "property_value"); + +-- Create schema with all optional syntax +CREATE SCHEMA IF NOT EXISTS database_name +COMMENT "database_comment" +LOCATION "root/database_directory" +WITH DBPROPERTIES ( "property_name" = "property_value" ); + +-- Create database `customer_db`. +CREATE DATABASE customer_db; + +-- Create database `customer_db` only if database with same name doesn't exist. +CREATE DATABASE IF NOT EXISTS customer_db; + +-- `Comments`,`Specific Location` and `Database properties`. 
+CREATE DATABASE IF NOT EXISTS customer_db +COMMENT 'This is customer database' LOCATION '/user' +WITH DBPROPERTIES ("ID" = "001", "Name" = 'John'); + +-- Create `inventory_db` Database +CREATE DATABASE inventory_db +COMMENT 'This database is used to maintain Inventory'; + +-- Create schema with a managed location +CREATE SCHEMA IF NOT EXISTS database_name +MANAGED LOCATION "s3://root_database_bucket/" diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_function.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/create_function.sql new file mode 100644 index 000000000..b84a5e451 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_function.sql @@ -0,0 +1,106 @@ +-- Create FUNCTION with all optional syntax +CREATE OR REPLACE TEMPORARY FUNCTION IF NOT EXISTS +function_name AS "class_name" USING FILE "resource_locations"; + +-- Create a permanent function called `simple_udf`. +CREATE FUNCTION simple_udf AS 'SimpleUdf' +USING JAR '/tmp/SimpleUdf.jar'; + +-- Created a temporary function. +CREATE TEMPORARY FUNCTION simple_temp_udf AS 'SimpleUdf' +USING JAR '/tmp/SimpleUdf.jar'; + +-- Replace the implementation of `simple_udf` +CREATE OR REPLACE FUNCTION simple_udf AS 'SimpleUdfR' +USING JAR '/tmp/SimpleUdfR.jar'; + +-- Create a permanent function `test_avg` +CREATE FUNCTION test_avg +AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage'; + +---- Create Temporary function `test_avg` +CREATE TEMPORARY FUNCTION test_avg +AS 'org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage'; + +-- Create a temporary function with no parameter +CREATE TEMPORARY FUNCTION hello() +RETURNS STRING RETURN 'Hello World!'; + +-- Create a temporary function with no parameter. 
+CREATE OR REPLACE TEMPORARY FUNCTION function_name() +RETURNS TIMESTAMP LANGUAGE SQL +RETURN SELECT MAX(time) AS time FROM my_table; + +-- Create a permanent function with parameters +CREATE FUNCTION area(x DOUBLE, y DOUBLE) +RETURNS DOUBLE +RETURN x * y; + +-- Compose SQL functions. +CREATE FUNCTION square(x DOUBLE) +RETURNS DOUBLE +RETURN area(x, x); + +-- Create a CTE function +CREATE FUNCTION cte_function(x INT) +RETURNS string +LANGUAGE SQL +RETURN +WITH cte AS (SELECT x AS y) +SELECT * FROM cte; + +-- Create a non-deterministic function +CREATE FUNCTION roll_dice() + RETURNS INT + NOT DETERMINISTIC + CONTAINS SQL + COMMENT 'Roll a single 6 sided die' + RETURN (rand() * 6)::INT + 1; + + +-- Create a non-deterministic function with parameters and defaults +CREATE FUNCTION roll_dice(num_dice INT DEFAULT 1 COMMENT 'number of dice to roll (Default: 1)', + num_sides INT DEFAULT 6 COMMENT 'number of sides per die (Default: 6)') + RETURNS INT + NOT DETERMINISTIC + CONTAINS SQL + COMMENT 'Roll a number of n-sided dice' + RETURN aggregate(sequence(1, roll_dice.num_dice, 1), + 0, + (acc, x) -> (rand() * roll_dice.num_sides)::int, + acc -> acc + roll_dice.num_dice); + +-- Create Python functions +CREATE FUNCTION main.default.greet(s STRING) + RETURNS STRING + LANGUAGE PYTHON + AS $$ + def greet(name): + return "Hello " + name + "!" 
+ + return greet(s) if s else None + $$; + +-- Created Table Valued Function simple +CREATE FUNCTION return_table() +RETURNS TABLE +RETURN +SELECT time FROM my_table +; + +-- Created Table Valued Function with column spec + comment +CREATE FUNCTION return_table() +RETURNS TABLE (col_a string, col_b string comment "asdf") +RETURN +SELECT col_a, col_b FROM my_table +; + + +-- backticked identifier +create or replace function `catalog`.`schema`.`name` ( + param int +) +returns int +return +select param +; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_table.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/create_table.sql new file mode 100644 index 000000000..342959dbf --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_table.sql @@ -0,0 +1,80 @@ +CREATE TABLE tablename +( + id_column INT, + othercolumn STRING, + generated_always_as_expression DATE GENERATED ALWAYS AS (CAST(birth_date AS DATE)), + generated_by_default BIGINT GENERATED BY DEFAULT AS IDENTITY, + generated_always BIGINT GENERATED ALWAYS AS IDENTITY, + generated_column_start_with BIGINT GENERATED ALWAYS AS IDENTITY (START WITH 10), + generated_column_increment_by BIGINT GENERATED ALWAYS AS IDENTITY (INCREMENT BY 5), + generated_column_start_with_increment_by BIGINT GENERATED ALWAYS AS IDENTITY (START WITH 10 INCREMENT BY 5) +) +USING DELTA +LOCATION "s3://someplace" +CLUSTER BY (id_column); +OPTIMIZE tablename; + + +OPTIMIZE tablename +WHERE date >= current_timestamp() - INTERVAL 1 day +ZORDER BY (eventType, eventTime); + + +-- Creates a Delta table +CREATE TABLE student (id INT, name STRING, age INT); + +-- Use data from another table +CREATE TABLE student_copy AS SELECT * FROM student; + +-- Creates a CSV table from an external directory +CREATE TABLE student USING CSV LOCATION '/path/to/csv_files'; + +-- Specify table comment and properties +CREATE TABLE student (id INT, name STRING, age INT) + COMMENT 'this is a comment' + 
TBLPROPERTIES ('foo'='bar'); + +-- Specify table comment and properties with different clauses order +CREATE TABLE student (id INT, name STRING, age INT) + TBLPROPERTIES ('foo'='bar') + COMMENT 'this is a comment'; + +-- Create partitioned table +CREATE TABLE student (id INT, name STRING, age INT) + PARTITIONED BY (age); + +-- Create a table with a generated column +CREATE TABLE rectangles(a INT, b INT, + area INT GENERATED ALWAYS AS (a * b)); + +-- Create a table with a primary key +CREATE TABLE rectangles(a INT, b INT PRIMARY KEY); + +-- Create a table with a not null primary key +CREATE TABLE rectangles(a INT NOT NULL, b INT NOT NULL PRIMARY KEY); + +-- Create a table with a foreign key relation +CREATE OR REPLACE TABLE TABLE1 ( + DATE_VALUE DATE NOT NULL + CONSTRAINT DATE_CONSTRAINT + FOREIGN KEY REFERENCES TABLE2 +); + +-- Create a table with a column with default value +CREATE TABLE student (id INT, name STRING DEFAULT 'bobby tables', age INT); + +-- Create a table with non nullable column with default value +CREATE TABLE student (id INT, name STRING NOT NULL DEFAULT 'bobby tables', age INT); + +-- Create a table with a default timestamp +CREATE TABLE clock ( + which_time TIMESTAMP DEFAULT current_timestamp() +); + +-- Create a table with mixing default value and constraints +CREATE TABLE clock ( + which_time TIMESTAMP CONSTRAINT clock_pk PRIMARY KEY DEFAULT current_timestamp() NOT NULL +); + +-- Creates a table using identifier +CREATE TABLE IDENTIFIER('student') (id INT, name STRING, age INT); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.sql new file mode 100644 index 000000000..99f9939a9 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.sql @@ -0,0 +1,27 @@ +-- Create volume `customer_vol`. +-- This throws exception if volume with name customer_vol already exists. 
+CREATE VOLUME customer_vol; + +-- Create volume `customer_vol` only if volume with same name doesn't exist. +CREATE VOLUME IF NOT EXISTS customer_vol; + +-- Create volume `customer_vol` only if volume with same name doesn't exist, +-- with a comment. +CREATE VOLUME IF NOT EXISTS customer_vol COMMENT 'This is customer volume'; + +-- Create external volume `customer_vol_external` +-- This throws exception if volume with name customer_vol_external +-- already exists. +CREATE EXTERNAL VOLUME customer_vol_external +LOCATION 's3://s3-path/'; + +-- Create external volume `customer_vol_external` +-- only if volume with same name doesn't exist, with a location. +CREATE EXTERNAL VOLUME IF NOT EXISTS customer_vol_external +LOCATION 's3://s3-path/'; + +-- Create external volume `customer_vol_external` +-- only if volume with same name doesn't exist, with a location and a comment. +CREATE EXTERNAL VOLUME IF NOT EXISTS customer_vol_external +LOCATION 's3://s3-path/' +COMMENT 'This is customer volume'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.sql new file mode 100644 index 000000000..f74d12812 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.sql @@ -0,0 +1,19 @@ +SELECT + my_table.a, + other_table.b +FROM my_table +LEFT JOIN other_table + ON DATEDIFF(SECOND, my_table.timestamp_a, other_table.timestamp_b) > 1; + +SELECT + DATE_ADD(MICROSECOND, 5, start_dt) AS date_add_micro, + DATE_DIFF(MILLISECOND, start_dt, end_dt) AS datediff_milli, + DATEADD(MINUTE, 5, start_dt) AS dateadd_min, + DATEDIFF(HOUR, start_dt, end_dt) AS datediff_hr, + TIMEDIFF(DAY, start_dt, end_dt) AS timediff_day, + TIMESTAMPADD(DAYOFYEAR, 5, start_dt) AS ts_add_day_of_yr, + TIMESTAMPDIFF(WEEK, start_dt, end_dt) AS ts_diff_week, + DATE_ADD(MONTH, 5, start_dt) AS date_add_month, + DATE_ADD(QUARTER, 5, start_dt) AS date_add_quarter, + DATE_ADD(YEAR, 5, 
start_dt) AS date_add_year +FROM my_table; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.sql new file mode 100644 index 000000000..dc45ce325 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.sql @@ -0,0 +1,7 @@ +DECLARE var; +DECLARE OR REPLACE var; +DECLARE OR REPLACE VARIABLE var; +DECLARE var INT DEFAULT 5; +DECLARE var INT = 5; +DECLARE var = 5; +DECLARE var DEFAULT 5; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.sql new file mode 100644 index 000000000..4fcd7d849 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.sql @@ -0,0 +1,2 @@ +-- Desribe the volume +DESCRIBE VOLUME VACCINE_VOLUME; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.sql new file mode 100644 index 000000000..a93a3fe0b --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.sql @@ -0,0 +1,5 @@ +-- Drop the catalog and its schemas +DROP CATALOG vaccine CASCADE; + +-- Drop the catalog using IF EXISTS and only if it is empty. +DROP CATALOG IF EXISTS vaccine RESTRICT; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.sql new file mode 100644 index 000000000..cd3ebede1 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.sql @@ -0,0 +1,5 @@ +-- Drop the volume +DROP VOLUME vaccine_volume; + +-- Drop the volume using IF EXISTS. 
+DROP VOLUME IF EXISTS vaccine_volume; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.sql new file mode 100644 index 000000000..9fafcadb0 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.sql @@ -0,0 +1,24 @@ +-- Databricks notebook source +-- MAGIC %md +-- MAGIC # Dummy Notebook + +-- COMMAND ---------- + +-- DBTITLE 1,Select Data + +SELECT x FROM y + +-- COMMAND ---------- + +-- MAGIC %python +-- MAGIC foo = 'bar' +-- MAGIC print(foo) + +-- COMMAND ---------- + +SELECT a FROM b; + +-- COMMAND ---------- + +-- MAGIC %sh +-- MAGIC echo heloworld diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.sql new file mode 100644 index 000000000..d59646f11 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.sql @@ -0,0 +1,7 @@ +--https://docs.databricks.com/en/sql/language-manual/sql-ref-function-invocation.html#named-parameter-invocation + +select my_function(arg1 => 3, arg2 => 4) from dual; + +select my_function(3, arg2 => 4) from dual; + +select my_function(arg1 => 3, 4) from dual; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/pivot.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/pivot.sql new file mode 100644 index 000000000..6d4df77d7 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/pivot.sql @@ -0,0 +1,38 @@ +-- Examples from https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-qry-select-pivot.html + +-- A very basic PIVOT +-- Given a table with sales by quarter, return a table that returns sales across quarters per year. 
+SELECT year, region, q1, q2, q3, q4 +FROM sales +PIVOT (sum(sales) AS sales + FOR quarter + IN (1 AS q1, 2 AS q2, 3 AS q3, 4 AS q4)); + +-- Also PIVOT on region +SELECT year, q1_east, q1_west, q2_east, q2_west, q3_east, q3_west, q4_east, q4_west +FROM sales +PIVOT (sum(sales) AS sales + FOR (quarter, region) + IN ((1, 'east') AS q1_east, (1, 'west') AS q1_west, (2, 'east') AS q2_east, (2, 'west') AS q2_west, + (3, 'east') AS q3_east, (3, 'west') AS q3_west, (4, 'east') AS q4_east, (4, 'west') AS q4_west)); + +-- To aggregate across regions the column must be removed from the input. +SELECT year, q1, q2, q3, q4 +FROM (SELECT year, quarter, sales FROM sales) AS s +PIVOT (sum(sales) AS sales + FOR quarter + IN (1 AS q1, 2 AS q2, 3 AS q3, 4 AS q4)); + +-- A PIVOT with multiple aggregations +SELECT year, q1_total, q1_avg, q2_total, q2_avg, q3_total, q3_avg, q4_total, q4_avg + FROM (SELECT year, quarter, sales FROM sales) AS s + PIVOT (sum(sales) AS total, avg(sales) AS avg + FOR quarter + IN (1 AS q1, 2 AS q2, 3 AS q3, 4 AS q4)); + +-- A PIVOT with anonymous columns +SELECT year, region, q1, q2, q3, q4 +FROM sales +PIVOT (sum(sales) + FOR quarter + IN (1 AS q1, 2 AS q2, 3 AS q3, 4 AS q4)); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/select.sql new file mode 100644 index 000000000..8d8cb0ef5 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select.sql @@ -0,0 +1,11 @@ +select * +from shopify_cz.order +; + +SELECT * +FROM IDENTIFIER('table_name') +; + +SELECT * +FROM IDENTIFIER('schema_name' || '.table_name') +; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.sql new file mode 100644 index 000000000..ab7643cb4 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.sql @@ -0,0 +1,114 @@ 
+SELECT + id, + name, + age, + class, + address, + c_age, + d_age +FROM person + LATERAL VIEW EXPLODE(ARRAY(30, 60)) tbl_name AS c_age + LATERAL VIEW EXPLODE(ARRAY(40, 80)) AS d_age; + +SELECT + c_age, + COUNT(*) AS record_count +FROM person + LATERAL VIEW EXPLODE(ARRAY(30, 60)) AS c_age + LATERAL VIEW EXPLODE(ARRAY(40, 80)) AS d_age +GROUP BY c_age; + +SELECT + id, + name, + age, + class, + address, + c_age, + d_age +FROM person + LATERAL VIEW EXPLODE(ARRAY()) tbl_name AS c_age; + +SELECT + id, + name, + age, + class, + address, + time, + c_age +FROM person + LATERAL VIEW OUTER EXPLODE(ARRAY()) tbl_name AS c_age; + +SELECT + id, + name, + age, + class, + address, + time, + c_age +FROM person + LATERAL VIEW OUTER EXPLODE(ARRAY()) tbl_name c_age; + +SELECT + id, + name, + age, + class, + address, + time, + c_age +FROM person + LATERAL VIEW OUTER EXPLODE(ARRAY()) c_age; + +SELECT + person.id, + exploded_people.name, + exploded_people.age, + exploded_people.state +FROM person + LATERAL VIEW INLINE(array_of_structs) exploded_people AS name, age, state; + +SELECT + p.id, + exploded_people.name, + exploded_people.age, + exploded_people.state +FROM person AS p + LATERAL VIEW INLINE(array_of_structs) exploded_people AS name, age, state; + +SELECT + p.id, + exploded_people.name, + exploded_people.age, + exploded_people.state +FROM person AS p + LATERAL VIEW INLINE(array_of_structs) exploded_people; + +SELECT + p.id, + exploded_people.name, + exploded_people.age, + exploded_people.state +FROM person AS p + LATERAL VIEW INLINE(array_of_structs) exploded_people name, age, state; + +SELECT + p.id, + exploded_people.name, + exploded_people.age, + exploded_people.state +FROM person AS p + LATERAL VIEW INLINE(array_of_structs) AS name, age, state; + +SELECT + t1.column1, + CAST(GET_JSON_OBJECT(things, '$.percentage') AS DECIMAL(16, 8) + ) AS ptc +FROM table1 AS t1 +LEFT JOIN table2 AS t2 + ON + c.column1 = p.column1 + AND t2.type = 'SOMETHING' + LATERAL VIEW OUTER 
EXPLODE(t2.column2) AS things; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.sql new file mode 100644 index 000000000..70c8590b5 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.sql @@ -0,0 +1,140 @@ +-- Sum of quantity per dealership. Group by `id`. +SELECT + id, + sum(quantity) AS sum_quantity +FROM dealer GROUP BY id ORDER BY id; + +-- Use column position in GROUP by clause. +SELECT + id, + sum(quantity) AS sum_quantity +FROM dealer GROUP BY 1 ORDER BY 1; + +-- Multiple aggregations. +-- 1. Sum of quantity per dealership. +-- 2. Max quantity per dealership. +SELECT + id, + sum(quantity) AS sum_quantity, + max(quantity) AS max_quantity +FROM dealer GROUP BY id ORDER BY id; + +-- Count the number of distinct dealer cities per car_model. +SELECT + car_model, + count(DISTINCT city) AS count_distinct_city +FROM dealer GROUP BY car_model; + +-- Sum of only 'Honda Civic' and 'Honda CRV' quantities per dealership. +SELECT + id, + sum(quantity) FILTER ( + WHERE car_model IN ('Honda Civic', 'Honda CRV') + ) AS `sum(quantity)` FROM dealer +GROUP BY id ORDER BY id; + +-- Aggregations using multiple sets of grouping columns in a single statement. +-- Following performs aggregations based on four sets of grouping columns. +-- 1. city, car_model +-- 2. city +-- 3. car_model +-- 4. Empty grouping set. Returns quantities for all city and car models. 
+SELECT + city, + car_model, + sum(quantity) AS sum_quantity +FROM dealer +GROUP BY GROUPING SETS ((city, car_model), (city), (car_model), ()) +ORDER BY city; + +SELECT + city, + car_model, + sum(quantity) AS sum_quantity +FROM dealer +GROUP BY city, car_model GROUPING SETS ((city, car_model), (city), (car_model), ()) +ORDER BY city; + +SELECT + city, + car_model, + sum(quantity) AS sum_quantity +FROM dealer +GROUP BY city, car_model, GROUPING SETS ((city, car_model), (city), (car_model), ()) +ORDER BY city; + +-- Group by processing with `ROLLUP` clause. +-- Equivalent GROUP BY GROUPING SETS ((city, car_model), (city), ()) +SELECT + city, + car_model, + sum(quantity) AS sum_quantity +FROM dealer +GROUP BY city, car_model WITH ROLLUP +ORDER BY city, car_model; + +-- Group by processing with `CUBE` clause. +-- Equivalent GROUP BY: +-- GROUPING SETS ((city, car_model), (city), (car_model), ()) +SELECT + city, + car_model, + sum(quantity) AS sum_quantity +FROM dealer +GROUP BY city, car_model WITH CUBE +ORDER BY city, car_model; + +-- Select the first row in column age +-- Implicit GROUP BY +SELECT first(age) FROM person; + +-- Implicit GROUP BY +SELECT + first(age IGNORE NULLS) AS first_age, + last(id) AS last_id, + sum(id) AS sum_id +FROM person; + +-- CUBE within GROUP BY clause +SELECT + name, + age, + count(*) AS record_count +FROM people +GROUP BY cube(name, age); + +-- CUBE within GROUP BY clause with single clause on newline +SELECT + name, + count(*) AS record_count +FROM people +GROUP BY cube( + name +); + +-- CUBE within GROUP BY clause with multiple clauses on newline +SELECT + name, + age, + count(*) AS record_count +FROM people +GROUP BY cube( + name, + age +); + +-- ROLLUP within GROUP BY clause +SELECT + name, + age, + count(*) AS record_count +FROM people +GROUP BY rollup(name, age); + +-- GROUP BY ALL +SELECT + name, + age, + count(*) AS record_count +FROM people +GROUP BY ALL; diff --git 
a/crates/lib-dialects/test/fixtures/dialects/databricks/select_window.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/select_window.sql new file mode 100644 index 000000000..d413ea74c --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select_window.sql @@ -0,0 +1,9 @@ +select +lag(test) +over (ORDER BY test) +from schema.test_table; + +select +lag(test) +over (PARTITION BY test ORDER BY test) +from schema.test_table; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.sql new file mode 100644 index 000000000..176ce3086 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.sql @@ -0,0 +1,5 @@ +SET TIME ZONE LOCAL; +SET TIME ZONE 'America/Los_Angeles'; +SET TIME ZONE '+08:00'; +SET TIME ZONE INTERVAL 1 HOUR 30 MINUTES; +SET TIME ZONE INTERVAL '08:30:00' HOUR TO SECOND; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.sql new file mode 100644 index 000000000..8142fbbdc --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.sql @@ -0,0 +1,20 @@ +-- simple assignment +SET VAR var1 = 5; + +-- A complex expression assignment +SET VARIABLE var1 = (SELECT max(c1) FROM VALUES(1), (2) AS t(c1)); + +-- resetting the variable to DEFAULT (set in declare) +SET VAR var1 = DEFAULT; + +-- A multi variable assignment +SET VAR (var1, var2, var3) = (VALUES(100,'x123',DEFAULT)); + +-- escpaed function name +SET VARIABLE `foo` = select 'bar'; + +-- function call +set var tz = current_timezone(); + +-- set multiple vars in one statement +set var x1 = 12, x2 = 'helloworld'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.sql new file mode 100644 index 000000000..103a1209b --- 
/dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.sql @@ -0,0 +1,21 @@ +-- Lists all databases +SHOW DATABASES; + +-- List all databases from userdb catalog +SHOW DATABASES FROM userdb; + +-- List all databases in userdb catalog +SHOW DATABASES IN userdb; + +-- List all databases from default catalog matching the pattern `sam*` +SHOW DATABASES FROM default LIKE 'sam*'; + +-- List all databases from default catalog matching the pattern `sam*` +-- without LIKE keyword +SHOW DATABASES FROM default 'sam*'; + +-- List all databases matching the pattern `sam*|suj` without LIKE keyword +SHOW DATABASES 'sam*|suj'; + +-- Lists all databases. Keywords SCHEMAS and DATABASES are interchangeable. +SHOW SCHEMAS; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.sql new file mode 100644 index 000000000..c632024b3 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.sql @@ -0,0 +1,54 @@ +-- Unfiltered list of all functions; +SHOW FUNCTIONS; + +-- List a system function `trim` by searching both user defined and system +-- defined functions. +SHOW FUNCTIONS trim; + +SHOW ALL FUNCTIONS trim; + +-- List a system function `concat` by searching system defined functions. +SHOW SYSTEM FUNCTIONS concat; + +-- List a user function `concat_user` by searching user defined functions. +SHOW USER FUNCTIONS concat_user; + +-- List a qualified function `max` from database `salesdb`. 
+SHOW SYSTEM FUNCTIONS salesdb.max; + +-- List all functions starting with `t` +SHOW FUNCTIONS LIKE 't*'; + +-- List all functions starting with `t` without LIKE keyword +SHOW FUNCTIONS 't*'; + +-- List all user functions starting with `t` +SHOW USER FUNCTIONS LIKE 't*'; + +-- List all user functions starting with `t` without LIKE keyword +SHOW USER FUNCTIONS 't*'; + +-- List all functions starting with `yea` or `windo` +SHOW FUNCTIONS LIKE 'yea*|windo*'; + +-- Use normal regex pattern to list function names that has 4 characters +-- with `t` as the starting character. +SHOW FUNCTIONS LIKE 't[a-z][a-z][a-z]'; + +-- List all functions from default schema +SHOW FUNCTIONS FROM default; + +-- List all user functions from default schema +SHOW USER FUNCTIONS FROM default; + +-- List all functions from default schema starting with `t` +SHOW FUNCTIONS FROM default LIKE 't*'; + +-- List all functions from default schema starting with `t` without LIKE keyword +SHOW FUNCTIONS FROM default 't*'; + +-- List all user functions from default schema starting with `t` +SHOW USER FUNCTIONS FROM default LIKE 't*'; + +-- List all user functions from default schema starting with `t` without LIKE keyword +SHOW USER FUNCTIONS FROM default 't*'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.sql new file mode 100644 index 000000000..987531a16 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.sql @@ -0,0 +1,18 @@ +-- List all tables in default database +SHOW TABLES; + +-- List all tables from userdb database +SHOW TABLES FROM userdb; + +-- List all tables in userdb database +SHOW TABLES IN userdb; + +-- List all tables from default database matching the pattern `sam*` +SHOW TABLES FROM default LIKE 'sam*'; + +-- List all tables from default database matching the pattern `sam*` +-- without LIKE keyword +SHOW TABLES FROM default 'sam*'; + +-- List all 
tables matching the pattern `sam*|suj` without LIKE keyword +SHOW TABLES 'sam*|suj'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.sql new file mode 100644 index 000000000..b4d370fb3 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.sql @@ -0,0 +1,23 @@ +-- List all views in default database +SHOW VIEWS; + +-- List all views from userdb database +SHOW VIEWS FROM userdb; + +-- List all views in global temp view database +SHOW VIEWS IN global_temp; + +-- List all views from default database matching the pattern `sam*` +SHOW VIEWS FROM default LIKE 'sam*'; + +-- List all views from the current database +-- matching the pattern `sam|suj|temp*` +SHOW VIEWS LIKE 'sam|suj|temp*'; + +-- List all views from default database matching the pattern `sam*` +-- without LIKE keyword +SHOW VIEWS FROM default 'sam*'; + +-- List all views from the current database +-- matching the pattern `sam|suj|temp*` without LIKE keyword +SHOW VIEWS 'sam|suj|temp*'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.sql new file mode 100644 index 000000000..bae412208 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.sql @@ -0,0 +1,17 @@ +SHOW VOLUMES; + +SHOW VOLUMES IN sampledb; + +SHOW VOLUMES FROM sampledb; + +SHOW VOLUMES LIKE 'regex*'; + +SHOW VOLUMES 'regex*'; + +SHOW VOLUMES IN sampledb LIKE 'regex*'; + +SHOW VOLUMES IN sampledb 'regex*'; + +SHOW VOLUMES FROM sampledb LIKE 'regex*'; + +SHOW VOLUMES FROM sampledb 'regex*'; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.sql new file mode 100644 index 000000000..21e892c5b --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.sql @@ -0,0 +1,10 @@ 
+SELECT * + FROM sales UNPIVOT INCLUDE NULLS + (sales FOR quarter IN (q1 AS `Jan-Mar`, + q2 AS `Apr-Jun`, + q3 AS `Jul-Sep`, + sales.q4 AS `Oct-Dec`)); + +SELECT * + FROM oncall UNPIVOT ((name, email, phone) FOR precedence IN ((name1, email1, phone1) AS primary, + (name2, email2, phone2) AS secondary)); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.sql new file mode 100644 index 000000000..39b5afcfd --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.sql @@ -0,0 +1,11 @@ +USE CATALOG catalog_name; + +-- Use the 'hive_metastore' . +USE CATALOG hive_metastore; + +USE CATALOG 'hive_metastore'; + +-- Use the 'some_catalog' +USE CATALOG `some_catalog`; + +USE CATALOG some_cat; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.sql b/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.sql new file mode 100644 index 000000000..2bc4e4cc1 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.sql @@ -0,0 +1,18 @@ +USE database_name; + +-- Use the 'userdb' +USE userdb; + +-- Use the 'userdb1' +USE userdb1; + +-- Keywords SCHEMA and DATABASE are interchangeable. 
+USE DATABASE database_name; + +USE SCHEMA database_name; + +USE IDENTIFIER('database_name'); + +USE DATABASE IDENTIFIER('database_name'); + +USE SCHEMA IDENTIFIER('database_name'); From d81fc1c7a3aabfd653c587c2816fd7b6002d1ad7 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Tue, 17 Dec 2024 12:01:15 +0100 Subject: [PATCH 03/19] feat: added all tests and started implemented dialect --- crates/lib-dialects/src/databricks.rs | 302 +++++- .../dialects/databricks/alter_catalog.yml | 99 ++ .../dialects/databricks/alter_database.yml | 144 +++ .../dialects/databricks/alter_table.yml | 521 +++++++++++ .../dialects/databricks/alter_view.yml | 184 ++++ .../dialects/databricks/alter_volume.yml | 85 ++ .../databricks/command_terminator.yml | 40 + .../dialects/databricks/comment_on.yml | 72 ++ .../dialects/databricks/create_catalog.yml | 22 + .../dialects/databricks/create_database.yml | 118 +++ .../dialects/databricks/create_function.yml | 341 +++++++ .../dialects/databricks/create_table.yml | 362 ++++++++ .../dialects/databricks/create_volume.yml | 51 ++ .../dialects/databricks/date_functions.yml | 281 ++++++ .../declare_or_replace_variable.yml | 38 + .../dialects/databricks/describe_volume.yml | 8 + .../dialects/databricks/drop_catalog.yml | 14 + .../dialects/databricks/drop_volume.yml | 12 + .../dialects/databricks/magic_line.yml | 21 + .../dialects/databricks/named_argument.yml | 89 ++ .../fixtures/dialects/databricks/pivot.yml | 563 ++++++++++++ .../fixtures/dialects/databricks/select.yml | 68 ++ .../databricks/select_from_lateral_view.yml | 769 ++++++++++++++++ .../dialects/databricks/select_group_by.yml | 857 ++++++++++++++++++ .../dialects/databricks/select_window.yml | 77 ++ .../dialects/databricks/set_time_zone.yml | 38 + .../dialects/databricks/set_variable.yml | 89 ++ .../dialects/databricks/show_databases.yml | 39 + .../dialects/databricks/show_functions.yml | 100 ++ .../dialects/databricks/show_tables.yml | 46 + 
.../dialects/databricks/show_views.yml | 53 ++ .../dialects/databricks/show_volumes.yml | 50 + .../fixtures/dialects/databricks/unpivot.yml | 99 ++ .../dialects/databricks/use_catalog.yml | 25 + .../dialects/databricks/use_database.yml | 51 ++ 35 files changed, 5725 insertions(+), 3 deletions(-) create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/create_database.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/create_function.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/create_table.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.yml create mode 100644 
crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/pivot.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/select.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/select_window.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.yml create mode 100644 crates/lib-dialects/test/fixtures/dialects/databricks/use_database.yml diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index e097253e9..cd06a09c7 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -1,10 +1,306 @@ -use sqruff_lib_core::dialects::{base::Dialect, init::DialectKind}; +use sqruff_lib_core::{ + dialects::{base::Dialect, init::DialectKind, syntax::SyntaxKind}, + helpers::{Config, ToMatchable}, + parser::{ + grammar::{anyof::one_of, base::Ref, sequence::Sequence}, + lexer::Matcher, + }, + vec_of_erased, +}; + +use 
crate::databricks_keywords::{RESERVED_KEYWORDS, UNRESERVED_KEYWORDS}; pub fn dialect() -> Dialect { - let sparksql = crate::sparksql::dialect(); + let raw_sparksql = crate::sparksql::dialect(); - let mut databricks = sparksql; + let mut databricks = crate::sparksql::dialect(); databricks.name = DialectKind::Databricks; + // databricks + // .sets_mut("unreserverd_keywords") + // .extend(UNRESERVED_KEYWORDS); + // databricks + // .sets_mut("unreserverd_keywords") + // .extend(raw_sparksql.sets("reserverd_keywords")); + // databricks.sets_ut("unreserverd_keywords") + + // databricks.sets_mut("reserverd_keywords").clear(); + // databricks.sets_mut("reserverd_keywords").extend(RESERVED_KEYWORDS); + + // databricks.sets_mut("data_part_function_name").extend(["TIMEDIFF"]); + + // Named Function Parameters: + // https://docs.databricks.com/en/sql/language-manual/sql-ref-function-invocation.html#named-parameter-invocation + databricks.insert_lexer_matchers( + vec![Matcher::string("right_array", "=>", SyntaxKind::RightArrow)], + "equals", + ); + + // Notebook Cell Delimiter: + // https://learn.microsoft.com/en-us/azure/databricks/notebooks/notebook-export-import#sql-1 + // // databricks.insert_lexer_matchers( + // vec![Match::regex( + // "command", + // r"(\r?\n){2}-- COMMAND ----------(\r?\n)", + // SyntaxKind::Code, + // )], + // "newline", + // ); + + // Datbricks Notebook Start: + // Needed to insert "so early" to avoid magic + notebook + // start to be interpreted as inline comment + databrikcs.insert_lexer_matchers( + vec![ + Matcher::regex( + "notebook_start", + r"-- Databricks notebook source(\r?\n){1}", + SyntaxKind::CommentStatement, + ), + Matcher::regex( + "magic_line", + r"(-- MAGIC)( [^%]{1})([^\n]*)", + SyntaxKind::Code, + ), + Matcher::regex( + "magic_start", + r"(-- MAGIC %)([^\n]{2,})(\r?\n)", + SyntaxKind::CodeSegment, + ), + ], + "inline_comment", + ); + + databricks.add([ + ( + "SetTagsGrammar".into(), + Sequence::new(vec_of_erased![ + 
Ref::keyword("SET"), + Ref::keyword("TAGS"), + Ref::new("BracketedPropertyListGrammar"), + ]) + .to_matchable() + .into(), + ), + ( + "UnsetTagsGrammar".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("UNSET"), + Ref::keyword("TAGS"), + Ref::new("BracketedPropertyNameListGrammar"), + ]) + .to_matchable() + .into(), + ), + ( + "ColumnDefaultGrammar".into(), + one_of(vec_of_erased!( + Ref::new("LiteralGrammar"), + Ref::new("FucntionSegmenet"), + )) + .to_matchable() + .into(), + ), + ( + "ConstraintOptionGrammar".into(), + Sequence::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("ENABLE"), + Ref::keyword("NOVALIDATE") + ]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("NOT"), + Ref::keyword("ENFORCED") + ]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![Ref::keyword("DEFERRABLE")]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("INITIALLY"), + Ref::keyword("DEFERRED") + ]) + .config(|config| { config.optional() }), + one_of(vec_of_erased![Ref::keyword("NORELY"), Ref::keyword("RELY"),]) + .config(|config| { config.optional() }), + ]) + .to_matchable() + .into(), + ), + ( + "ForeignKeyOptionGrammar".into(), + Sequence::new(vec_of_erased![ + Sequence::new(vec_of_erased![Ref::keyword("MATCH"), Ref::keyword("FULL"),]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("ON"), + Ref::keyword("UPDATE"), + Ref::keyword("NO"), + Ref::keyword("ACTION"), + ]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("ON"), + Ref::keyword("DELETE"), + Ref::keyword("NO"), + Ref::keyword("ACTION"), + ]), + ]), + ), + // DropConstraintGrammar=Sequence( + // "DROP", + // OneOf( + // Sequence( + // Ref("PrimaryKeyGrammar"), + // Ref("IfExistsGrammar", optional=True), + // OneOf( + // "RESTRICT", + // "CASCADE", + // optional=True, + // ), + // ), + // Sequence( + // 
Ref("ForeignKeyGrammar"), + // Ref("IfExistsGrammar", optional=True), + // Bracketed( + // Delimited( + // Ref("ColumnReferenceSegment"), + // ) + // ), + // ), + // Sequence( + // "CONSTRAINT", + // Ref("IfExistsGrammar", optional=True), + // Ref("ObjectReferenceSegment"), + // OneOf( + // "RESTRICT", + // "CASCADE", + // optional=True, + // ), + // ), + // ), + // ), + ( + "DropConstraintGrammar".into(), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::new("PrimaryKeyGrammar"), + Ref::new("IfExistsGrammar").optional(), + one_of(vec_of_erased![ + Ref::keyword("RESTRICT"), + Ref::keyword("CASCADE"), + ]) + .config(|config| config.optional()), + ]), + Sequence::new(vec_of_erased![ + Ref::new("ForeignKeyGrammar"), + Ref::new("IfExistsGrammar").optional(), + Ref::new("Bracketed").config(|config| { + config.set_children(vec_of_erased![Ref::new("ColumnReferenceSegment")]) + }), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("CONSTRAINT"), + Ref::new("IfExistsGrammar").optional(), + Ref::new("ObjectReferenceSegment"), + one_of(vec_of_erased![ + Ref::keyword("RESTRICT"), + Ref::keyword("CASCADE"), + ]) + .config(|config| config.optional()), + ]), + ]) + .to_matchable() + .into(), + ), + // AlterPartitionGrammar=Sequence( + // "PARTITION", + // Bracketed( + // Delimited( + // AnyNumberOf( + // OneOf( + // Ref("ColumnReferenceSegment"), + // Ref("SetClauseSegment"), + // ), + // min_times=1, + // ), + // ), + // ), + // ), + // RowFilterClauseGrammar=Sequence( + // "ROW", + // "FILTER", + // Ref("ObjectReferenceSegment"), + // "ON", + // Bracketed( + // Delimited( + // OneOf( + // Ref("ColumnReferenceSegment"), + // Ref("LiteralGrammar"), + // ), + // optional=True, + // ), + // ), + // ), + // PropertiesBackTickedIdentifierSegment=RegexParser( + // r"`.+`", + // IdentifierSegment, + // type="properties_naked_identifier", + // ), + // LocationWithCredentialGrammar=Sequence( + // "LOCATION", + // Ref("QuotedLiteralSegment"), + // Sequence( + // "WITH", + 
// Bracketed(
+        //             "CREDENTIAL",
+        //             Ref("PrincipalIdentifierSegment"),
+        //         ),
+        //         optional=True,
+        //     ),
+        // ),
+        // NotebookStart=TypedParser("notebook_start", CommentSegment, type="notebook_start"),
+        // MagicLineGrammar=TypedParser("magic_line", CodeSegment, type="magic_line"),
+        // MagicStartGrammar=TypedParser("magic_start", CodeSegment, type="magic_start"),
+        // VariableNameIdentifierSegment=OneOf(
+        //     Ref("NakedIdentifierSegment"),
+        //     Ref("BackQuotedIdentifierSegment"),
+        // ),
+    ]);
+
+    databricks.add([
+        // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-views.html
+        // Only difference between this and the SparkSQL version:
+        // - `LIKE` keyword is optional
+        (
+            "ShowViewsGrammar".into(),
+            Sequence::new(vec_of_erased![
+                Ref::keyword("VIEWS"),
+                // Optional `FROM <database>` / `IN <database>` qualifier. The
+                // database reference is required once FROM/IN is present, as in
+                // the SparkSQL grammar this entry is derived from.
+                Sequence::new(vec_of_erased![
+                    one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]),
+                    Ref::new("DatabaseReferenceSegment"),
+                ])
+                .config(|config| {
+                    config.optional();
+                }),
+                // Optional pattern filter; the `LIKE` keyword itself may be omitted.
+                Sequence::new(vec_of_erased![
+                    Ref::keyword("LIKE").optional(),
+                    Ref::new("QuotedLiteralSegment"),
+                ])
+                .config(|config| {
+                    config.optional();
+                }),
+            ])
+            .to_matchable()
+            .into(),
+        ),
+        // TODO Missing Show Object Grammar
+        // Two-keyword sequence `NOT NULL`.
+        (
+            "NotNullGrammar".into(),
+            Sequence::new(vec_of_erased![Ref::keyword("NOT"), Ref::keyword("NULL")])
+                .to_matchable()
+                .into(),
+        ),
+        // TODO Function NameIdentifierSegment
+    ]);
+
     return databricks;
 }
diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.yml
new file mode 100644
index 000000000..85dc97f49
--- /dev/null
+++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.yml
@@ -0,0 +1,99 @@
+file:
+- unparsable:
+  - word: ALTER
+  - word: CATALOG
+  - word: some_cat
+  - word: OWNER
+  - word: TO
+  - back_quote: '`alf@melmak.et`'
+  - semicolon: ;
+  - word: ALTER
+  - word: CATALOG
+  - word: some_cat
+  - word: OWNER
+  - word: TO
+  - word: my_group
+  - semicolon: ;
+  - word: ALTER
+
- word: CATALOG + - word: some_cat + - word: SET + - word: OWNER + - word: TO + - back_quote: '`alf@melmak.et`' + - semicolon: ; + - word: ALTER + - word: CATALOG + - word: some_cat + - word: SET + - word: OWNER + - word: TO + - word: my_group + - semicolon: ; + - word: ALTER + - word: CATALOG + - word: some_cat + - word: SET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - raw_comparison_operator: = + - single_quote: '''value1''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: CATALOG + - word: some_cat + - word: SET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag2''' + - raw_comparison_operator: = + - single_quote: '''value2''' + - comma: ',' + - single_quote: '''tag3''' + - raw_comparison_operator: = + - single_quote: '''value3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: CATALOG + - word: some_cat + - word: UNSET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: CATALOG + - word: some_cat + - word: UNSET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag2''' + - comma: ',' + - single_quote: '''tag3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: CATALOG + - word: some_cat + - word: ENABLE + - word: PREDICTIVE + - word: OPTIMIZATION + - semicolon: ; + - word: ALTER + - word: CATALOG + - word: some_cat + - word: INHERIT + - word: PREDICTIVE + - word: OPTIMIZATION + - semicolon: ; + - word: ALTER + - word: CATALOG + - word: some_cat + - word: DISABLE + - word: PREDICTIVE + - word: OPTIMIZATION + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml new file mode 100644 index 000000000..e2dcb1c92 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml @@ -0,0 +1,144 @@ +file: +- unparsable: + - word: ALTER + - word: SCHEMA + - word: some_cat 
+ - word: OWNER + - word: TO + - back_quote: '`alf@melmak.et`' + - semicolon: ; + - word: ALTER + - word: SCHEMA + - word: some_cat + - word: OWNER + - word: TO + - word: my_group + - semicolon: ; + - word: ALTER + - word: SCHEMA + - word: some_cat + - word: SET + - word: OWNER + - word: TO + - back_quote: '`alf@melmak.et`' + - semicolon: ; + - word: ALTER + - word: SCHEMA + - word: some_cat + - word: SET + - word: OWNER + - word: TO + - word: my_group + - semicolon: ; + - word: ALTER + - word: DATABASE + - word: some_cat + - word: OWNER + - word: TO + - back_quote: '`alf@melmak.et`' + - semicolon: ; + - word: ALTER + - word: DATABASE + - word: some_cat + - word: SET + - word: OWNER + - word: TO + - back_quote: '`alf@melmak.et`' + - semicolon: ; + - word: ALTER + - word: DATABASE + - word: some_cat + - word: OWNER + - word: TO + - word: my_group + - semicolon: ; + - word: ALTER + - word: DATABASE + - word: some_cat + - word: SET + - word: OWNER + - word: TO + - word: my_group + - semicolon: ; + - word: ALTER + - word: SCHEMA + - word: some_cat + - word: SET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - raw_comparison_operator: = + - single_quote: '''value1''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: DATABASE + - word: some_cat + - word: SET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag2''' + - raw_comparison_operator: = + - single_quote: '''value2''' + - comma: ',' + - single_quote: '''tag3''' + - raw_comparison_operator: = + - single_quote: '''value3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: DATABASE + - word: some_cat + - word: UNSET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: SCHEMA + - word: some_cat + - word: UNSET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag2''' + - comma: ',' + - single_quote: '''tag3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: SCHEMA + - 
word: some_cat + - word: ENABLE + - word: PREDICTIVE + - word: OPTIMIZATION + - semicolon: ; + - word: ALTER + - word: DATABASE + - word: some_cat + - word: INHERIT + - word: PREDICTIVE + - word: OPTIMIZATION + - semicolon: ; + - word: ALTER + - word: SCHEMA + - word: some_cat + - word: DISABLE + - word: PREDICTIVE + - word: OPTIMIZATION + - semicolon: ; + - word: ALTER + - word: SCHEMA + - word: some_cat + - word: SET + - word: DBPROPERTIES + - start_bracket: ( + - single_quote: '''Edited-by''' + - raw_comparison_operator: = + - single_quote: '''John Doe''' + - comma: ',' + - single_quote: '''Edit-date''' + - raw_comparison_operator: = + - single_quote: '''2020-01-01''' + - end_bracket: ) + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.yml new file mode 100644 index 000000000..13c021adc --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.yml @@ -0,0 +1,521 @@ +file: +- statement: + - alter_table_statement: + - keyword: ALTER + - keyword: TABLE + - table_reference: + - naked_identifier: Student + - keyword: RENAME + - keyword: TO + - table_reference: + - naked_identifier: StudentInfo +- statement_terminator: ; +- statement: + - alter_table_statement: + - keyword: ALTER + - keyword: TABLE + - table_reference: + - naked_identifier: default + - dot: . 
+ - naked_identifier: StudentInfo + - keyword: PARTITION + - bracketed: + - start_bracket: ( + - column_reference: + - naked_identifier: age + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''10''' + - end_bracket: ) + - keyword: RENAME + - keyword: TO + - keyword: PARTITION + - bracketed: + - start_bracket: ( + - column_reference: + - naked_identifier: age + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''15''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - alter_table_statement: + - keyword: ALTER + - keyword: TABLE + - table_reference: + - naked_identifier: StudentInfo + - keyword: ADD + - keyword: columns + - bracketed: + - start_bracket: ( + - column_definition: + - column_reference: + - naked_identifier: LastName + - data_type: + - primitive_type: + - keyword: string + - comma: ',' + - column_definition: + - column_reference: + - naked_identifier: DOB + - data_type: + - primitive_type: + - keyword: timestamp + - end_bracket: ) +- statement_terminator: ; +- file: + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: DROP + - word: COLUMN + - start_bracket: ( + - word: DOB + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: DROP + - word: COLUMNS + - word: IF + - word: EXISTS + - start_bracket: ( + - word: LastName + - comma: ',' + - word: DOB + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: ADD + - word: IF + - word: NOT + - word: EXISTS + - word: PARTITION + - start_bracket: ( + - word: age + - raw_comparison_operator: = + - numeric_literal: '18' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: DROP + - word: IF + - word: EXISTS + - word: PARTITION + - start_bracket: ( + - word: age + - raw_comparison_operator: = + - numeric_literal: '18' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + 
- word: ADD + - word: IF + - word: NOT + - word: EXISTS + - word: PARTITION + - start_bracket: ( + - word: age + - raw_comparison_operator: = + - numeric_literal: '18' + - end_bracket: ) + - word: PARTITION + - start_bracket: ( + - word: age + - raw_comparison_operator: = + - numeric_literal: '20' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: RECOVER + - word: PARTITIONS + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: ALTER + - word: COLUMN + - word: name + - word: COMMENT + - double_quote: '"new comment"' + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: RENAME + - word: COLUMN + - word: name + - word: TO + - word: FirstName + - semicolon: ; + - word: ALTER + - word: TABLE + - word: dbx + - dot: . + - word: tab1 + - word: PARTITION + - start_bracket: ( + - word: a + - raw_comparison_operator: = + - single_quote: '''1''' + - comma: ',' + - word: b + - raw_comparison_operator: = + - single_quote: '''2''' + - end_bracket: ) + - word: SET + - word: LOCATION + - single_quote: '''/path/to/part/ways''' + - semicolon: ; + - word: ALTER + - word: TABLE + - word: test_tab + - word: SET + - word: SERDE + - single_quote: '''org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe''' + - semicolon: ; + - word: ALTER + - word: TABLE + - word: dbx + - dot: . + - word: tab1 + - word: SET + - word: SERDE + - single_quote: '''org.apache.hadoop''' + - word: WITH + - word: SERDEPROPERTIES + - start_bracket: ( + - single_quote: '''k''' + - raw_comparison_operator: = + - single_quote: '''v''' + - comma: ',' + - single_quote: '''kay''' + - raw_comparison_operator: = + - single_quote: '''vee''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: dbx + - dot: . 
+ - word: tab1 + - word: SET + - word: TBLPROPERTIES + - start_bracket: ( + - single_quote: '''winner''' + - raw_comparison_operator: = + - single_quote: '''loser''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: dbx + - dot: . + - word: tab1 + - word: UNSET + - word: TBLPROPERTIES + - start_bracket: ( + - single_quote: '''winner''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: my_table + - word: DROP + - word: FEATURE + - word: deletionVectors + - semicolon: ; + - word: ALTER + - word: TABLE + - word: my_table + - word: DROP + - word: FEATURE + - word: deletionVectors + - word: TRUNCATE + - word: HISTORY + - semicolon: ; + - word: ALTER + - word: TABLE + - word: test + - word: SET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - raw_comparison_operator: = + - single_quote: '''val1''' + - comma: ',' + - single_quote: '''tag2''' + - raw_comparison_operator: = + - single_quote: '''val2''' + - comma: ',' + - single_quote: '''tag3''' + - raw_comparison_operator: = + - single_quote: '''val3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: test + - word: UNSET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - comma: ',' + - single_quote: '''tag2''' + - comma: ',' + - single_quote: '''tag3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: main + - dot: . + - word: schema1 + - dot: . + - word: test + - word: ALTER + - word: COLUMN + - word: col1 + - word: SET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - raw_comparison_operator: = + - single_quote: '''val1''' + - comma: ',' + - single_quote: '''tag2''' + - raw_comparison_operator: = + - single_quote: '''val2''' + - comma: ',' + - single_quote: '''tag3''' + - raw_comparison_operator: = + - single_quote: '''val3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: main + - dot: . + - word: schema1 + - dot: . 
+ - word: test + - word: ALTER + - word: COLUMN + - word: col1 + - word: UNSET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - comma: ',' + - single_quote: '''tag2''' + - comma: ',' + - single_quote: '''tag3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: my_table + - word: ENABLE + - word: PREDICTIVE + - word: OPTIMIZATION + - semicolon: ; + - word: ALTER + - word: TABLE + - word: sales + - word: SET + - word: ROW + - word: FILTER + - word: us_filter + - word: ON + - start_bracket: ( + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: sales + - word: SET + - word: ROW + - word: FILTER + - word: us_filter + - word: ON + - start_bracket: ( + - word: region + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: sales + - word: DROP + - word: ROW + - word: FILTER + - semicolon: ; + - word: ALTER + - word: TABLE + - word: users + - word: ALTER + - word: COLUMN + - word: ssn + - word: SET + - word: MASK + - word: ssn_mask + - semicolon: ; + - word: ALTER + - word: TABLE + - word: users + - word: ALTER + - word: COLUMN + - word: ssn + - word: SET + - word: MASK + - word: ssn_mask + - word: USING + - word: COLUMNS + - start_bracket: ( + - word: ssn_value + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: users + - word: ALTER + - word: COLUMN + - word: ssn + - word: DROP + - word: MASK + - semicolon: ; + - word: ALTER + - word: TABLE + - word: persons + - word: ADD + - word: CONSTRAINT + - word: persons_pk + - word: PRIMARY + - word: KEY + - start_bracket: ( + - word: first_name + - comma: ',' + - word: last_name + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: pets + - word: ADD + - word: CONSTRAINT + - word: pets_persons_fk + - word: FOREIGN + - word: KEY + - start_bracket: ( + - word: owner_first_name + - comma: ',' + - word: owner_last_name + - end_bracket: ) + - word: REFERENCES + - word: persons + - word: NOT 
+ - word: ENFORCED + - word: RELY + - semicolon: ; + - word: ALTER + - word: TABLE + - word: pets + - word: ADD + - word: CONSTRAINT + - word: pets_name_not_cute_chk + - word: CHECK + - start_bracket: ( + - word: length + - start_bracket: ( + - word: name + - end_bracket: ) + - raw_comparison_operator: < + - numeric_literal: '20' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: pets + - word: DROP + - word: CONSTRAINT + - word: pets_name_not_cute_chk + - semicolon: ; + - word: ALTER + - word: TABLE + - word: persons + - word: DROP + - word: CONSTRAINT + - word: persons_pk + - word: RESTRICT + - semicolon: ; + - word: ALTER + - word: TABLE + - word: pets + - word: DROP + - word: FOREIGN + - word: KEY + - word: IF + - word: EXISTS + - start_bracket: ( + - word: owner_first_name + - comma: ',' + - word: owner_last_name + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: persons + - word: DROP + - word: PRIMARY + - word: KEY + - word: CASCADE + - semicolon: ; + - word: ALTER + - word: TABLE + - word: rocks + - word: DROP + - word: COLUMN + - word: rock + - semicolon: ; + - word: ALTER + - word: TABLE + - word: rocks + - word: DROP + - word: COLUMN + - word: rock + - comma: ',' + - word: loc + - semicolon: ; + - word: ALTER + - word: TABLE + - word: rocks + - word: DROP + - word: COLUMN + - word: IF + - word: EXISTS + - word: rock + - comma: ',' + - word: loc + - semicolon: ; + - word: ALTER + - word: TABLE + - word: rocks + - word: DROP + - word: COLUMN + - word: IF + - word: EXISTS + - start_bracket: ( + - word: rock + - comma: ',' + - word: loc + - end_bracket: ) + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.yml new file mode 100644 index 000000000..3503139a0 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_view.yml @@ -0,0 +1,184 @@ +file: +- statement: + - 
alter_view_statement: + - keyword: ALTER + - keyword: VIEW + - table_reference: + - naked_identifier: tempsc1 + - dot: . + - naked_identifier: v1 + - keyword: RENAME + - keyword: TO + - table_reference: + - naked_identifier: tempsc1 + - dot: . + - naked_identifier: v2 +- statement_terminator: ; +- file: + - word: ALTER + - word: VIEW + - word: IDENTIFIER + - start_bracket: ( + - single_quote: '''tempsc1.v1''' + - end_bracket: ) + - word: RENAME + - word: TO + - word: IDENTIFIER + - start_bracket: ( + - single_quote: '''tempsc1.v2''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: VIEW + - word: tempsc1 + - dot: . + - word: v2 + - word: SET + - word: TBLPROPERTIES + - start_bracket: ( + - single_quote: '''created.by.user''' + - raw_comparison_operator: = + - double_quote: '"John"' + - comma: ',' + - single_quote: '''created.date''' + - raw_comparison_operator: = + - single_quote: '''01-01-2001''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: VIEW + - word: tempsc1 + - dot: . + - word: v2 + - word: UNSET + - word: TBLPROPERTIES + - start_bracket: ( + - back_quote: '`created`' + - dot: . + - back_quote: '`by`' + - dot: . + - back_quote: '`user`' + - comma: ',' + - word: created + - dot: . + - word: date + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: VIEW + - word: tempsc1 + - dot: . + - word: v2 + - word: AS + - word: SELECT + - star: '*' + - word: FROM + - word: tempsc1 + - dot: . 
+ - word: v1 + - semicolon: ; + - word: ALTER + - word: VIEW + - word: v1 + - word: OWNER + - word: TO + - back_quote: '`alf@melmak.et`' + - semicolon: ; + - word: ALTER + - word: VIEW + - word: v1 + - word: SET + - word: OWNER + - word: TO + - back_quote: '`alf@melmak.et`' + - semicolon: ; + - word: ALTER + - word: VIEW + - word: v1 + - word: WITH + - word: SCHEMA + - word: BINDING + - semicolon: ; + - word: ALTER + - word: VIEW + - word: v1 + - word: WITH + - word: SCHEMA + - word: COMPENSATION + - semicolon: ; + - word: ALTER + - word: VIEW + - word: v1 + - word: WITH + - word: SCHEMA + - word: TYPE + - word: EVOLUTION + - semicolon: ; + - word: ALTER + - word: VIEW + - word: v1 + - word: WITH + - word: SCHEMA + - word: EVOLUTION + - semicolon: ; + - word: ALTER + - word: MATERIALIZED + - word: VIEW + - word: my_mv + - word: ADD + - word: SCHEDULE + - word: CRON + - single_quote: '''0 0 0 * * ? *''' + - word: AT + - word: TIME + - word: ZONE + - single_quote: '''America/Los_Angeles''' + - semicolon: ; + - word: ALTER + - word: MATERIALIZED + - word: VIEW + - word: my_mv + - word: ALTER + - word: SCHEDULE + - word: CRON + - single_quote: '''0 0/15 * * * ? 
*''' + - semicolon: ; + - word: ALTER + - word: MATERIALIZED + - word: VIEW + - word: my_mv + - word: DROP + - word: SCHEDULE + - semicolon: ; + - word: ALTER + - word: VIEW + - word: test + - word: SET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - raw_comparison_operator: = + - single_quote: '''val1''' + - comma: ',' + - single_quote: '''tag2''' + - raw_comparison_operator: = + - single_quote: '''val2''' + - comma: ',' + - single_quote: '''tag3''' + - raw_comparison_operator: = + - single_quote: '''val3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: VIEW + - word: test + - word: UNSET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - comma: ',' + - single_quote: '''tag2''' + - comma: ',' + - single_quote: '''tag3''' + - end_bracket: ) + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.yml new file mode 100644 index 000000000..5c6fa05c9 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.yml @@ -0,0 +1,85 @@ +file: +- unparsable: + - word: ALTER + - word: VOLUME + - word: some_vol + - word: RENAME + - word: TO + - word: some_new_vol + - semicolon: ; + - word: ALTER + - word: VOLUME + - word: some_vol + - word: OWNER + - word: TO + - back_quote: '`alf@melmak.et`' + - semicolon: ; + - word: ALTER + - word: VOLUME + - word: some_vol + - word: OWNER + - word: TO + - word: my_group + - semicolon: ; + - word: ALTER + - word: VOLUME + - word: some_vol + - word: SET + - word: OWNER + - word: TO + - back_quote: '`alf@melmak.et`' + - semicolon: ; + - word: ALTER + - word: VOLUME + - word: some_vol + - word: SET + - word: OWNER + - word: TO + - word: my_group + - semicolon: ; + - word: ALTER + - word: VOLUME + - word: some_vol + - word: SET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - raw_comparison_operator: = + - single_quote: '''value1''' 
+ - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: VOLUME + - word: some_vol + - word: SET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag2''' + - raw_comparison_operator: = + - single_quote: '''value2''' + - comma: ',' + - single_quote: '''tag3''' + - raw_comparison_operator: = + - single_quote: '''value3''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: VOLUME + - word: some_vol + - word: UNSET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag1''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: VOLUME + - word: some_vol + - word: UNSET + - word: TAGS + - start_bracket: ( + - single_quote: '''tag2''' + - comma: ',' + - single_quote: '''tag3''' + - end_bracket: ) + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.yml new file mode 100644 index 000000000..96c7c58ca --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/command_terminator.yml @@ -0,0 +1,40 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: COL1 + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: TABLE1 + - unparsable: + - word: SELECT + - word: COL2 + - word: FROM + - word: TABLE2 + - word: SELECT + - word: COL3 + - word: FROM + - word: TABLE3 +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: COL4 + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: TABLE4 +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml 
b/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml new file mode 100644 index 000000000..2e09a2b92 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml @@ -0,0 +1,72 @@ +file: +- unparsable: + - word: COMMENT + - word: ON + - word: CATALOG + - word: my_catalog + - word: IS + - single_quote: '''This is my catalog''' + - semicolon: ; + - word: COMMENT + - word: ON + - word: CONNECTION + - word: mysql_connection + - word: IS + - single_quote: '''this is a mysql connection''' + - semicolon: ; + - word: COMMENT + - word: ON + - word: SCHEMA + - word: my_schema + - word: IS + - single_quote: '''This is my schema''' + - semicolon: ; + - word: COMMENT + - word: ON + - word: DATABASE + - word: my_other_schema + - word: IS + - single_quote: '''This is my other schema''' + - semicolon: ; + - word: COMMENT + - word: ON + - word: TABLE + - word: my_table + - word: IS + - single_quote: '''This is my table''' + - semicolon: ; + - word: COMMENT + - word: ON + - word: TABLE + - word: my_table + - word: IS + - word: 'NULL' + - semicolon: ; + - word: COMMENT + - word: ON + - word: SHARE + - word: my_share + - word: IS + - single_quote: '''A good share''' + - semicolon: ; + - word: COMMENT + - word: ON + - word: RECIPIENT + - word: my_recipient + - word: IS + - single_quote: '''A good recipient''' + - semicolon: ; + - word: COMMENT + - word: ON + - word: PROVIDER + - word: my_provider + - word: IS + - single_quote: '''A good provider''' + - semicolon: ; + - word: COMMENT + - word: ON + - word: VOLUME + - word: my_volume + - word: IS + - single_quote: '''Huge volume''' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.yml new file mode 100644 index 000000000..d6177eeb2 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.yml @@ -0,0 +1,22 @@ +file: +- unparsable: + - word: 
CREATE + - word: CATALOG + - word: customer_cat + - semicolon: ; + - word: CREATE + - word: CATALOG + - word: IF + - word: NOT + - word: EXISTS + - word: customer_cat + - semicolon: ; + - word: CREATE + - word: CATALOG + - word: IF + - word: NOT + - word: EXISTS + - word: customer_cat + - word: COMMENT + - single_quote: '''This is customer catalog''' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.yml new file mode 100644 index 000000000..e293ff50d --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.yml @@ -0,0 +1,118 @@ +file: +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: DATABASE + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - database_reference: + - naked_identifier: database_name + - keyword: COMMENT + - quoted_literal: '"database_comment"' + - keyword: LOCATION + - quoted_literal: '"root/database_directory"' + - keyword: WITH + - keyword: DBPROPERTIES + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '"property_name"' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '"property_value"' + - end_bracket: ) +- statement_terminator: ; +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: SCHEMA + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - database_reference: + - naked_identifier: database_name + - keyword: COMMENT + - quoted_literal: '"database_comment"' + - keyword: LOCATION + - quoted_literal: '"root/database_directory"' + - keyword: WITH + - keyword: DBPROPERTIES + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '"property_name"' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '"property_value"' + - end_bracket: ) +- statement_terminator: ; +- statement: + - create_database_statement: + - 
keyword: CREATE + - keyword: DATABASE + - database_reference: + - naked_identifier: customer_db +- statement_terminator: ; +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: DATABASE + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - database_reference: + - naked_identifier: customer_db +- statement_terminator: ; +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: DATABASE + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - database_reference: + - naked_identifier: customer_db + - keyword: COMMENT + - quoted_literal: '''This is customer database''' + - keyword: LOCATION + - quoted_literal: '''/user''' + - keyword: WITH + - keyword: DBPROPERTIES + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '"ID"' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '"001"' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '"Name"' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''John''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: DATABASE + - database_reference: + - naked_identifier: inventory_db + - keyword: COMMENT + - quoted_literal: '''This database is used to maintain Inventory''' +- statement_terminator: ; +- statement: + - create_database_statement: + - keyword: CREATE + - keyword: SCHEMA + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - database_reference: + - naked_identifier: database_name +- file: + - word: MANAGED + - word: LOCATION + - double_quote: '"s3://root_database_bucket/"' diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_function.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/create_function.yml new file mode 100644 index 000000000..c85997cc3 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_function.yml @@ -0,0 +1,341 @@ +file: +- 
statement: + - create_function_statement: + - keyword: CREATE + - keyword: OR + - keyword: REPLACE + - keyword: TEMPORARY + - keyword: FUNCTION + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - function_name_identifier: function_name + - keyword: AS + - quoted_literal: '"class_name"' + - keyword: USING + - file_keyword: FILE + - quoted_literal: '"resource_locations"' +- statement_terminator: ; +- statement: + - create_function_statement: + - keyword: CREATE + - keyword: FUNCTION + - function_name_identifier: simple_udf + - keyword: AS + - quoted_literal: '''SimpleUdf''' + - keyword: USING + - file_keyword: JAR + - quoted_literal: '''/tmp/SimpleUdf.jar''' +- statement_terminator: ; +- statement: + - create_function_statement: + - keyword: CREATE + - keyword: TEMPORARY + - keyword: FUNCTION + - function_name_identifier: simple_temp_udf + - keyword: AS + - quoted_literal: '''SimpleUdf''' + - keyword: USING + - file_keyword: JAR + - quoted_literal: '''/tmp/SimpleUdf.jar''' +- statement_terminator: ; +- statement: + - create_function_statement: + - keyword: CREATE + - keyword: OR + - keyword: REPLACE + - keyword: FUNCTION + - function_name_identifier: simple_udf + - keyword: AS + - quoted_literal: '''SimpleUdfR''' + - keyword: USING + - file_keyword: JAR + - quoted_literal: '''/tmp/SimpleUdfR.jar''' +- statement_terminator: ; +- statement: + - create_function_statement: + - keyword: CREATE + - keyword: FUNCTION + - function_name_identifier: test_avg + - keyword: AS + - quoted_literal: '''org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage''' +- statement_terminator: ; +- statement: + - create_function_statement: + - keyword: CREATE + - keyword: TEMPORARY + - keyword: FUNCTION + - function_name_identifier: test_avg + - keyword: AS + - quoted_literal: '''org.apache.hadoop.hive.ql.udf.generic.GenericUDAFAverage''' +- statement_terminator: ; +- file: + - word: CREATE + - word: TEMPORARY + - word: FUNCTION + - word: hello + - start_bracket: ( + - end_bracket: ) + 
- word: RETURNS + - word: STRING + - word: RETURN + - single_quote: '''Hello World!''' + - semicolon: ; + - word: CREATE + - word: OR + - word: REPLACE + - word: TEMPORARY + - word: FUNCTION + - word: function_name + - start_bracket: ( + - end_bracket: ) + - word: RETURNS + - word: TIMESTAMP + - word: LANGUAGE + - word: SQL + - word: RETURN + - word: SELECT + - word: MAX + - start_bracket: ( + - word: time + - end_bracket: ) + - word: AS + - word: time + - word: FROM + - word: my_table + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: area + - start_bracket: ( + - word: x + - word: DOUBLE + - comma: ',' + - word: y + - word: DOUBLE + - end_bracket: ) + - word: RETURNS + - word: DOUBLE + - word: RETURN + - word: x + - star: '*' + - word: y + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: square + - start_bracket: ( + - word: x + - word: DOUBLE + - end_bracket: ) + - word: RETURNS + - word: DOUBLE + - word: RETURN + - word: area + - start_bracket: ( + - word: x + - comma: ',' + - word: x + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: cte_function + - start_bracket: ( + - word: x + - word: INT + - end_bracket: ) + - word: RETURNS + - word: string + - word: LANGUAGE + - word: SQL + - word: RETURN + - word: WITH + - word: cte + - word: AS + - start_bracket: ( + - word: SELECT + - word: x + - word: AS + - word: y + - end_bracket: ) + - word: SELECT + - star: '*' + - word: FROM + - word: cte + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: roll_dice + - start_bracket: ( + - end_bracket: ) + - word: RETURNS + - word: INT + - word: NOT + - word: DETERMINISTIC + - word: CONTAINS + - word: SQL + - word: COMMENT + - single_quote: '''Roll a single 6 sided die''' + - word: RETURN + - start_bracket: ( + - word: rand + - start_bracket: ( + - end_bracket: ) + - star: '*' + - numeric_literal: '6' + - end_bracket: ) + - casting_operator: '::' + - word: INT + - plus: + + - numeric_literal: '1' + - semicolon: ; 
+ - word: CREATE + - word: FUNCTION + - word: roll_dice + - start_bracket: ( + - word: num_dice + - word: INT + - word: DEFAULT + - numeric_literal: '1' + - word: COMMENT + - single_quote: '''number of dice to roll (Default: 1)''' + - comma: ',' + - word: num_sides + - word: INT + - word: DEFAULT + - numeric_literal: '6' + - word: COMMENT + - single_quote: '''number of sides per die (Default: 6)''' + - end_bracket: ) + - word: RETURNS + - word: INT + - word: NOT + - word: DETERMINISTIC + - word: CONTAINS + - word: SQL + - word: COMMENT + - single_quote: '''Roll a number of n-sided dice''' + - word: RETURN + - word: aggregate + - start_bracket: ( + - word: sequence + - start_bracket: ( + - numeric_literal: '1' + - comma: ',' + - word: roll_dice + - dot: . + - word: num_dice + - comma: ',' + - numeric_literal: '1' + - end_bracket: ) + - comma: ',' + - numeric_literal: '0' + - comma: ',' + - start_bracket: ( + - word: acc + - comma: ',' + - word: x + - end_bracket: ) + - right_arrow: -> + - start_bracket: ( + - word: rand + - start_bracket: ( + - end_bracket: ) + - star: '*' + - word: roll_dice + - dot: . + - word: num_sides + - end_bracket: ) + - casting_operator: '::' + - word: int + - comma: ',' + - word: acc + - right_arrow: -> + - word: acc + - plus: + + - word: roll_dice + - dot: . + - word: num_dice + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: main + - dot: . + - word: default + - dot: . + - word: greet + - start_bracket: ( + - word: s + - word: STRING + - end_bracket: ) + - word: RETURNS + - word: STRING + - word: LANGUAGE + - word: PYTHON + - word: AS + - dollar_quote: |- + $$ + def greet(name): + return "Hello " + name + "!" 
+ + return greet(s) if s else None + $$ + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: return_table + - start_bracket: ( + - end_bracket: ) + - word: RETURNS + - word: TABLE + - word: RETURN + - word: SELECT + - word: time + - word: FROM + - word: my_table + - semicolon: ; + - word: CREATE + - word: FUNCTION + - word: return_table + - start_bracket: ( + - end_bracket: ) + - word: RETURNS + - word: TABLE + - start_bracket: ( + - word: col_a + - word: string + - comma: ',' + - word: col_b + - word: string + - word: comment + - double_quote: '"asdf"' + - end_bracket: ) + - word: RETURN + - word: SELECT + - word: col_a + - comma: ',' + - word: col_b + - word: FROM + - word: my_table + - semicolon: ; + - word: create + - word: or + - word: replace + - word: function + - back_quote: '`catalog`' + - dot: . + - back_quote: '`schema`' + - dot: . + - back_quote: '`name`' + - start_bracket: ( + - word: param + - word: int + - end_bracket: ) + - word: returns + - word: int + - word: return + - word: select + - word: param + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_table.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/create_table.yml new file mode 100644 index 000000000..6f937ada9 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_table.yml @@ -0,0 +1,362 @@ +file: +- statement: + - create_table_statement: + - keyword: CREATE + - keyword: TABLE + - table_reference: + - naked_identifier: tablename +- file: + - start_bracket: ( + - word: id_column + - word: INT + - comma: ',' + - word: othercolumn + - word: STRING + - comma: ',' + - word: generated_always_as_expression + - word: DATE + - word: GENERATED + - word: ALWAYS + - word: AS + - start_bracket: ( + - word: CAST + - start_bracket: ( + - word: birth_date + - word: AS + - word: DATE + - end_bracket: ) + - end_bracket: ) + - comma: ',' + - word: generated_by_default + - word: BIGINT + - word: GENERATED + - word: BY + - 
word: DEFAULT + - word: AS + - word: IDENTITY + - comma: ',' + - word: generated_always + - word: BIGINT + - word: GENERATED + - word: ALWAYS + - word: AS + - word: IDENTITY + - comma: ',' + - word: generated_column_start_with + - word: BIGINT + - word: GENERATED + - word: ALWAYS + - word: AS + - word: IDENTITY + - start_bracket: ( + - word: START + - word: WITH + - numeric_literal: '10' + - end_bracket: ) + - comma: ',' + - word: generated_column_increment_by + - word: BIGINT + - word: GENERATED + - word: ALWAYS + - word: AS + - word: IDENTITY + - start_bracket: ( + - word: INCREMENT + - word: BY + - numeric_literal: '5' + - end_bracket: ) + - comma: ',' + - word: generated_column_start_with_increment_by + - word: BIGINT + - word: GENERATED + - word: ALWAYS + - word: AS + - word: IDENTITY + - start_bracket: ( + - word: START + - word: WITH + - numeric_literal: '10' + - word: INCREMENT + - word: BY + - numeric_literal: '5' + - end_bracket: ) + - end_bracket: ) + - word: USING + - word: DELTA + - word: LOCATION + - double_quote: '"s3://someplace"' + - word: CLUSTER + - word: BY + - start_bracket: ( + - word: id_column + - end_bracket: ) + - semicolon: ; + - word: OPTIMIZE + - word: tablename + - semicolon: ; + - word: OPTIMIZE + - word: tablename + - word: WHERE + - word: date + - raw_comparison_operator: '>' + - raw_comparison_operator: = + - word: current_timestamp + - start_bracket: ( + - end_bracket: ) + - minus: '-' + - word: INTERVAL + - numeric_literal: '1' + - word: day + - word: ZORDER + - word: BY + - start_bracket: ( + - word: eventType + - comma: ',' + - word: eventTime + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student_copy + - word: AS + - word: SELECT + - star: '*' + - word: FROM + - word: 
student + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - word: USING + - word: CSV + - word: LOCATION + - single_quote: '''/path/to/csv_files''' + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - word: COMMENT + - single_quote: '''this is a comment''' + - word: TBLPROPERTIES + - start_bracket: ( + - single_quote: '''foo''' + - raw_comparison_operator: = + - single_quote: '''bar''' + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - word: TBLPROPERTIES + - start_bracket: ( + - single_quote: '''foo''' + - raw_comparison_operator: = + - single_quote: '''bar''' + - end_bracket: ) + - word: COMMENT + - single_quote: '''this is a comment''' + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - word: PARTITIONED + - word: BY + - start_bracket: ( + - word: age + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: rectangles + - start_bracket: ( + - word: a + - word: INT + - comma: ',' + - word: b + - word: INT + - comma: ',' + - word: area + - word: INT + - word: GENERATED + - word: ALWAYS + - word: AS + - start_bracket: ( + - word: a + - star: '*' + - word: b + - end_bracket: ) + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: rectangles + - start_bracket: ( + - word: a + - word: INT + - comma: ',' + - word: b + - word: INT + - word: PRIMARY + - word: KEY + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: rectangles + - 
start_bracket: ( + - word: a + - word: INT + - word: NOT + - word: 'NULL' + - comma: ',' + - word: b + - word: INT + - word: NOT + - word: 'NULL' + - word: PRIMARY + - word: KEY + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: OR + - word: REPLACE + - word: TABLE + - word: TABLE1 + - start_bracket: ( + - word: DATE_VALUE + - word: DATE + - word: NOT + - word: 'NULL' + - word: CONSTRAINT + - word: DATE_CONSTRAINT + - word: FOREIGN + - word: KEY + - word: REFERENCES + - word: TABLE2 + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - word: DEFAULT + - single_quote: '''bobby tables''' + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: student + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - word: NOT + - word: 'NULL' + - word: DEFAULT + - single_quote: '''bobby tables''' + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: clock + - start_bracket: ( + - word: which_time + - word: TIMESTAMP + - word: DEFAULT + - word: current_timestamp + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: clock + - start_bracket: ( + - word: which_time + - word: TIMESTAMP + - word: CONSTRAINT + - word: clock_pk + - word: PRIMARY + - word: KEY + - word: DEFAULT + - word: current_timestamp + - start_bracket: ( + - end_bracket: ) + - word: NOT + - word: 'NULL' + - end_bracket: ) + - semicolon: ; + - word: CREATE + - word: TABLE + - word: IDENTIFIER + - start_bracket: ( + - single_quote: '''student''' + - end_bracket: ) + - start_bracket: ( + - word: id + - word: INT + - comma: ',' + - word: name + - word: STRING + - comma: ',' + - word: age + - word: INT + - end_bracket: ) + - semicolon: ; 
diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.yml new file mode 100644 index 000000000..4c6328f14 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_volume.yml @@ -0,0 +1,51 @@ +file: +- unparsable: + - word: CREATE + - word: VOLUME + - word: customer_vol + - semicolon: ; + - word: CREATE + - word: VOLUME + - word: IF + - word: NOT + - word: EXISTS + - word: customer_vol + - semicolon: ; + - word: CREATE + - word: VOLUME + - word: IF + - word: NOT + - word: EXISTS + - word: customer_vol + - word: COMMENT + - single_quote: '''This is customer volume''' + - semicolon: ; + - word: CREATE + - word: EXTERNAL + - word: VOLUME + - word: customer_vol_external + - word: LOCATION + - single_quote: '''s3://s3-path/''' + - semicolon: ; + - word: CREATE + - word: EXTERNAL + - word: VOLUME + - word: IF + - word: NOT + - word: EXISTS + - word: customer_vol_external + - word: LOCATION + - single_quote: '''s3://s3-path/''' + - semicolon: ; + - word: CREATE + - word: EXTERNAL + - word: VOLUME + - word: IF + - word: NOT + - word: EXISTS + - word: customer_vol_external + - word: LOCATION + - single_quote: '''s3://s3-path/''' + - word: COMMENT + - single_quote: '''This is customer volume''' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.yml new file mode 100644 index 000000000..ee04fd25e --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/date_functions.yml @@ -0,0 +1,281 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: my_table + - dot: . + - naked_identifier: a + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: other_table + - dot: . 
+ - naked_identifier: b + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: my_table + - join_clause: + - keyword: LEFT + - keyword: JOIN + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: other_table + - join_on_condition: + - keyword: ON + - expression: + - function: + - function_name: + - function_name_identifier: DATEDIFF + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: SECOND + - comma: ',' + - expression: + - column_reference: + - naked_identifier: my_table + - dot: . + - naked_identifier: timestamp_a + - comma: ',' + - expression: + - column_reference: + - naked_identifier: other_table + - dot: . + - naked_identifier: timestamp_b + - end_bracket: ) + - comparison_operator: + - raw_comparison_operator: '>' + - numeric_literal: '1' +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - function: + - function_name: + - function_name_identifier: DATE_ADD + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: MICROSECOND + - comma: ',' + - expression: + - numeric_literal: '5' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: date_add_micro + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: DATE_DIFF + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: MILLISECOND + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - comma: ',' + - expression: + - column_reference: + - naked_identifier: end_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: datediff_milli + - comma: ',' + - 
select_clause_element: + - function: + - function_name: + - function_name_identifier: DATEADD + - bracketed: + - start_bracket: ( + - date_part: MINUTE + - comma: ',' + - expression: + - numeric_literal: '5' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: dateadd_min + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: DATEDIFF + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: HOUR + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - comma: ',' + - expression: + - column_reference: + - naked_identifier: end_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: datediff_hr + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: TIMEDIFF + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: DAY + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - comma: ',' + - expression: + - column_reference: + - naked_identifier: end_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: timediff_day + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: TIMESTAMPADD + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: DAYOFYEAR + - comma: ',' + - expression: + - numeric_literal: '5' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: ts_add_day_of_yr + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: TIMESTAMPDIFF + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - 
naked_identifier: WEEK + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - comma: ',' + - expression: + - column_reference: + - naked_identifier: end_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: ts_diff_week + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: DATE_ADD + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: MONTH + - comma: ',' + - expression: + - numeric_literal: '5' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: date_add_month + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: DATE_ADD + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: QUARTER + - comma: ',' + - expression: + - numeric_literal: '5' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: date_add_quarter + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: DATE_ADD + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: YEAR + - comma: ',' + - expression: + - numeric_literal: '5' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: start_dt + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: date_add_year + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: my_table +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.yml 
b/crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.yml new file mode 100644 index 000000000..88adf1290 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/declare_or_replace_variable.yml @@ -0,0 +1,38 @@ +file: +- unparsable: + - word: DECLARE + - word: var + - semicolon: ; + - word: DECLARE + - word: OR + - word: REPLACE + - word: var + - semicolon: ; + - word: DECLARE + - word: OR + - word: REPLACE + - word: VARIABLE + - word: var + - semicolon: ; + - word: DECLARE + - word: var + - word: INT + - word: DEFAULT + - numeric_literal: '5' + - semicolon: ; + - word: DECLARE + - word: var + - word: INT + - raw_comparison_operator: = + - numeric_literal: '5' + - semicolon: ; + - word: DECLARE + - word: var + - raw_comparison_operator: = + - numeric_literal: '5' + - semicolon: ; + - word: DECLARE + - word: var + - word: DEFAULT + - numeric_literal: '5' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.yml new file mode 100644 index 000000000..73a07faa2 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/describe_volume.yml @@ -0,0 +1,8 @@ +file: +- statement: + - describe_statement: + - keyword: DESCRIBE + - table_reference: + - naked_identifier: VOLUME + - naked_identifier: VACCINE_VOLUME +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.yml new file mode 100644 index 000000000..febeead2f --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.yml @@ -0,0 +1,14 @@ +file: +- unparsable: + - word: DROP + - word: CATALOG + - word: vaccine + - word: CASCADE + - semicolon: ; + - word: DROP + - word: CATALOG + - word: IF + - word: EXISTS + - word: vaccine + - word: RESTRICT + - semicolon: ; diff --git 
a/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.yml new file mode 100644 index 000000000..79aeb3251 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.yml @@ -0,0 +1,12 @@ +file: +- unparsable: + - word: DROP + - word: VOLUME + - word: vaccine_volume + - semicolon: ; + - word: DROP + - word: VOLUME + - word: IF + - word: EXISTS + - word: vaccine_volume + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.yml new file mode 100644 index 000000000..5b1e62bc2 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/magic_line.yml @@ -0,0 +1,21 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: x + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: y + - unparsable: + - word: SELECT + - word: a + - word: FROM + - word: b +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.yml new file mode 100644 index 000000000..d478122b6 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.yml @@ -0,0 +1,89 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - function: + - function_name: + - function_name_identifier: my_function + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: arg1 + - unparsable: + - raw_comparison_operator: = + - raw_comparison_operator: '>' + - numeric_literal: '3' + - comma: ',' + - word: arg2 + - raw_comparison_operator: = + - 
raw_comparison_operator: '>' + - numeric_literal: '4' + - end_bracket: ) + - from_clause: + - keyword: from + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dual +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - function: + - function_name: + - function_name_identifier: my_function + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '3' + - comma: ',' + - expression: + - column_reference: + - naked_identifier: arg2 + - unparsable: + - raw_comparison_operator: = + - raw_comparison_operator: '>' + - numeric_literal: '4' + - end_bracket: ) + - from_clause: + - keyword: from + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dual +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - function: + - function_name: + - function_name_identifier: my_function + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: arg1 + - unparsable: + - raw_comparison_operator: = + - raw_comparison_operator: '>' + - numeric_literal: '3' + - comma: ',' + - numeric_literal: '4' + - end_bracket: ) + - from_clause: + - keyword: from + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dual +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/pivot.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/pivot.yml new file mode 100644 index 000000000..a6b1fcdd3 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/pivot.yml @@ -0,0 +1,563 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - 
select_clause_element: + - column_reference: + - naked_identifier: region + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4 + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: sales + - pivot_clause: + - keyword: PIVOT + - bracketed: + - start_bracket: ( + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sales + - keyword: FOR + - naked_identifier: quarter + - keyword: IN + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - alias_expression: + - keyword: AS + - naked_identifier: q1 + - comma: ',' + - expression: + - numeric_literal: '2' + - alias_expression: + - keyword: AS + - naked_identifier: q2 + - comma: ',' + - expression: + - numeric_literal: '3' + - alias_expression: + - keyword: AS + - naked_identifier: q3 + - comma: ',' + - expression: + - numeric_literal: '4' + - alias_expression: + - keyword: AS + - naked_identifier: q4 + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1_east + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1_west + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2_east + - comma: ',' + - 
select_clause_element: + - column_reference: + - naked_identifier: q2_west + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3_east + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3_west + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4_east + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4_west + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: sales + - pivot_clause: + - keyword: PIVOT + - bracketed: + - start_bracket: ( + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sales + - keyword: FOR + - bracketed: + - start_bracket: ( + - naked_identifier: quarter + - comma: ',' + - naked_identifier: region + - end_bracket: ) + - keyword: IN + - bracketed: + - start_bracket: ( + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - comma: ',' + - expression: + - quoted_literal: '''east''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q1_east + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - comma: ',' + - expression: + - quoted_literal: '''west''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q1_west + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '2' + - comma: ',' + - expression: + - quoted_literal: '''east''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q2_east + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '2' + - comma: ',' + - expression: + - quoted_literal: '''west''' + - 
end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q2_west + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '3' + - comma: ',' + - expression: + - quoted_literal: '''east''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q3_east + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '3' + - comma: ',' + - expression: + - quoted_literal: '''west''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q3_west + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '4' + - comma: ',' + - expression: + - quoted_literal: '''east''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q4_east + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '4' + - comma: ',' + - expression: + - quoted_literal: '''west''' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: q4_west + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4 + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - bracketed: + - start_bracket: ( + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: quarter + - 
comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: sales + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: s + - pivot_clause: + - keyword: PIVOT + - bracketed: + - start_bracket: ( + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sales + - keyword: FOR + - naked_identifier: quarter + - keyword: IN + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - alias_expression: + - keyword: AS + - naked_identifier: q1 + - comma: ',' + - expression: + - numeric_literal: '2' + - alias_expression: + - keyword: AS + - naked_identifier: q2 + - comma: ',' + - expression: + - numeric_literal: '3' + - alias_expression: + - keyword: AS + - naked_identifier: q3 + - comma: ',' + - expression: + - numeric_literal: '4' + - alias_expression: + - keyword: AS + - naked_identifier: q4 + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1_total + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1_avg + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2_total + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2_avg + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3_total + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3_avg + - 
comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4_total + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4_avg + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - bracketed: + - start_bracket: ( + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: quarter + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: sales + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: s + - pivot_clause: + - keyword: PIVOT + - bracketed: + - start_bracket: ( + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: total + - comma: ',' + - function: + - function_name: + - function_name_identifier: avg + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: sales + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: avg + - keyword: FOR + - naked_identifier: quarter + - keyword: IN + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - alias_expression: + - keyword: AS + - naked_identifier: q1 + - comma: ',' + - expression: + - numeric_literal: '2' + - alias_expression: + - keyword: AS + - naked_identifier: q2 + - comma: ',' + - expression: + - numeric_literal: '3' + - alias_expression: + - keyword: AS + - naked_identifier: q3 + - comma: ',' + - expression: + - numeric_literal: '4' + - alias_expression: 
+ - keyword: AS + - naked_identifier: q4 + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: year + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: region + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q1 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q2 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q3 + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: q4 + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: sales + - pivot_clause: + - keyword: PIVOT + - bracketed: + - start_bracket: ( + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: sales + - end_bracket: ) + - keyword: FOR + - naked_identifier: quarter + - keyword: IN + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - alias_expression: + - keyword: AS + - naked_identifier: q1 + - comma: ',' + - expression: + - numeric_literal: '2' + - alias_expression: + - keyword: AS + - naked_identifier: q2 + - comma: ',' + - expression: + - numeric_literal: '3' + - alias_expression: + - keyword: AS + - naked_identifier: q3 + - comma: ',' + - expression: + - numeric_literal: '4' + - alias_expression: + - keyword: AS + - naked_identifier: q4 + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/select.yml new file mode 100644 index 000000000..6274f7491 --- /dev/null +++ 
b/crates/lib-dialects/test/fixtures/dialects/databricks/select.yml @@ -0,0 +1,68 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - wildcard_expression: + - wildcard_identifier: + - star: '*' + - from_clause: + - keyword: from + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: shopify_cz + - unparsable: + - dot: . + - word: order +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - wildcard_expression: + - wildcard_identifier: + - star: '*' + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - function: + - function_name: + - function_name_identifier: IDENTIFIER + - bracketed: + - start_bracket: ( + - expression: + - quoted_literal: '''table_name''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - wildcard_expression: + - wildcard_identifier: + - star: '*' + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - function: + - function_name: + - function_name_identifier: IDENTIFIER + - bracketed: + - start_bracket: ( + - expression: + - quoted_literal: '''schema_name''' + - binary_operator: + - pipe: '|' + - pipe: '|' + - quoted_literal: '''.table_name''' + - end_bracket: ) +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.yml new file mode 100644 index 000000000..13f693550 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select_from_lateral_view.yml @@ -0,0 +1,769 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: 
+ - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: class + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: address + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: c_age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: d_age + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '30' + - comma: ',' + - expression: + - numeric_literal: '60' + - end_bracket: ) + - end_bracket: ) + - naked_identifier: tbl_name + - keyword: AS + - naked_identifier: c_age + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '40' + - comma: ',' + - expression: + - numeric_literal: '80' + - end_bracket: ) + - end_bracket: ) + - keyword: AS + - naked_identifier: d_age +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: c_age + - comma: ',' + - select_clause_element: + - function: + - function_name: + - 
function_name_identifier: COUNT + - bracketed: + - start_bracket: ( + - star: '*' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: record_count + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '30' + - comma: ',' + - expression: + - numeric_literal: '60' + - end_bracket: ) + - end_bracket: ) + - keyword: AS + - naked_identifier: c_age + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '40' + - comma: ',' + - expression: + - numeric_literal: '80' + - end_bracket: ) + - end_bracket: ) + - keyword: AS + - naked_identifier: d_age + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: c_age +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: class + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: address + - comma: ',' + - select_clause_element: + - column_reference: + - 
naked_identifier: c_age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: d_age + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - naked_identifier: tbl_name + - keyword: AS + - naked_identifier: c_age +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: class + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: address + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: time + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: c_age + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - keyword: OUTER + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - naked_identifier: tbl_name + - keyword: AS + - naked_identifier: c_age +- 
statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: class + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: address + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: time + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: c_age + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - keyword: OUTER + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - naked_identifier: tbl_name + - naked_identifier: c_age +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: class + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: address + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: time + - comma: ',' + - select_clause_element: + - column_reference: + - 
naked_identifier: c_age + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - keyword: OUTER + - function: + - function_name: + - function_name_identifier: EXPLODE + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: ARRAY + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - naked_identifier: c_age +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: person + - dot: . + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: state + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: INLINE + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: array_of_structs + - end_bracket: ) + - naked_identifier: exploded_people + - keyword: AS + - naked_identifier: name + - comma: ',' + - naked_identifier: age + - comma: ',' + - naked_identifier: state +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: p + - dot: . 
+ - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: state + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - alias_expression: + - keyword: AS + - naked_identifier: p + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: INLINE + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: array_of_structs + - end_bracket: ) + - naked_identifier: exploded_people + - keyword: AS + - naked_identifier: name + - comma: ',' + - naked_identifier: age + - comma: ',' + - naked_identifier: state +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: p + - dot: . + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . 
+ - naked_identifier: state + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - alias_expression: + - keyword: AS + - naked_identifier: p + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: INLINE + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: array_of_structs + - end_bracket: ) + - naked_identifier: exploded_people +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: p + - dot: . + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . 
+ - naked_identifier: state + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - alias_expression: + - keyword: AS + - naked_identifier: p + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: INLINE + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: array_of_structs + - end_bracket: ) + - naked_identifier: exploded_people + - naked_identifier: name + - comma: ',' + - naked_identifier: age + - comma: ',' + - naked_identifier: state +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: p + - dot: . + - naked_identifier: id + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . + - naked_identifier: age + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: exploded_people + - dot: . 
+ - naked_identifier: state + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person + - alias_expression: + - keyword: AS + - naked_identifier: p + - lateral_view_clause: + - keyword: LATERAL + - keyword: VIEW + - function: + - function_name: + - function_name_identifier: INLINE + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: array_of_structs + - end_bracket: ) + - keyword: AS + - naked_identifier: name + - comma: ',' + - naked_identifier: age + - comma: ',' + - naked_identifier: state +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: t1 + - dot: . + - naked_identifier: column1 + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: CAST + - bracketed: + - start_bracket: ( + - expression: + - function: + - function_name: + - function_name_identifier: GET_JSON_OBJECT + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: things + - comma: ',' + - expression: + - quoted_literal: '''$.percentage''' + - end_bracket: ) + - keyword: AS + - data_type: + - primitive_type: + - keyword: DECIMAL + - bracketed_arguments: + - bracketed: + - start_bracket: ( + - numeric_literal: '16' + - comma: ',' + - numeric_literal: '8' + - end_bracket: ) + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: ptc + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: table1 + - alias_expression: + - keyword: AS + - naked_identifier: t1 + - join_clause: + - keyword: LEFT + - keyword: JOIN + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: table2 + - alias_expression: + - keyword: 
AS + - naked_identifier: t2 + - join_on_condition: + - keyword: ON + - expression: + - column_reference: + - naked_identifier: c + - dot: . + - naked_identifier: column1 + - comparison_operator: + - raw_comparison_operator: = + - column_reference: + - naked_identifier: p + - dot: . + - naked_identifier: column1 + - binary_operator: AND + - column_reference: + - naked_identifier: t2 + - dot: . + - naked_identifier: type + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''SOMETHING''' + - unparsable: + - word: LATERAL + - word: VIEW + - word: OUTER + - word: EXPLODE + - start_bracket: ( + - word: t2 + - dot: . + - word: column2 + - end_bracket: ) + - word: AS + - word: things +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.yml new file mode 100644 index 000000000..83688b339 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select_group_by.yml @@ -0,0 +1,857 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: id + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: id +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - 
select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - numeric_literal: '1' + - orderby_clause: + - keyword: ORDER + - keyword: BY + - numeric_literal: '1' +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: max + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: max_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: id + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: id +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - 
naked_identifier: car_model + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: count + - bracketed: + - start_bracket: ( + - keyword: DISTINCT + - expression: + - column_reference: + - naked_identifier: city + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: count_distinct_city + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: car_model +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: id + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - keyword: FILTER + - bracketed: + - start_bracket: ( + - keyword: WHERE + - expression: + - column_reference: + - naked_identifier: car_model + - keyword: IN + - bracketed: + - start_bracket: ( + - quoted_literal: '''Honda Civic''' + - comma: ',' + - quoted_literal: '''Honda CRV''' + - end_bracket: ) + - end_bracket: ) + - alias_expression: + - keyword: AS + - quoted_identifier: '`sum(quantity)`' + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: id + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: id +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: city + - comma: ',' + - 
select_clause_element: + - column_reference: + - naked_identifier: car_model + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - grouping_sets_clause: + - keyword: GROUPING + - keyword: SETS + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - expression: + - bracketed: + - start_bracket: ( + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: city + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: car_model + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: city +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: city + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: car_model + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: 
sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - grouping_sets_clause: + - keyword: GROUPING + - keyword: SETS + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - expression: + - bracketed: + - start_bracket: ( + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: city + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: car_model + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: city +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: city + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: car_model + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - 
naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - comma: ',' + - grouping_sets_clause: + - keyword: GROUPING + - keyword: SETS + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - expression: + - bracketed: + - start_bracket: ( + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: city + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: car_model + - end_bracket: ) + - comma: ',' + - expression: + - bracketed: + - start_bracket: ( + - end_bracket: ) + - end_bracket: ) + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: city +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: city + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: car_model + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - with_cube_rollup_clause: + - keyword: WITH + - keyword: ROLLUP + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + 
- naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: city + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: car_model + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: quantity + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_quantity + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: dealer + - groupby_clause: + - keyword: GROUP + - keyword: BY + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model + - with_cube_rollup_clause: + - keyword: WITH + - keyword: CUBE + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: city + - comma: ',' + - column_reference: + - naked_identifier: car_model +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - function: + - function_name: + - function_name_identifier: first + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: age + - end_bracket: ) + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - function: + - function_name: + - function_name_identifier: first + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - 
naked_identifier: age + - keyword: IGNORE + - keyword: NULLS + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: first_age + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: last + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: id + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: last_id + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: sum + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: id + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: sum_id + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: person +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: count + - bracketed: + - start_bracket: ( + - star: '*' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: record_count + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: people + - groupby_clause: + - keyword: GROUP + - keyword: BY + - cube_rollup_clause: + - function_name: + - function_name_identifier: cube + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - column_reference: + - naked_identifier: name + - comma: ',' + - column_reference: + - naked_identifier: age + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: 
SELECT + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: count + - bracketed: + - start_bracket: ( + - star: '*' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: record_count + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: people + - groupby_clause: + - keyword: GROUP + - keyword: BY + - cube_rollup_clause: + - function_name: + - function_name_identifier: cube + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - column_reference: + - naked_identifier: name + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: count + - bracketed: + - start_bracket: ( + - star: '*' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: record_count + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: people + - groupby_clause: + - keyword: GROUP + - keyword: BY + - cube_rollup_clause: + - function_name: + - function_name_identifier: cube + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - column_reference: + - naked_identifier: name + - comma: ',' + - column_reference: + - naked_identifier: age + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - 
column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: count + - bracketed: + - start_bracket: ( + - star: '*' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: record_count + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: people + - groupby_clause: + - keyword: GROUP + - keyword: BY + - cube_rollup_clause: + - function_name: + - function_name_identifier: rollup + - bracketed: + - start_bracket: ( + - grouping_expression_list: + - column_reference: + - naked_identifier: name + - comma: ',' + - column_reference: + - naked_identifier: age + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - column_reference: + - naked_identifier: name + - comma: ',' + - select_clause_element: + - column_reference: + - naked_identifier: age + - comma: ',' + - select_clause_element: + - function: + - function_name: + - function_name_identifier: count + - bracketed: + - start_bracket: ( + - star: '*' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: record_count + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: people + - unparsable: + - word: GROUP + - word: BY + - word: ALL +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/select_window.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/select_window.yml new file mode 100644 index 000000000..4b4636964 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/select_window.yml @@ -0,0 +1,77 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - function: + - 
function_name: + - function_name_identifier: lag + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: test + - end_bracket: ) + - over_clause: + - keyword: over + - bracketed: + - start_bracket: ( + - window_specification: + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: test + - end_bracket: ) + - from_clause: + - keyword: from + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: schema + - dot: . + - naked_identifier: test_table +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - function: + - function_name: + - function_name_identifier: lag + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: test + - end_bracket: ) + - over_clause: + - keyword: over + - bracketed: + - start_bracket: ( + - window_specification: + - partitionby_clause: + - keyword: PARTITION + - keyword: BY + - expression: + - column_reference: + - naked_identifier: test + - orderby_clause: + - keyword: ORDER + - keyword: BY + - column_reference: + - naked_identifier: test + - end_bracket: ) + - from_clause: + - keyword: from + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: schema + - dot: . 
+ - naked_identifier: test_table +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.yml new file mode 100644 index 000000000..65f850895 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.yml @@ -0,0 +1,38 @@ +file: +- statement: + - set_statement: + - keyword: SET + - property_name_identifier: + - properties_naked_identifier: TIME + - naked_identifier: ZONE +- file: + - word: LOCAL + - semicolon: ; + - word: SET + - word: TIME + - word: ZONE + - single_quote: '''America/Los_Angeles''' + - semicolon: ; + - word: SET + - word: TIME + - word: ZONE + - single_quote: '''+08:00''' + - semicolon: ; + - word: SET + - word: TIME + - word: ZONE + - word: INTERVAL + - numeric_literal: '1' + - word: HOUR + - numeric_literal: '30' + - word: MINUTES + - semicolon: ; + - word: SET + - word: TIME + - word: ZONE + - word: INTERVAL + - single_quote: '''08:30:00''' + - word: HOUR + - word: TO + - word: SECOND + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.yml new file mode 100644 index 000000000..d8c2ffe5c --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.yml @@ -0,0 +1,89 @@ +file: +- statement: + - set_statement: + - keyword: SET + - property_name_identifier: + - properties_naked_identifier: VAR + - naked_identifier: var1 +- file: + - raw_comparison_operator: = + - numeric_literal: '5' + - semicolon: ; + - word: SET + - word: VARIABLE + - word: var1 + - raw_comparison_operator: = + - start_bracket: ( + - word: SELECT + - word: max + - start_bracket: ( + - word: c1 + - end_bracket: ) + - word: FROM + - word: VALUES + - start_bracket: ( + - numeric_literal: '1' + - end_bracket: ) + - comma: ',' + - start_bracket: ( + - numeric_literal: '2' + - end_bracket: ) + - 
word: AS + - word: t + - start_bracket: ( + - word: c1 + - end_bracket: ) + - end_bracket: ) + - semicolon: ; + - word: SET + - word: VAR + - word: var1 + - raw_comparison_operator: = + - word: DEFAULT + - semicolon: ; + - word: SET + - word: VAR + - start_bracket: ( + - word: var1 + - comma: ',' + - word: var2 + - comma: ',' + - word: var3 + - end_bracket: ) + - raw_comparison_operator: = + - start_bracket: ( + - word: VALUES + - start_bracket: ( + - numeric_literal: '100' + - comma: ',' + - single_quote: '''x123''' + - comma: ',' + - word: DEFAULT + - end_bracket: ) + - end_bracket: ) + - semicolon: ; + - word: SET + - word: VARIABLE + - back_quote: '`foo`' + - raw_comparison_operator: = + - word: select + - single_quote: '''bar''' + - semicolon: ; + - word: set + - word: var + - word: tz + - raw_comparison_operator: = + - word: current_timezone + - start_bracket: ( + - end_bracket: ) + - semicolon: ; + - word: set + - word: var + - word: x1 + - raw_comparison_operator: = + - numeric_literal: '12' + - comma: ',' + - word: x2 + - raw_comparison_operator: = + - single_quote: '''helloworld''' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.yml new file mode 100644 index 000000000..825b492d0 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_databases.yml @@ -0,0 +1,39 @@ +file: +- statement: + - show_statement: + - keyword: SHOW + - keyword: DATABASES +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: DATABASES +- file: + - word: FROM + - word: userdb + - semicolon: ; + - word: SHOW + - word: DATABASES + - word: IN + - word: userdb + - semicolon: ; + - word: SHOW + - word: DATABASES + - word: FROM + - word: default + - word: LIKE + - single_quote: '''sam*''' + - semicolon: ; + - word: SHOW + - word: DATABASES + - word: FROM + - word: default + - single_quote: '''sam*''' + - 
semicolon: ; + - word: SHOW + - word: DATABASES + - single_quote: '''sam*|suj''' + - semicolon: ; + - word: SHOW + - word: SCHEMAS + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.yml new file mode 100644 index 000000000..ecb017261 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_functions.yml @@ -0,0 +1,100 @@ +file: +- unparsable: + - word: SHOW + - word: FUNCTIONS + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: trim + - semicolon: ; + - word: SHOW + - word: ALL + - word: FUNCTIONS + - word: trim + - semicolon: ; + - word: SHOW + - word: SYSTEM + - word: FUNCTIONS + - word: concat + - semicolon: ; + - word: SHOW + - word: USER + - word: FUNCTIONS + - word: concat_user + - semicolon: ; + - word: SHOW + - word: SYSTEM + - word: FUNCTIONS + - word: salesdb + - dot: . + - word: max + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: LIKE + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: USER + - word: FUNCTIONS + - word: LIKE + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: USER + - word: FUNCTIONS + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: LIKE + - single_quote: '''yea*|windo*''' + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: LIKE + - single_quote: '''t[a-z][a-z][a-z]''' + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: FROM + - word: default + - semicolon: ; + - word: SHOW + - word: USER + - word: FUNCTIONS + - word: FROM + - word: default + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: FROM + - word: default + - word: LIKE + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: FUNCTIONS + - word: FROM + - word: default + - single_quote: '''t*''' + - semicolon: ; + - word: 
SHOW + - word: USER + - word: FUNCTIONS + - word: FROM + - word: default + - word: LIKE + - single_quote: '''t*''' + - semicolon: ; + - word: SHOW + - word: USER + - word: FUNCTIONS + - word: FROM + - word: default + - single_quote: '''t*''' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.yml new file mode 100644 index 000000000..2bef920b6 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.yml @@ -0,0 +1,46 @@ +file: +- statement: + - show_statement: + - keyword: SHOW + - keyword: TABLES +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: TABLES + - keyword: FROM + - database_reference: + - naked_identifier: userdb +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: TABLES + - keyword: IN + - database_reference: + - naked_identifier: userdb +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: TABLES + - keyword: FROM + - database_reference: + - naked_identifier: default + - keyword: LIKE + - quoted_literal: '''sam*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: TABLES + - keyword: FROM + - database_reference: + - naked_identifier: default +- file: + - single_quote: '''sam*''' + - semicolon: ; + - word: SHOW + - word: TABLES + - single_quote: '''sam*|suj''' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml new file mode 100644 index 000000000..f18dd5272 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml @@ -0,0 +1,53 @@ +file: +- statement: + - show_statement: + - keyword: SHOW + - keyword: VIEWS +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VIEWS + 
- keyword: FROM + - database_reference: + - naked_identifier: userdb +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: IN + - database_reference: + - naked_identifier: global_temp +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: FROM + - database_reference: + - naked_identifier: default + - keyword: LIKE + - quoted_literal: '''sam*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: LIKE + - quoted_literal: '''sam|suj|temp*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: FROM + - database_reference: + - naked_identifier: default +- file: + - single_quote: '''sam*''' + - semicolon: ; + - word: SHOW + - word: VIEWS + - single_quote: '''sam|suj|temp*''' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml new file mode 100644 index 000000000..a37bd35fd --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml @@ -0,0 +1,50 @@ +file: +- unparsable: + - word: SHOW + - word: VOLUMES + - semicolon: ; + - word: SHOW + - word: VOLUMES + - word: IN + - word: sampledb + - semicolon: ; + - word: SHOW + - word: VOLUMES + - word: FROM + - word: sampledb + - semicolon: ; + - word: SHOW + - word: VOLUMES + - word: LIKE + - single_quote: '''regex*''' + - semicolon: ; + - word: SHOW + - word: VOLUMES + - single_quote: '''regex*''' + - semicolon: ; + - word: SHOW + - word: VOLUMES + - word: IN + - word: sampledb + - word: LIKE + - single_quote: '''regex*''' + - semicolon: ; + - word: SHOW + - word: VOLUMES + - word: IN + - word: sampledb + - single_quote: '''regex*''' + - semicolon: ; + - word: SHOW + - word: VOLUMES + - word: FROM + - word: sampledb + - word: LIKE + - single_quote: 
'''regex*''' + - semicolon: ; + - word: SHOW + - word: VOLUMES + - word: FROM + - word: sampledb + - single_quote: '''regex*''' + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.yml new file mode 100644 index 000000000..18b854bd0 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/unpivot.yml @@ -0,0 +1,99 @@ +file: +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - wildcard_expression: + - wildcard_identifier: + - star: '*' + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: sales + - alias_expression: + - naked_identifier: UNPIVOT + - unparsable: + - word: INCLUDE + - word: NULLS + - start_bracket: ( + - word: sales + - word: FOR + - word: quarter + - word: IN + - start_bracket: ( + - word: q1 + - word: AS + - back_quote: '`Jan-Mar`' + - comma: ',' + - word: q2 + - word: AS + - back_quote: '`Apr-Jun`' + - comma: ',' + - word: q3 + - word: AS + - back_quote: '`Jul-Sep`' + - comma: ',' + - word: sales + - dot: . 
+ - word: q4 + - word: AS + - back_quote: '`Oct-Dec`' + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - wildcard_expression: + - wildcard_identifier: + - star: '*' + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - table_reference: + - naked_identifier: oncall + - alias_expression: + - naked_identifier: UNPIVOT + - unparsable: + - start_bracket: ( + - start_bracket: ( + - word: name + - comma: ',' + - word: email + - comma: ',' + - word: phone + - end_bracket: ) + - word: FOR + - word: precedence + - word: IN + - start_bracket: ( + - start_bracket: ( + - word: name1 + - comma: ',' + - word: email1 + - comma: ',' + - word: phone1 + - end_bracket: ) + - word: AS + - word: primary + - comma: ',' + - start_bracket: ( + - word: name2 + - comma: ',' + - word: email2 + - comma: ',' + - word: phone2 + - end_bracket: ) + - word: AS + - word: secondary + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.yml new file mode 100644 index 000000000..3b1cc413b --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.yml @@ -0,0 +1,25 @@ +file: +- statement: + - use_statement: + - keyword: USE + - database_reference: + - naked_identifier: CATALOG +- file: + - word: catalog_name + - semicolon: ; + - word: USE + - word: CATALOG + - word: hive_metastore + - semicolon: ; + - word: USE + - word: CATALOG + - single_quote: '''hive_metastore''' + - semicolon: ; + - word: USE + - word: CATALOG + - back_quote: '`some_catalog`' + - semicolon: ; + - word: USE + - word: CATALOG + - word: some_cat + - semicolon: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.yml 
b/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.yml new file mode 100644 index 000000000..19a636ed7 --- /dev/null +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.yml @@ -0,0 +1,51 @@ +file: +- statement: + - use_statement: + - keyword: USE + - database_reference: + - naked_identifier: database_name +- statement_terminator: ; +- statement: + - use_statement: + - keyword: USE + - database_reference: + - naked_identifier: userdb +- statement_terminator: ; +- statement: + - use_statement: + - keyword: USE + - database_reference: + - naked_identifier: userdb1 +- statement_terminator: ; +- statement: + - use_statement: + - keyword: USE + - database_reference: + - naked_identifier: DATABASE +- file: + - word: database_name + - semicolon: ; + - word: USE + - word: SCHEMA + - word: database_name + - semicolon: ; + - word: USE + - word: IDENTIFIER + - start_bracket: ( + - single_quote: '''database_name''' + - end_bracket: ) + - semicolon: ; + - word: USE + - word: DATABASE + - word: IDENTIFIER + - start_bracket: ( + - single_quote: '''database_name''' + - end_bracket: ) + - semicolon: ; + - word: USE + - word: SCHEMA + - word: IDENTIFIER + - start_bracket: ( + - single_quote: '''database_name''' + - end_bracket: ) + - semicolon: ; From 74f2e1a3531f9b074ec1b36455328a36ddb5e30f Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:11:33 +0100 Subject: [PATCH 04/19] progress --- crates/lib-dialects/src/databricks.rs | 364 +++++++++--------- .../dialects/databricks/named_argument.yml | 12 +- 2 files changed, 186 insertions(+), 190 deletions(-) diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index cd06a09c7..5955d0f78 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -50,26 +50,26 @@ pub fn dialect() -> Dialect { // Datbricks Notebook Start: // Needed to insert "so early" to avoid magic + 
notebook // start to be interpreted as inline comment - databrikcs.insert_lexer_matchers( - vec![ - Matcher::regex( - "notebook_start", - r"-- Databricks notebook source(\r?\n){1}", - SyntaxKind::CommentStatement, - ), - Matcher::regex( - "magic_line", - r"(-- MAGIC)( [^%]{1})([^\n]*)", - SyntaxKind::Code, - ), - Matcher::regex( - "magic_start", - r"(-- MAGIC %)([^\n]{2,})(\r?\n)", - SyntaxKind::CodeSegment, - ), - ], - "inline_comment", - ); + // databricks.insert_lexer_matchers( + // vec![ + // Matcher::regex( + // "notebook_start", + // r"-- Databricks notebook source(\r?\n){1}", + // SyntaxKind::NotebookStart, + // ), + // Matcher::regex( + // "magic_line", + // r"(-- MAGIC)( [^%]{1})([^\n]*)", + // SyntaxKind::MagicLine, + // ), + // Matcher::regex( + // "magic_start", + // r"(-- MAGIC %)([^\n]{2,})(\r?\n)", + // SyntaxKind::MagicStart, + // ), + // ], + // "inline_comment", + // ); databricks.add([ ( @@ -101,171 +101,171 @@ pub fn dialect() -> Dialect { .to_matchable() .into(), ), - ( - "ConstraintOptionGrammar".into(), - Sequence::new(vec_of_erased![ - Sequence::new(vec_of_erased![ - Ref::keyword("ENABLE"), - Ref::keyword("NOVALIDATE") - ]) - .config(|config| { config.optional() }), - Sequence::new(vec_of_erased![ - Ref::keyword("NOT"), - Ref::keyword("ENFORCED") - ]) - .config(|config| { config.optional() }), - Sequence::new(vec_of_erased![Ref::keyword("DEFERRABLE")]) - .config(|config| { config.optional() }), - Sequence::new(vec_of_erased![ - Ref::keyword("INITIALLY"), - Ref::keyword("DEFERRED") - ]) - .config(|config| { config.optional() }), - one_of(vec_of_erased![Ref::keyword("NORELY"), Ref::keyword("RELY"),]) - .config(|config| { config.optional() }), - ]) - .to_matchable() - .into(), - ), - ( - "ForeignKeyOptionGrammar".into(), - Sequence::new(vec_of_erased![ - Sequence::new(vec_of_erased![Ref::keyword("MATCH"), Ref::keyword("FULL"),]) - .config(|config| { config.optional() }), - Sequence::new(vec_of_erased![ - Ref::keyword("ON"), - 
Ref::keyword("UPDATE"), - Ref::keyword("NO"), - Ref::keyword("ACTION"), - ]) - .config(|config| { config.optional() }), - Sequence::new(vec_of_erased![ - Ref::keyword("ON"), - Ref::keyword("DELETE"), - Ref::keyword("NO"), - Ref::keyword("ACTION"), - ]), - ]), - ), - // DropConstraintGrammar=Sequence( - // "DROP", - // OneOf( - // Sequence( - // Ref("PrimaryKeyGrammar"), - // Ref("IfExistsGrammar", optional=True), - // OneOf( - // "RESTRICT", - // "CASCADE", - // optional=True, - // ), - // ), - // Sequence( - // Ref("ForeignKeyGrammar"), - // Ref("IfExistsGrammar", optional=True), - // Bracketed( - // Delimited( - // Ref("ColumnReferenceSegment"), - // ) - // ), - // ), - // Sequence( - // "CONSTRAINT", - // Ref("IfExistsGrammar", optional=True), - // Ref("ObjectReferenceSegment"), - // OneOf( - // "RESTRICT", - // "CASCADE", - // optional=True, - // ), - // ), - // ), - // ), - ( - "DropConstraintGrammar".into(), - one_of(vec_of_erased![ - Sequence::new(vec_of_erased![ - Ref::new("PrimaryKeyGrammar"), - Ref::new("IfExistsGrammar").optional(), - one_of(vec_of_erased![ - Ref::keyword("RESTRICT"), - Ref::keyword("CASCADE"), - ]) - .config(|config| config.optional()), - ]), - Sequence::new(vec_of_erased![ - Ref::new("ForeignKeyGrammar"), - Ref::new("IfExistsGrammar").optional(), - Ref::new("Bracketed").config(|config| { - config.set_children(vec_of_erased![Ref::new("ColumnReferenceSegment")]) - }), - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("CONSTRAINT"), - Ref::new("IfExistsGrammar").optional(), - Ref::new("ObjectReferenceSegment"), - one_of(vec_of_erased![ - Ref::keyword("RESTRICT"), - Ref::keyword("CASCADE"), - ]) - .config(|config| config.optional()), - ]), - ]) - .to_matchable() - .into(), - ), - // AlterPartitionGrammar=Sequence( - // "PARTITION", - // Bracketed( - // Delimited( - // AnyNumberOf( - // OneOf( - // Ref("ColumnReferenceSegment"), - // Ref("SetClauseSegment"), - // ), - // min_times=1, - // ), - // ), - // ), - // ), - // 
RowFilterClauseGrammar=Sequence( - // "ROW", - // "FILTER", - // Ref("ObjectReferenceSegment"), - // "ON", - // Bracketed( - // Delimited( - // OneOf( - // Ref("ColumnReferenceSegment"), - // Ref("LiteralGrammar"), - // ), - // optional=True, - // ), - // ), - // ), - // PropertiesBackTickedIdentifierSegment=RegexParser( - // r"`.+`", - // IdentifierSegment, - // type="properties_naked_identifier", + // ( + // "ConstraintOptionGrammar".into(), + // Sequence::new(vec_of_erased![ + // Sequence::new(vec_of_erased![ + // Ref::keyword("ENABLE"), + // Ref::keyword("NOVALIDATE") + // ]) + // .config(|config| { config.optional() }), + // Sequence::new(vec_of_erased![ + // Ref::keyword("NOT"), + // Ref::keyword("ENFORCED") + // ]) + // .config(|config| { config.optional() }), + // Sequence::new(vec_of_erased![Ref::keyword("DEFERRABLE")]) + // .config(|config| { config.optional() }), + // Sequence::new(vec_of_erased![ + // Ref::keyword("INITIALLY"), + // Ref::keyword("DEFERRED") + // ]) + // .config(|config| { config.optional() }), + // one_of(vec_of_erased![Ref::keyword("NORELY"), Ref::keyword("RELY"),]) + // .config(|config| { config.optional() }), + // ]) + // .to_matchable() + // .into(), // ), - // LocationWithCredentialGrammar=Sequence( - // "LOCATION", - // Ref("QuotedLiteralSegment"), - // Sequence( - // "WITH", - // Bracketed( - // "CREDENTIAL", - // Ref("PrincipalIdentifierSegment"), - // ), - // optional=True, - // ), + // ( + // "ForeignKeyOptionGrammar".into(), + // Sequence::new(vec_of_erased![ + // Sequence::new(vec_of_erased![Ref::keyword("MATCH"), Ref::keyword("FULL"),]) + // .config(|config| { config.optional() }), + // Sequence::new(vec_of_erased![ + // Ref::keyword("ON"), + // Ref::keyword("UPDATE"), + // Ref::keyword("NO"), + // Ref::keyword("ACTION"), + // ]) + // .config(|config| { config.optional() }), + // Sequence::new(vec_of_erased![ + // Ref::keyword("ON"), + // Ref::keyword("DELETE"), + // Ref::keyword("NO"), + // Ref::keyword("ACTION"), + // ]), 
+ // ]), // ), - // NotebookStart=TypedParser("notebook_start", CommentSegment, type="notebook_start"), - // MagicLineGrammar=TypedParser("magic_line", CodeSegment, type="magic_line"), - // MagicStartGrammar=TypedParser("magic_start", CodeSegment, type="magic_start"), - // VariableNameIdentifierSegment=OneOf( - // Ref("NakedIdentifierSegment"), - // Ref("BackQuotedIdentifierSegment"), + // // DropConstraintGrammar=Sequence( + // // "DROP", + // // OneOf( + // // Sequence( + // // Ref("PrimaryKeyGrammar"), + // // Ref("IfExistsGrammar", optional=True), + // // OneOf( + // // "RESTRICT", + // // "CASCADE", + // // optional=True, + // // ), + // // ), + // // Sequence( + // // Ref("ForeignKeyGrammar"), + // // Ref("IfExistsGrammar", optional=True), + // // Bracketed( + // // Delimited( + // // Ref("ColumnReferenceSegment"), + // // ) + // // ), + // // ), + // // Sequence( + // // "CONSTRAINT", + // // Ref("IfExistsGrammar", optional=True), + // // Ref("ObjectReferenceSegment"), + // // OneOf( + // // "RESTRICT", + // // "CASCADE", + // // optional=True, + // // ), + // // ), + // // ), + // // ), + // ( + // "DropConstraintGrammar".into(), + // one_of(vec_of_erased![ + // Sequence::new(vec_of_erased![ + // Ref::new("PrimaryKeyGrammar"), + // Ref::new("IfExistsGrammar").optional(), + // one_of(vec_of_erased![ + // Ref::keyword("RESTRICT"), + // Ref::keyword("CASCADE"), + // ]) + // .config(|config| config.optional()), + // ]), + // Sequence::new(vec_of_erased![ + // Ref::new("ForeignKeyGrammar"), + // Ref::new("IfExistsGrammar").optional(), + // Ref::new("Bracketed").config(|config| { + // config.set_children(vec_of_erased![Ref::new("ColumnReferenceSegment")]) + // }), + // ]), + // Sequence::new(vec_of_erased![ + // Ref::keyword("CONSTRAINT"), + // Ref::new("IfExistsGrammar").optional(), + // Ref::new("ObjectReferenceSegment"), + // one_of(vec_of_erased![ + // Ref::keyword("RESTRICT"), + // Ref::keyword("CASCADE"), + // ]) + // .config(|config| config.optional()), + 
// ]), + // ]) + // .to_matchable() + // .into(), // ), + // // AlterPartitionGrammar=Sequence( + // // "PARTITION", + // // Bracketed( + // // Delimited( + // // AnyNumberOf( + // // OneOf( + // // Ref("ColumnReferenceSegment"), + // // Ref("SetClauseSegment"), + // // ), + // // min_times=1, + // // ), + // // ), + // // ), + // // ), + // // RowFilterClauseGrammar=Sequence( + // // "ROW", + // // "FILTER", + // // Ref("ObjectReferenceSegment"), + // // "ON", + // // Bracketed( + // // Delimited( + // // OneOf( + // // Ref("ColumnReferenceSegment"), + // // Ref("LiteralGrammar"), + // // ), + // // optional=True, + // // ), + // // ), + // // ), + // // PropertiesBackTickedIdentifierSegment=RegexParser( + // // r"`.+`", + // // IdentifierSegment, + // // type="properties_naked_identifier", + // // ), + // // LocationWithCredentialGrammar=Sequence( + // // "LOCATION", + // // Ref("QuotedLiteralSegment"), + // // Sequence( + // // "WITH", + // // Bracketed( + // // "CREDENTIAL", + // // Ref("PrincipalIdentifierSegment"), + // // ), + // // optional=True, + // // ), + // // ), + // // NotebookStart=TypedParser("notebook_start", CommentSegment, type="notebook_start"), + // // MagicLineGrammar=TypedParser("magic_line", CodeSegment, type="magic_line"), + // // MagicStartGrammar=TypedParser("magic_start", CodeSegment, type="magic_start"), + // // VariableNameIdentifierSegment=OneOf( + // // Ref("NakedIdentifierSegment"), + // // Ref("BackQuotedIdentifierSegment"), + // // ), ]); databricks.add([ diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.yml index d478122b6..f3ce07dbc 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/named_argument.yml @@ -13,13 +13,11 @@ file: - column_reference: - naked_identifier: arg1 - unparsable: - - raw_comparison_operator: = - - 
raw_comparison_operator: '>' + - right_arrow: => - numeric_literal: '3' - comma: ',' - word: arg2 - - raw_comparison_operator: = - - raw_comparison_operator: '>' + - right_arrow: => - numeric_literal: '4' - end_bracket: ) - from_clause: @@ -47,8 +45,7 @@ file: - column_reference: - naked_identifier: arg2 - unparsable: - - raw_comparison_operator: = - - raw_comparison_operator: '>' + - right_arrow: => - numeric_literal: '4' - end_bracket: ) - from_clause: @@ -73,8 +70,7 @@ file: - column_reference: - naked_identifier: arg1 - unparsable: - - raw_comparison_operator: = - - raw_comparison_operator: '>' + - right_arrow: => - numeric_literal: '3' - comma: ',' - numeric_literal: '4' From 8b534a1f35b7cb3e315b6af576c94089d6075e37 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:14:14 +0100 Subject: [PATCH 05/19] temp --- crates/lib-dialects/src/databricks.rs | 52 +++++++++++++-------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index 5955d0f78..faa643ddf 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -101,32 +101,32 @@ pub fn dialect() -> Dialect { .to_matchable() .into(), ), - // ( - // "ConstraintOptionGrammar".into(), - // Sequence::new(vec_of_erased![ - // Sequence::new(vec_of_erased![ - // Ref::keyword("ENABLE"), - // Ref::keyword("NOVALIDATE") - // ]) - // .config(|config| { config.optional() }), - // Sequence::new(vec_of_erased![ - // Ref::keyword("NOT"), - // Ref::keyword("ENFORCED") - // ]) - // .config(|config| { config.optional() }), - // Sequence::new(vec_of_erased![Ref::keyword("DEFERRABLE")]) - // .config(|config| { config.optional() }), - // Sequence::new(vec_of_erased![ - // Ref::keyword("INITIALLY"), - // Ref::keyword("DEFERRED") - // ]) - // .config(|config| { config.optional() }), - // one_of(vec_of_erased![Ref::keyword("NORELY"), 
Ref::keyword("RELY"),]) - // .config(|config| { config.optional() }), - // ]) - // .to_matchable() - // .into(), - // ), + ( + "ConstraintOptionGrammar".into(), + Sequence::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("ENABLE"), + Ref::keyword("NOVALIDATE") + ]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("NOT"), + Ref::keyword("ENFORCED") + ]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![Ref::keyword("DEFERRABLE")]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("INITIALLY"), + Ref::keyword("DEFERRED") + ]) + .config(|config| { config.optional() }), + one_of(vec_of_erased![Ref::keyword("NORELY"), Ref::keyword("RELY"),]) + .config(|config| { config.optional() }), + ]) + .to_matchable() + .into(), + ), // ( // "ForeignKeyOptionGrammar".into(), // Sequence::new(vec_of_erased![ From 5e0402f12a6b4e138673ad50987dc64491c60fb0 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Tue, 17 Dec 2024 14:15:05 +0100 Subject: [PATCH 06/19] update [ci skip] --- crates/lib-dialects/src/databricks.rs | 297 ++++++++++++-------------- 1 file changed, 137 insertions(+), 160 deletions(-) diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index faa643ddf..ddc446d11 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -1,3 +1,7 @@ +use crate::databricks_keywords::{RESERVED_KEYWORDS, UNRESERVED_KEYWORDS}; +use sqruff_lib_core::parser::grammar::anyof::AnyNumberOf; +use sqruff_lib_core::parser::grammar::delimited::Delimited; +use sqruff_lib_core::parser::grammar::sequence::Bracketed; use sqruff_lib_core::{ dialects::{base::Dialect, init::DialectKind, syntax::SyntaxKind}, helpers::{Config, ToMatchable}, @@ -8,8 +12,6 @@ use sqruff_lib_core::{ vec_of_erased, }; -use crate::databricks_keywords::{RESERVED_KEYWORDS, UNRESERVED_KEYWORDS}; 
- pub fn dialect() -> Dialect { let raw_sparksql = crate::sparksql::dialect(); @@ -127,138 +129,115 @@ pub fn dialect() -> Dialect { .to_matchable() .into(), ), + ( + "ForeignKeyOptionGrammar".into(), + Sequence::new(vec_of_erased![ + Sequence::new(vec_of_erased![Ref::keyword("MATCH"), Ref::keyword("FULL"),]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("ON"), + Ref::keyword("UPDATE"), + Ref::keyword("NO"), + Ref::keyword("ACTION"), + ]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("ON"), + Ref::keyword("DELETE"), + Ref::keyword("NO"), + Ref::keyword("ACTION"), + ]), + ]) + .to_matchable() + .into(), + ), + ( + "DropConstraintGrammar".into(), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::new("PrimaryKeyGrammar"), + Ref::new("IfExistsGrammar").optional(), + one_of(vec_of_erased![ + Ref::keyword("RESTRICT"), + Ref::keyword("CASCADE"), + ]) + .config(|config| config.optional()), + ]), + Sequence::new(vec_of_erased![ + Ref::new("ForeignKeyGrammar"), + Ref::new("IfExistsGrammar").optional(), + Bracketed::new(vec_of_erased![Ref::new("ColumnReferenceSegment")]), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("CONSTRAINT"), + Ref::new("IfExistsGrammar").optional(), + Ref::new("ObjectReferenceSegment"), + one_of(vec_of_erased![ + Ref::keyword("RESTRICT"), + Ref::keyword("CASCADE"), + ]) + .config(|config| config.optional()), + ]), + ]) + .to_matchable() + .into(), + ), + ( + "AlterPartitionGrammar".into(), + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![ + AnyNumberOf::new(vec_of_erased![one_of(vec_of_erased![ + Ref::new("ColumnReferenceSegment"), + Ref::new("SetClauseSegment"), + ]),]) + .config(|config| config.min_times(1)) + ])]) + .to_matchable() + .into(), + ), + ( + "RowFilterClauseGrammar".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("ROW"), + Ref::keyword("FILTER"), + Ref::new("ObjectReferenceSegment"), + Ref::keyword("ON"), + 
Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![one_of( + vec_of_erased![ + Ref::new("ColumnReferenceSegment"), + Ref::new("LiteralGrammar"), + ] + )]) + .config(|config| config.optional())]) + ]) + .to_matchable() + .into(), + ), + // TODO Sort out the following grammar // ( - // "ForeignKeyOptionGrammar".into(), - // Sequence::new(vec_of_erased![ - // Sequence::new(vec_of_erased![Ref::keyword("MATCH"), Ref::keyword("FULL"),]) - // .config(|config| { config.optional() }), - // Sequence::new(vec_of_erased![ - // Ref::keyword("ON"), - // Ref::keyword("UPDATE"), - // Ref::keyword("NO"), - // Ref::keyword("ACTION"), - // ]) - // .config(|config| { config.optional() }), - // Sequence::new(vec_of_erased![ - // Ref::keyword("ON"), - // Ref::keyword("DELETE"), - // Ref::keyword("NO"), - // Ref::keyword("ACTION"), - // ]), - // ]), - // ), - // // DropConstraintGrammar=Sequence( - // // "DROP", - // // OneOf( - // // Sequence( - // // Ref("PrimaryKeyGrammar"), - // // Ref("IfExistsGrammar", optional=True), - // // OneOf( - // // "RESTRICT", - // // "CASCADE", - // // optional=True, - // // ), - // // ), - // // Sequence( - // // Ref("ForeignKeyGrammar"), - // // Ref("IfExistsGrammar", optional=True), - // // Bracketed( - // // Delimited( - // // Ref("ColumnReferenceSegment"), - // // ) - // // ), - // // ), - // // Sequence( - // // "CONSTRAINT", - // // Ref("IfExistsGrammar", optional=True), - // // Ref("ObjectReferenceSegment"), - // // OneOf( - // // "RESTRICT", - // // "CASCADE", - // // optional=True, - // // ), - // // ), - // // ), - // // ), - // ( - // "DropConstraintGrammar".into(), - // one_of(vec_of_erased![ - // Sequence::new(vec_of_erased![ - // Ref::new("PrimaryKeyGrammar"), - // Ref::new("IfExistsGrammar").optional(), - // one_of(vec_of_erased![ - // Ref::keyword("RESTRICT"), - // Ref::keyword("CASCADE"), - // ]) - // .config(|config| config.optional()), - // ]), - // Sequence::new(vec_of_erased![ - // Ref::new("ForeignKeyGrammar"), - // 
Ref::new("IfExistsGrammar").optional(), - // Ref::new("Bracketed").config(|config| { - // config.set_children(vec_of_erased![Ref::new("ColumnReferenceSegment")]) - // }), - // ]), - // Sequence::new(vec_of_erased![ - // Ref::keyword("CONSTRAINT"), - // Ref::new("IfExistsGrammar").optional(), - // Ref::new("ObjectReferenceSegment"), - // one_of(vec_of_erased![ - // Ref::keyword("RESTRICT"), - // Ref::keyword("CASCADE"), - // ]) - // .config(|config| config.optional()), - // ]), - // ]) - // .to_matchable() - // .into(), + // "PropertiesBackTickedIdentifierSegment".into(), + // Matcher::regex( + // "properties_naked_identifier", + // r"`.+`", + // SyntaxKind::PropertiesNakedIdentifier, + // ).to_matchable().into(), // ), - // // AlterPartitionGrammar=Sequence( - // // "PARTITION", - // // Bracketed( - // // Delimited( - // // AnyNumberOf( - // // OneOf( - // // Ref("ColumnReferenceSegment"), - // // Ref("SetClauseSegment"), - // // ), - // // min_times=1, - // // ), - // // ), - // // ), - // // ), - // // RowFilterClauseGrammar=Sequence( - // // "ROW", - // // "FILTER", - // // Ref("ObjectReferenceSegment"), - // // "ON", - // // Bracketed( - // // Delimited( - // // OneOf( - // // Ref("ColumnReferenceSegment"), - // // Ref("LiteralGrammar"), - // // ), - // // optional=True, - // // ), - // // ), - // // ), - // // PropertiesBackTickedIdentifierSegment=RegexParser( - // // r"`.+`", - // // IdentifierSegment, - // // type="properties_naked_identifier", - // // ), - // // LocationWithCredentialGrammar=Sequence( - // // "LOCATION", - // // Ref("QuotedLiteralSegment"), - // // Sequence( - // // "WITH", - // // Bracketed( - // // "CREDENTIAL", - // // Ref("PrincipalIdentifierSegment"), - // // ), - // // optional=True, - // // ), - // // ), + ( + "LocationWithCredentialGrammar".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("LOCATION"), + Ref::new("QuotedLiteralSegment"), + Sequence::new(vec_of_erased![ + Ref::keyword("WITH"), + Bracketed::new(vec_of_erased![ + 
Ref::keyword("CREDENTIAL"), + Ref::new("PrincipalIdentifierSegment") + ]), + ]) + .config(|config| { config.optional() }), + ]) + .to_matchable() + .into(), + ), // // NotebookStart=TypedParser("notebook_start", CommentSegment, type="notebook_start"), // // MagicLineGrammar=TypedParser("magic_line", CodeSegment, type="magic_line"), // // MagicStartGrammar=TypedParser("magic_start", CodeSegment, type="magic_start"), @@ -268,39 +247,37 @@ pub fn dialect() -> Dialect { // // ), ]); - databricks.add([ - // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-views.html - // Only difference between this and the SparkSQL version: - // - `LIKE` keyword is optional - ( - "ShowViewsGrammar".into(), + // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-views.html + // Only difference between this and the SparkSQL version: + // - `LIKE` keyword is optional + databricks.replace_grammar( + "ShowViewsGrammar".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("VIEWS"), + Sequence::new(vec_of_erased![one_of(vec_of_erased![ + Ref::keyword("FROM"), + Ref::keyword("IN"), + ])]) + .config(|config| { + config.optional(); + }), Sequence::new(vec_of_erased![ - Ref::keyword("VIEWS"), - Sequence::new(vec_of_erased![one_of(vec_of_erased![ - Ref::keyword("FROM"), - Ref::keyword("IN"), - ])]) - .config(|config| { - config.optional(); - }), - Sequence::new(vec_of_erased![ - Ref::keyword("LIKE").optional(), - Ref::new("QuotedLiteralSegment"), - ]) - .config(|config| { config.optional() }) + Ref::keyword("LIKE").optional(), + Ref::new("QuotedLiteralSegment"), ]) + .config(|config| { config.optional() }) + ]) + .to_matchable() + .into(), + ); + // TODO Missing Show Object Grammar + databricks.replace_grammar( + "NotNullGrammar".into(), + Sequence::new(vec_of_erased![Ref::keyword("NOT"), Ref::keyword("NULL")]) .to_matchable() .into(), - ), - // TODO Missing Show Object Grammar - ( - "NotNullGrammar".into(), - 
Sequence::new(vec_of_erased![Ref::keyword("NOT"), Ref::keyword("NULL")]) - .to_matchable() - .into(), - ), // TODO Function NameIdentifierSegment - ]); + ); return databricks; } From c8a07781e3ddb19be5f99eef5a210ae2f05ad5d2 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Wed, 18 Dec 2024 09:48:58 +0100 Subject: [PATCH 07/19] temp --- crates/lib-dialects/src/databricks.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index ddc446d11..12392c856 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -241,10 +241,18 @@ pub fn dialect() -> Dialect { // // NotebookStart=TypedParser("notebook_start", CommentSegment, type="notebook_start"), // // MagicLineGrammar=TypedParser("magic_line", CodeSegment, type="magic_line"), // // MagicStartGrammar=TypedParser("magic_start", CodeSegment, type="magic_start"), - // // VariableNameIdentifierSegment=OneOf( - // // Ref("NakedIdentifierSegment"), - // // Ref("BackQuotedIdentifierSegment"), - // // ), + ( + "VariableNameIdentifierSegment".into(), + one_of(vec_of_erased![ + Ref::new("NakedIdentifierSegment"), + Ref::new("BackQuotedIdentifierSegment"), + ]) + .to_matchable() + .into(), + ), // // VariableNameIdentifierSegment=OneOf( + // // Ref("NakedIdentifierSegment"), + // // Ref("BackQuotedIdentifierSegment"), + // // ), ]); // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-show-views.html From b54fc1a0a8f30d5d72e0d6772152b210992d90ba Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Thu, 19 Dec 2024 11:32:47 +0100 Subject: [PATCH 08/19] temp [ci skip] --- crates/lib-core/src/dialects/base.rs | 1 + crates/lib-dialects/src/databricks.rs | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/crates/lib-core/src/dialects/base.rs 
b/crates/lib-core/src/dialects/base.rs index 8fad02c1f..d265e2114 100644 --- a/crates/lib-core/src/dialects/base.rs +++ b/crates/lib-core/src/dialects/base.rs @@ -69,6 +69,7 @@ impl Dialect { .unwrap_or_else(|| panic!("Failed to get mutable reference for {name}")) { DialectElementType::Matchable(matchable) => { + println!("Replacing grammar {:?} with new grammar.", matchable); matchable.as_node_matcher().unwrap().match_grammar = match_grammar; } DialectElementType::SegmentGenerator(_) => { diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index 12392c856..88ba0fc73 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -259,7 +259,7 @@ pub fn dialect() -> Dialect { // Only difference between this and the SparkSQL version: // - `LIKE` keyword is optional databricks.replace_grammar( - "ShowViewsGrammar".into(), + "ShowViewsStatement".into(), Sequence::new(vec_of_erased![ Ref::keyword("VIEWS"), Sequence::new(vec_of_erased![one_of(vec_of_erased![ @@ -278,6 +278,7 @@ pub fn dialect() -> Dialect { .to_matchable() .into(), ); + // TODO Missing Show Object Grammar databricks.replace_grammar( "NotNullGrammar".into(), From cab7246695b8921d26b6da51bf12fda64bcbdf8d Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Thu, 19 Dec 2024 12:19:08 +0100 Subject: [PATCH 09/19] temp --- crates/lib-core/src/dialects/base.rs | 3 +-- crates/lib-dialects/src/databricks.rs | 18 ++++++++++-------- .../dialects/databricks/{.sqruff => .sqlfluff} | 0 3 files changed, 11 insertions(+), 10 deletions(-) rename crates/lib-dialects/test/fixtures/dialects/databricks/{.sqruff => .sqlfluff} (100%) diff --git a/crates/lib-core/src/dialects/base.rs b/crates/lib-core/src/dialects/base.rs index d265e2114..0ac60a6a9 100644 --- a/crates/lib-core/src/dialects/base.rs +++ b/crates/lib-core/src/dialects/base.rs @@ -3,7 +3,7 @@ use std::collections::hash_map::Entry; use std::fmt::Debug; use 
ahash::{AHashMap, AHashSet}; - +use itertools::Itertools; use crate::dialects::init::DialectKind; use crate::dialects::syntax::SyntaxKind; use crate::helpers::{capitalize, ToMatchable}; @@ -69,7 +69,6 @@ impl Dialect { .unwrap_or_else(|| panic!("Failed to get mutable reference for {name}")) { DialectElementType::Matchable(matchable) => { - println!("Replacing grammar {:?} with new grammar.", matchable); matchable.as_node_matcher().unwrap().match_grammar = match_grammar; } DialectElementType::SegmentGenerator(_) => { diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index 88ba0fc73..4ca36f6ba 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -279,14 +279,16 @@ pub fn dialect() -> Dialect { .into(), ); - // TODO Missing Show Object Grammar - databricks.replace_grammar( - "NotNullGrammar".into(), - Sequence::new(vec_of_erased![Ref::keyword("NOT"), Ref::keyword("NULL")]) - .to_matchable() - .into(), - // TODO Function NameIdentifierSegment - ); + // // TODO Missing Show Object Grammar + // databricks.replace_grammar( + // "NotNullGrammar".into(), + // Sequence::new(vec_of_erased![Ref::keyword("NOT"), Ref::keyword("NULL")]) + // .to_matchable() + // .into(), + // // TODO Function NameIdentifierSegment + // ); + + databricks.expand(); return databricks; } diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/.sqruff b/crates/lib-dialects/test/fixtures/dialects/databricks/.sqlfluff similarity index 100% rename from crates/lib-dialects/test/fixtures/dialects/databricks/.sqruff rename to crates/lib-dialects/test/fixtures/dialects/databricks/.sqlfluff From 0f09194dc9516549a0b12b83a721f11b559060e4 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Thu, 19 Dec 2024 12:45:40 +0100 Subject: [PATCH 10/19] show progress --- crates/lib-dialects/src/databricks.rs | 41 ++- crates/lib-dialects/src/sparksql.rs | 283 +++++++++--------- 
.../dialects/databricks/show_views.yml | 78 ++--- 3 files changed, 216 insertions(+), 186 deletions(-) diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index 4ca36f6ba..0863516d4 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -11,11 +11,12 @@ use sqruff_lib_core::{ }, vec_of_erased, }; +use crate::sparksql; pub fn dialect() -> Dialect { - let raw_sparksql = crate::sparksql::dialect(); + let raw_sparksql = sparksql::dialect(); - let mut databricks = crate::sparksql::dialect(); + let mut databricks = sparksql::dialect(); databricks.name = DialectKind::Databricks; // databricks @@ -238,6 +239,16 @@ pub fn dialect() -> Dialect { .to_matchable() .into(), ), + ( + "ShowVolumesStatement".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("SHOW"), + Ref::keyword("VOLUMES"), + Ref::new("DatabaseReferenceSegment"), + ]) + .to_matchable() + .into(), + ), // // NotebookStart=TypedParser("notebook_start", CommentSegment, type="notebook_start"), // // MagicLineGrammar=TypedParser("magic_line", CodeSegment, type="magic_line"), // // MagicStartGrammar=TypedParser("magic_start", CodeSegment, type="magic_start"), @@ -261,11 +272,12 @@ pub fn dialect() -> Dialect { databricks.replace_grammar( "ShowViewsStatement".into(), Sequence::new(vec_of_erased![ + Ref::keyword("SHOW"), Ref::keyword("VIEWS"), - Sequence::new(vec_of_erased![one_of(vec_of_erased![ - Ref::keyword("FROM"), - Ref::keyword("IN"), - ])]) + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN"),]), + Ref::new("DatabaseReferenceSegment"), + ]) .config(|config| { config.optional(); }), @@ -279,6 +291,23 @@ pub fn dialect() -> Dialect { .into(), ); + let mut show_statements = sparksql::show_statements(); + show_statements.push( + Ref::new("ShowVolumesStatement") + .to_matchable() + .into() + ); + databricks.replace_grammar( + "ShowStatement".into(), + one_of(show_statements) + 
.to_matchable() + .into(), + ); + + + + + // // TODO Missing Show Object Grammar // databricks.replace_grammar( // "NotNullGrammar".into(), diff --git a/crates/lib-dialects/src/sparksql.rs b/crates/lib-dialects/src/sparksql.rs index 7ad9022d2..7b8d6274b 100644 --- a/crates/lib-dialects/src/sparksql.rs +++ b/crates/lib-dialects/src/sparksql.rs @@ -10,7 +10,7 @@ use sqruff_lib_core::parser::grammar::conditional::Conditional; use sqruff_lib_core::parser::grammar::delimited::Delimited; use sqruff_lib_core::parser::grammar::sequence::{Bracketed, Sequence}; use sqruff_lib_core::parser::lexer::Matcher; -use sqruff_lib_core::parser::matchable::MatchableTrait; +use sqruff_lib_core::parser::matchable::{Matchable, MatchableTrait}; use sqruff_lib_core::parser::node_matcher::NodeMatcher; use sqruff_lib_core::parser::parsers::{MultiStringParser, RegexParser, StringParser, TypedParser}; use sqruff_lib_core::parser::segments::bracketed::BracketedSegmentMatcher; @@ -2593,7 +2593,9 @@ pub fn dialect() -> Dialect { }) ]) .to_matchable(), - ).to_matchable().into(), + ) + .to_matchable() + .into(), ), ( "SetStatementSegment".into(), @@ -2619,149 +2621,7 @@ pub fn dialect() -> Dialect { "ShowStatement".into(), NodeMatcher::new( SyntaxKind::ShowStatement, - one_of(vec_of_erased![ - Ref::new("ShowViewsStatement"), - Sequence::new(vec_of_erased![ - Ref::keyword("SHOW"), - one_of(vec_of_erased![ - Sequence::new(vec_of_erased![ - Ref::keyword("CREATE"), - Ref::keyword("TABLE"), - Ref::new("TableExpressionSegment"), - Sequence::new(vec_of_erased![ - Ref::keyword("AS"), - Ref::keyword("SERDE") - ]) - .config(|config| { - config.optional(); - }) - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("COLUMNS"), - Ref::keyword("IN"), - Ref::new("TableExpressionSegment"), - Sequence::new(vec_of_erased![ - Ref::keyword("IN"), - Ref::new("DatabaseReferenceSegment") - ]) - .config(|config| { - config.optional(); - }) - ]), - Sequence::new(vec_of_erased![ - one_of(vec_of_erased![ - 
Ref::keyword("DATABASES"), - Ref::keyword("SCHEMAS") - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("LIKE"), - Ref::new("QuotedLiteralSegment") - ]) - .config(|config| { - config.optional(); - }) - ]), - Sequence::new(vec_of_erased![ - one_of(vec_of_erased![ - Ref::keyword("USER"), - Ref::keyword("SYSTEM"), - Ref::keyword("ALL") - ]) - .config(|config| { - config.optional(); - }), - Ref::keyword("FUNCTIONS"), - one_of(vec_of_erased![ - Sequence::new(vec_of_erased![ - Ref::new("DatabaseReferenceSegment"), - Ref::new("DotSegment"), - Ref::new("FunctionNameSegment") - ]) - .config(|config| { - config.disallow_gaps(); - config.optional(); - }), - Ref::new("FunctionNameSegment").optional(), - Sequence::new(vec_of_erased![ - Ref::keyword("LIKE"), - Ref::new("QuotedLiteralSegment") - ]) - .config(|config| { - config.optional(); - }) - ]) - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("PARTITIONS"), - Ref::new("TableReferenceSegment"), - Ref::new("PartitionSpecGrammar").optional() - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("TABLE"), - Ref::keyword("EXTENDED"), - Sequence::new(vec_of_erased![ - one_of(vec_of_erased![ - Ref::keyword("IN"), - Ref::keyword("FROM") - ]), - Ref::new("DatabaseReferenceSegment") - ]) - .config(|config| { - config.optional(); - }), - Ref::keyword("LIKE"), - Ref::new("QuotedLiteralSegment"), - Ref::new("PartitionSpecGrammar").optional() - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("TABLES"), - Sequence::new(vec_of_erased![ - one_of(vec_of_erased![ - Ref::keyword("FROM"), - Ref::keyword("IN") - ]), - Ref::new("DatabaseReferenceSegment") - ]) - .config(|config| { - config.optional(); - }), - Sequence::new(vec_of_erased![ - Ref::keyword("LIKE"), - Ref::new("QuotedLiteralSegment") - ]) - .config(|config| { - config.optional(); - }) - ]), - Sequence::new(vec_of_erased![ - Ref::keyword("TBLPROPERTIES"), - Ref::new("TableReferenceSegment"), - Ref::new("BracketedPropertyNameListGrammar").optional() - ]), - 
Sequence::new(vec_of_erased![ - Ref::keyword("VIEWS"), - Sequence::new(vec_of_erased![ - one_of(vec_of_erased![ - Ref::keyword("FROM"), - Ref::keyword("IN") - ]), - Ref::new("DatabaseReferenceSegment") - ]) - .config(|config| { - config.optional(); - }), - Sequence::new(vec_of_erased![ - Ref::keyword("LIKE"), - Ref::new("QuotedLiteralSegment") - ]) - .config(|config| { - config.optional(); - }) - ]) - ]) - ]) - ]) - .to_matchable(), + one_of(show_statements()).to_matchable(), ) .to_matchable() .into(), @@ -3547,3 +3407,136 @@ pub fn dialect() -> Dialect { sparksql_dialect.expand(); sparksql_dialect } + +pub fn show_statements() -> Vec { + vec_of_erased![ + Ref::new("ShowViewsStatement"), + Sequence::new(vec_of_erased![ + Ref::keyword("SHOW"), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("CREATE"), + Ref::keyword("TABLE"), + Ref::new("TableExpressionSegment"), + Sequence::new(vec_of_erased![Ref::keyword("AS"), Ref::keyword("SERDE")]) + .config(|config| { + config.optional(); + }) + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("COLUMNS"), + Ref::keyword("IN"), + Ref::new("TableExpressionSegment"), + Sequence::new(vec_of_erased![ + Ref::keyword("IN"), + Ref::new("DatabaseReferenceSegment") + ]) + .config(|config| { + config.optional(); + }) + ]), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![ + Ref::keyword("DATABASES"), + Ref::keyword("SCHEMAS") + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE"), + Ref::new("QuotedLiteralSegment") + ]) + .config(|config| { + config.optional(); + }) + ]), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![ + Ref::keyword("USER"), + Ref::keyword("SYSTEM"), + Ref::keyword("ALL") + ]) + .config(|config| { + config.optional(); + }), + Ref::keyword("FUNCTIONS"), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::new("DatabaseReferenceSegment"), + Ref::new("DotSegment"), + Ref::new("FunctionNameSegment") + ]) + .config(|config| { + config.disallow_gaps(); + 
config.optional(); + }), + Ref::new("FunctionNameSegment").optional(), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE"), + Ref::new("QuotedLiteralSegment") + ]) + .config(|config| { + config.optional(); + }) + ]) + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("PARTITIONS"), + Ref::new("TableReferenceSegment"), + Ref::new("PartitionSpecGrammar").optional() + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("TABLE"), + Ref::keyword("EXTENDED"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("IN"), Ref::keyword("FROM")]), + Ref::new("DatabaseReferenceSegment") + ]) + .config(|config| { + config.optional(); + }), + Ref::keyword("LIKE"), + Ref::new("QuotedLiteralSegment"), + Ref::new("PartitionSpecGrammar").optional() + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("TABLES"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]), + Ref::new("DatabaseReferenceSegment") + ]) + .config(|config| { + config.optional(); + }), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE"), + Ref::new("QuotedLiteralSegment") + ]) + .config(|config| { + config.optional(); + }) + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("TBLPROPERTIES"), + Ref::new("TableReferenceSegment"), + Ref::new("BracketedPropertyNameListGrammar").optional() + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("VIEWS"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN")]), + Ref::new("DatabaseReferenceSegment") + ]) + .config(|config| { + config.optional(); + }), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE"), + Ref::new("QuotedLiteralSegment") + ]) + .config(|config| { + config.optional(); + }) + ]) + ]) + ]) + ] +} diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml index f18dd5272..2a569b67a 100644 --- 
a/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml @@ -1,53 +1,61 @@ file: - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: FROM - - database_reference: - - naked_identifier: userdb + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: FROM + - database_reference: + - naked_identifier: userdb - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: IN - - database_reference: - - naked_identifier: global_temp + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: IN + - database_reference: + - naked_identifier: global_temp - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: FROM - - database_reference: - - naked_identifier: default - - keyword: LIKE - - quoted_literal: '''sam*''' + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: FROM + - database_reference: + - naked_identifier: default + - keyword: LIKE + - quoted_literal: '''sam*''' - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: LIKE - - quoted_literal: '''sam|suj|temp*''' + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: LIKE + - quoted_literal: '''sam|suj|temp*''' - statement_terminator: ; - statement: - show_statement: - - keyword: SHOW - - keyword: VIEWS - - keyword: FROM - - database_reference: - - naked_identifier: default -- file: - - single_quote: '''sam*''' - - semicolon: ; - - word: SHOW - - word: VIEWS - - single_quote: '''sam|suj|temp*''' - - semicolon: ; + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - keyword: FROM + - database_reference: + - 
naked_identifier: default + - quoted_literal: '''sam*''' +- statement_terminator: ; +- statement: + - show_statement: + - show_views_statement: + - keyword: SHOW + - keyword: VIEWS + - quoted_literal: '''sam|suj|temp*''' +- statement_terminator: ; From 3ab2325f882e39add5c9ec96b68960aa0da69b2f Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Sat, 21 Dec 2024 16:19:36 +0100 Subject: [PATCH 11/19] making progress [ci skip] --- crates/lib-core/src/dialects/base.rs | 1 - crates/lib-dialects/src/databricks.rs | 176 +++++++-- .../dialects/databricks/alter_catalog.yml | 230 +++++++----- .../dialects/databricks/alter_database.yml | 351 +++++++++++------- .../dialects/databricks/show_volumes.yml | 121 +++--- 5 files changed, 562 insertions(+), 317 deletions(-) diff --git a/crates/lib-core/src/dialects/base.rs b/crates/lib-core/src/dialects/base.rs index 0ac60a6a9..6996a51a9 100644 --- a/crates/lib-core/src/dialects/base.rs +++ b/crates/lib-core/src/dialects/base.rs @@ -3,7 +3,6 @@ use std::collections::hash_map::Entry; use std::fmt::Debug; use ahash::{AHashMap, AHashSet}; -use itertools::Itertools; use crate::dialects::init::DialectKind; use crate::dialects::syntax::SyntaxKind; use crate::helpers::{capitalize, ToMatchable}; diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index 0863516d4..1a4441334 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -1,7 +1,11 @@ +use std::collections::HashSet; + use crate::databricks_keywords::{RESERVED_KEYWORDS, UNRESERVED_KEYWORDS}; +use crate::sparksql; use sqruff_lib_core::parser::grammar::anyof::AnyNumberOf; use sqruff_lib_core::parser::grammar::delimited::Delimited; use sqruff_lib_core::parser::grammar::sequence::Bracketed; +use sqruff_lib_core::parser::matchable::MatchableTrait; use sqruff_lib_core::{ dialects::{base::Dialect, init::DialectKind, syntax::SyntaxKind}, helpers::{Config, ToMatchable}, @@ -11,7 
+15,6 @@ use sqruff_lib_core::{ }, vec_of_erased, }; -use crate::sparksql; pub fn dialect() -> Dialect { let raw_sparksql = sparksql::dialect(); @@ -19,13 +22,35 @@ pub fn dialect() -> Dialect { let mut databricks = sparksql::dialect(); databricks.name = DialectKind::Databricks; - // databricks - // .sets_mut("unreserverd_keywords") - // .extend(UNRESERVED_KEYWORDS); - // databricks - // .sets_mut("unreserverd_keywords") - // .extend(raw_sparksql.sets("reserverd_keywords")); - // databricks.sets_ut("unreserverd_keywords") + // What want to translate from Sqlfluff + // databricks_dialect.sets("unreserved_keywords").update(UNRESERVED_KEYWORDS) + // databricks_dialect.sets("unreserved_keywords").update( + // sparksql_dialect.sets("reserved_keywords") + // ) + // databricks_dialect.sets("unreserved_keywords").difference_update(RESERVED_KEYWORDS) + // databricks_dialect.sets("reserved_keywords").clear() + // databricks_dialect.sets("reserved_keywords").update(RESERVED_KEYWORDS) + // databricks_dialect.sets("date_part_function_name").update(["TIMEDIFF"]) + + databricks + .sets_mut("unreserved_keywords") + .extend(UNRESERVED_KEYWORDS); + databricks + .sets_mut("unreserved_keywords") + .extend(raw_sparksql.sets("reserved_keywords")); + databricks + .sets_mut("unreserved_keywords") + .retain(|x| !RESERVED_KEYWORDS.contains(x)); + databricks.sets_mut("reserved_keywords").clear(); + databricks + .sets_mut("reserved_keywords") + .extend(RESERVED_KEYWORDS); + databricks + .sets_mut("data_part_function_name") + .extend(["TIMEDIFF"]); + + println!("reserved {:?}", databricks.sets("reserved_keywords")); + println!("unreserved {:?}", databricks.sets("unreserved_keywords")); // databricks.sets_mut("reserverd_keywords").clear(); // databricks.sets_mut("reserverd_keywords").extend(RESERVED_KEYWORDS); @@ -75,6 +100,67 @@ pub fn dialect() -> Dialect { // ); databricks.add([ + ( + "CatalogReferenceSegment".into(), + Ref::new("ObjectReferenceSegment").to_matchable().into(), + ), + ( + // 
SetOwnerGrammar=Sequence( + // Ref.keyword("SET", optional=True), + // "OWNER", + // "TO", + // Ref("PrincipalIdentifierSegment"), + // ), + "SetOwnerGrammar".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("SET").optional(), + Ref::keyword("OWNER"), + Ref::keyword("TO"), + Ref::new("PrincipalIdentifierSegment"), + ]) + .to_matchable() + .into(), + ), + ( + "PredictiveOptimizationGrammar".into(), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![ + Ref::keyword("ENABLE"), + Ref::keyword("DISABLE"), + Ref::keyword("INHERIT"), + ]), + Ref::keyword("PREDICTIVE"), + Ref::keyword("OPTIMIZATION"), + ]) + .to_matchable() + .into(), + ), + ( + // https://docs.databricks.com/en/sql/language-manual/sql-ref-principal.html + "PrincipalIdentifierSegment".into(), + one_of(vec_of_erased![ + Ref::new("NakedIdentifierSegment"), + Ref::new("BackQuotedIdentifierSegment"), + ]) + .to_matchable() + .into(), + ), + ( + "AlterCatalogStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + Ref::keyword("CATALOG"), + Ref::new("CatalogReferenceSegment"), + one_of(vec_of_erased![ + Ref::new("SetOwnerGrammar"), + Ref::new("SetTagsGrammar"), + Ref::new("UnsetTagsGrammar"), + Ref::new("PredictiveOptimizationGrammar"), + ]), + ]) + .to_matchable() + .into(), + ), ( "SetTagsGrammar".into(), Sequence::new(vec_of_erased![ @@ -244,7 +330,27 @@ pub fn dialect() -> Dialect { Sequence::new(vec_of_erased![ Ref::keyword("SHOW"), Ref::keyword("VOLUMES"), - Ref::new("DatabaseReferenceSegment"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("FROM"), Ref::keyword("IN"),]), + Ref::new("DatabaseReferenceSegment"), + ]) + .config(|config| { config.optional() }), + Sequence::new(vec_of_erased![ + Ref::keyword("LIKE").optional(), + Ref::new("QuotedLiteralSegment"), + ]) + .config(|config| { config.optional() }), + // "VOLUMES", + // Sequence( + // OneOf("FROM", "IN"), + // Ref("DatabaseReferenceSegment"), + // optional=True, + // ), + // Sequence( + 
// Ref.keyword("LIKE", optional=True), + // Ref("QuotedLiteralSegment"), + // optional=True, + // ), ]) .to_matchable() .into(), @@ -292,30 +398,48 @@ pub fn dialect() -> Dialect { ); let mut show_statements = sparksql::show_statements(); - show_statements.push( - Ref::new("ShowVolumesStatement") - .to_matchable() - .into() - ); + show_statements.push(Ref::new("ShowVolumesStatement").to_matchable().into()); databricks.replace_grammar( "ShowStatement".into(), - one_of(show_statements) - .to_matchable() - .into(), + one_of(show_statements).to_matchable().into(), ); + // An `ALTER DATABASE/SCHEMA` statement. + // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-alter-schema.html + databricks.replace_grammar("AlterDatabaseStatementSegment", + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + one_of(vec_of_erased![Ref::keyword("DATABASE"), Ref::keyword("SCHEMA")]), + Ref::new("DatabaseReferenceSegment"), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::new("DatabasePropertiesGrammar"), + ]), + Ref::new("SetOwnerGrammar"), + Ref::new("SetTagsGrammar"), + Ref::new("UnsetTagsGrammar"), + Ref::new("PredictiveOptimizationGrammar"), + ]), + ]).to_matchable().into()); - - // // TODO Missing Show Object Grammar - // databricks.replace_grammar( - // "NotNullGrammar".into(), - // Sequence::new(vec_of_erased![Ref::keyword("NOT"), Ref::keyword("NULL")]) - // .to_matchable() - // .into(), - // // TODO Function NameIdentifierSegment - // ); + databricks.replace_grammar( + "StatementSegment", + raw_sparksql + .grammar("StatementSegment") + .match_grammar() + .unwrap() + .copy( + Some(vec_of_erased![Ref::new("AlterCatalogStatementSegment"),]), + None, + None, + None, + Vec::new(), + false, + ), + ); databricks.expand(); diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.yml index 85dc97f49..076f64d0d 100644 --- 
a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_catalog.yml @@ -1,99 +1,133 @@ file: -- unparsable: - - word: ALTER - - word: CATALOG - - word: some_cat - - word: OWNER - - word: TO - - back_quote: '`alf@melmak.et`' - - semicolon: ; - - word: ALTER - - word: CATALOG - - word: some_cat - - word: OWNER - - word: TO - - word: my_group - - semicolon: ; - - word: ALTER - - word: CATALOG - - word: some_cat - - word: SET - - word: OWNER - - word: TO - - back_quote: '`alf@melmak.et`' - - semicolon: ; - - word: ALTER - - word: CATALOG - - word: some_cat - - word: SET - - word: OWNER - - word: TO - - word: my_group - - semicolon: ; - - word: ALTER - - word: CATALOG - - word: some_cat - - word: SET - - word: TAGS - - start_bracket: ( - - single_quote: '''tag1''' - - raw_comparison_operator: = - - single_quote: '''value1''' - - end_bracket: ) - - semicolon: ; - - word: ALTER - - word: CATALOG - - word: some_cat - - word: SET - - word: TAGS - - start_bracket: ( - - single_quote: '''tag2''' - - raw_comparison_operator: = - - single_quote: '''value2''' - - comma: ',' - - single_quote: '''tag3''' - - raw_comparison_operator: = - - single_quote: '''value3''' - - end_bracket: ) - - semicolon: ; - - word: ALTER - - word: CATALOG - - word: some_cat - - word: UNSET - - word: TAGS - - start_bracket: ( - - single_quote: '''tag1''' - - end_bracket: ) - - semicolon: ; - - word: ALTER - - word: CATALOG - - word: some_cat - - word: UNSET - - word: TAGS - - start_bracket: ( - - single_quote: '''tag2''' - - comma: ',' - - single_quote: '''tag3''' - - end_bracket: ) - - semicolon: ; - - word: ALTER - - word: CATALOG - - word: some_cat - - word: ENABLE - - word: PREDICTIVE - - word: OPTIMIZATION - - semicolon: ; - - word: ALTER - - word: CATALOG - - word: some_cat - - word: INHERIT - - word: PREDICTIVE - - word: OPTIMIZATION - - semicolon: ; - - word: ALTER - - word: CATALOG - - word: some_cat - - word: 
DISABLE - - word: PREDICTIVE - - word: OPTIMIZATION - - semicolon: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag1''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value1''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag2''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value2''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''tag3''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value3''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: UNSET + - keyword: TAGS + - 
bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag1''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: UNSET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag2''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''tag3''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: ENABLE + - keyword: PREDICTIVE + - keyword: OPTIMIZATION +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: INHERIT + - keyword: PREDICTIVE + - keyword: OPTIMIZATION +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: CATALOG + - object_reference: + - naked_identifier: some_cat + - keyword: DISABLE + - keyword: PREDICTIVE + - keyword: OPTIMIZATION +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml index e2dcb1c92..5b326887f 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml @@ -1,144 +1,209 @@ file: -- unparsable: - - word: ALTER - - word: SCHEMA - - word: some_cat - - word: OWNER - - word: TO - - back_quote: '`alf@melmak.et`' - - semicolon: ; - - word: ALTER - - word: SCHEMA - - word: some_cat - - word: OWNER - - word: TO - - word: my_group - - semicolon: ; - - word: ALTER - - word: SCHEMA - - word: some_cat - - word: SET - - word: OWNER - - word: TO - - back_quote: '`alf@melmak.et`' - - semicolon: ; - - word: ALTER - - word: SCHEMA - - word: some_cat - - word: SET - - word: OWNER - - 
word: TO - - word: my_group - - semicolon: ; - - word: ALTER - - word: DATABASE - - word: some_cat - - word: OWNER - - word: TO - - back_quote: '`alf@melmak.et`' - - semicolon: ; - - word: ALTER - - word: DATABASE - - word: some_cat - - word: SET - - word: OWNER - - word: TO - - back_quote: '`alf@melmak.et`' - - semicolon: ; - - word: ALTER - - word: DATABASE - - word: some_cat - - word: OWNER - - word: TO - - word: my_group - - semicolon: ; - - word: ALTER - - word: DATABASE - - word: some_cat - - word: SET - - word: OWNER - - word: TO - - word: my_group - - semicolon: ; - - word: ALTER - - word: SCHEMA - - word: some_cat - - word: SET - - word: TAGS - - start_bracket: ( - - single_quote: '''tag1''' - - raw_comparison_operator: = - - single_quote: '''value1''' - - end_bracket: ) - - semicolon: ; - - word: ALTER - - word: DATABASE - - word: some_cat - - word: SET - - word: TAGS - - start_bracket: ( - - single_quote: '''tag2''' - - raw_comparison_operator: = - - single_quote: '''value2''' - - comma: ',' - - single_quote: '''tag3''' - - raw_comparison_operator: = - - single_quote: '''value3''' - - end_bracket: ) - - semicolon: ; - - word: ALTER - - word: DATABASE - - word: some_cat - - word: UNSET - - word: TAGS - - start_bracket: ( - - single_quote: '''tag1''' - - end_bracket: ) - - semicolon: ; - - word: ALTER - - word: SCHEMA - - word: some_cat - - word: UNSET - - word: TAGS - - start_bracket: ( - - single_quote: '''tag2''' - - comma: ',' - - single_quote: '''tag3''' - - end_bracket: ) - - semicolon: ; - - word: ALTER - - word: SCHEMA - - word: some_cat - - word: ENABLE - - word: PREDICTIVE - - word: OPTIMIZATION - - semicolon: ; - - word: ALTER - - word: DATABASE - - word: some_cat - - word: INHERIT - - word: PREDICTIVE - - word: OPTIMIZATION - - semicolon: ; - - word: ALTER - - word: SCHEMA - - word: some_cat - - word: DISABLE - - word: PREDICTIVE - - word: OPTIMIZATION - - semicolon: ; - - word: ALTER - - word: SCHEMA - - word: some_cat - - word: SET - - word: 
DBPROPERTIES - - start_bracket: ( - - single_quote: '''Edited-by''' - - raw_comparison_operator: = - - single_quote: '''John Doe''' - - comma: ',' - - single_quote: '''Edit-date''' - - raw_comparison_operator: = - - single_quote: '''2020-01-01''' - - end_bracket: ) - - semicolon: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - database_reference: + - naked_identifier: some_cat + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - database_reference: + - naked_identifier: some_cat + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - database_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - database_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - database_reference: + - naked_identifier: some_cat + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - database_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - database_reference: + - naked_identifier: some_cat + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- 
statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - database_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - database_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag1''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value1''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - database_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag2''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value2''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''tag3''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value3''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - database_reference: + - naked_identifier: some_cat + - keyword: UNSET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag1''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - database_reference: + - naked_identifier: some_cat + - keyword: UNSET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag2''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''tag3''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - alter_database_statement: 
+ - keyword: ALTER + - keyword: SCHEMA + - database_reference: + - naked_identifier: some_cat + - keyword: ENABLE + - keyword: PREDICTIVE + - keyword: OPTIMIZATION +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: DATABASE + - database_reference: + - naked_identifier: some_cat + - keyword: INHERIT + - keyword: PREDICTIVE + - keyword: OPTIMIZATION +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - database_reference: + - naked_identifier: some_cat + - keyword: DISABLE + - keyword: PREDICTIVE + - keyword: OPTIMIZATION +- statement_terminator: ; +- statement: + - alter_database_statement: + - keyword: ALTER + - keyword: SCHEMA + - database_reference: + - naked_identifier: some_cat + - keyword: SET + - keyword: DBPROPERTIES + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''Edited-by''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''John Doe''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''Edit-date''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''2020-01-01''' + - end_bracket: ) +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml index a37bd35fd..a1b696b73 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml @@ -1,50 +1,73 @@ file: -- unparsable: - - word: SHOW - - word: VOLUMES - - semicolon: ; - - word: SHOW - - word: VOLUMES - - word: IN - - word: sampledb - - semicolon: ; - - word: SHOW - - word: VOLUMES - - word: FROM - - word: sampledb - - semicolon: ; - - word: SHOW - - word: VOLUMES - - word: LIKE - - single_quote: '''regex*''' - - semicolon: ; - - word: SHOW - - word: VOLUMES - - 
single_quote: '''regex*''' - - semicolon: ; - - word: SHOW - - word: VOLUMES - - word: IN - - word: sampledb - - word: LIKE - - single_quote: '''regex*''' - - semicolon: ; - - word: SHOW - - word: VOLUMES - - word: IN - - word: sampledb - - single_quote: '''regex*''' - - semicolon: ; - - word: SHOW - - word: VOLUMES - - word: FROM - - word: sampledb - - word: LIKE - - single_quote: '''regex*''' - - semicolon: ; - - word: SHOW - - word: VOLUMES - - word: FROM - - word: sampledb - - single_quote: '''regex*''' - - semicolon: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: IN + - database_reference: + - naked_identifier: sampledb +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: FROM + - database_reference: + - naked_identifier: sampledb +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: LIKE + - quoted_literal: '''regex*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - quoted_literal: '''regex*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: IN + - database_reference: + - naked_identifier: sampledb + - keyword: LIKE + - quoted_literal: '''regex*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: IN + - database_reference: + - naked_identifier: sampledb + - quoted_literal: '''regex*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: FROM + - database_reference: + - naked_identifier: sampledb + - keyword: LIKE + - quoted_literal: '''regex*''' +- statement_terminator: ; +- statement: + - show_statement: + - keyword: SHOW + - keyword: VOLUMES + - keyword: FROM 
+ - database_reference: + - naked_identifier: sampledb + - quoted_literal: '''regex*''' +- statement_terminator: ; From b558b9f2961ef8b509d259183c093e86c7ed7ca8 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Sat, 21 Dec 2024 16:41:30 +0100 Subject: [PATCH 12/19] chore: making progress alter volume [ci skip] --- crates/lib-dialects/src/databricks.rs | 439 +++++++++++++++++- .../dialects/databricks/alter_table.yml | 87 ++-- .../dialects/databricks/alter_volume.yml | 199 ++++---- 3 files changed, 571 insertions(+), 154 deletions(-) diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index 1a4441334..8438982c5 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -6,6 +6,7 @@ use sqruff_lib_core::parser::grammar::anyof::AnyNumberOf; use sqruff_lib_core::parser::grammar::delimited::Delimited; use sqruff_lib_core::parser::grammar::sequence::Bracketed; use sqruff_lib_core::parser::matchable::MatchableTrait; +use sqruff_lib_core::parser::segments::meta::MetaSegment; use sqruff_lib_core::{ dialects::{base::Dialect, init::DialectKind, syntax::SyntaxKind}, helpers::{Config, ToMatchable}, @@ -406,24 +407,427 @@ pub fn dialect() -> Dialect { // An `ALTER DATABASE/SCHEMA` statement. 
// https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-alter-schema.html - databricks.replace_grammar("AlterDatabaseStatementSegment", - Sequence::new(vec_of_erased![ - Ref::keyword("ALTER"), - one_of(vec_of_erased![Ref::keyword("DATABASE"), Ref::keyword("SCHEMA")]), - Ref::new("DatabaseReferenceSegment"), - one_of(vec_of_erased![ - Sequence::new(vec_of_erased![ - Ref::keyword("SET"), - Ref::new("DatabasePropertiesGrammar"), + databricks.replace_grammar( + "AlterDatabaseStatementSegment", + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + one_of(vec_of_erased![ + Ref::keyword("DATABASE"), + Ref::keyword("SCHEMA") ]), - Ref::new("SetOwnerGrammar"), - Ref::new("SetTagsGrammar"), - Ref::new("UnsetTagsGrammar"), - Ref::new("PredictiveOptimizationGrammar"), - ]), - ]).to_matchable().into()); + Ref::new("DatabaseReferenceSegment"), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::new("DatabasePropertiesGrammar"), + ]), + Ref::new("SetOwnerGrammar"), + Ref::new("SetTagsGrammar"), + Ref::new("UnsetTagsGrammar"), + Ref::new("PredictiveOptimizationGrammar"), + ]), + ]) + .to_matchable() + .into(), + ); + // An `ALTER TABLE` statement. 
+ // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-alter-table.html + // match_grammar = Sequence( + // "ALTER", + // "TABLE", + // Ref("TableReferenceSegment"), + // Indent, + // OneOf( + // Sequence( + // "RENAME", + // "TO", + // Ref("TableReferenceSegment"), + // ), + // Sequence( + // "ADD", + // OneOf("COLUMNS", "COLUMN"), + // Indent, + // Bracketed( + // Delimited( + // Sequence( + // Ref("ColumnFieldDefinitionSegment"), + // Ref("ColumnDefaultGrammar", optional=True), + // Ref("CommentGrammar", optional=True), + // Ref("FirstOrAfterGrammar", optional=True), + // Ref("MaskStatementSegment", optional=True), + // ), + // ), + // ), + // Dedent, + // ), + // Sequence( + // OneOf("ALTER", "CHANGE"), + // Ref.keyword("COLUMN", optional=True), + // Ref("ColumnReferenceSegment"), + // OneOf( + // Ref("CommentGrammar"), + // Ref("FirstOrAfterGrammar"), + // Sequence( + // OneOf("SET", "DROP"), + // "NOT", + // "NULL", + // ), + // Sequence( + // "TYPE", + // Ref("DatatypeSegment"), + // ), + // Sequence( + // "SET", + // Ref("ColumnDefaultGrammar"), + // ), + // Sequence( + // "DROP", + // "DEFAULT", + // ), + // Sequence( + // "SYNC", + // "IDENTITY", + // ), + // Sequence( + // "SET", + // Ref("MaskStatementSegment"), + // ), + // Sequence( + // "DROP", + // "MASK", + // ), + // Ref("SetTagsGrammar"), + // Ref("UnsetTagsGrammar"), + // ), + // ), + // Sequence( + // "DROP", + // OneOf("COLUMN", "COLUMNS", optional=True), + // Ref("IfExistsGrammar", optional=True), + // OptionallyBracketed( + // Delimited( + // Ref("ColumnReferenceSegment"), + // ), + // ), + // ), + // Sequence( + // "RENAME", + // "COLUMN", + // Ref("ColumnReferenceSegment"), + // "TO", + // Ref("ColumnReferenceSegment"), + // ), + // Sequence( + // "ADD", + // Ref("TableConstraintSegment"), + // ), + // Ref("DropConstraintGrammar"), + // Sequence( + // "DROP", + // "FEATURE", + // Ref("ObjectReferenceSegment"), + // Sequence( + // "TRUNCATE", + // "HISTORY", + // 
optional=True, + // ), + // ), + // Sequence( + // "ADD", + // Ref("IfNotExistsGrammar", optional=True), + // AnyNumberOf(Ref("AlterPartitionGrammar")), + // ), + // Sequence( + // "DROP", + // Ref("IfExistsGrammar", optional=True), + // AnyNumberOf(Ref("AlterPartitionGrammar")), + // ), + // Sequence( + // Ref("AlterPartitionGrammar"), + // "SET", + // Ref("LocationGrammar"), + // ), + // Sequence( + // Ref("AlterPartitionGrammar"), + // "RENAME", + // "TO", + // Ref("AlterPartitionGrammar"), + // ), + // Sequence( + // "RECOVER", + // "PARTITIONS", + // ), + // Sequence( + // "SET", + // Ref("RowFilterClauseGrammar"), + // ), + // Sequence( + // "DROP", + // "ROW", + // "FILTER", + // ), + // Sequence( + // "SET", + // Ref("TablePropertiesGrammar"), + // ), + // Ref("UnsetTablePropertiesGrammar"), + // Sequence( + // "SET", + // "SERDE", + // Ref("QuotedLiteralSegment"), + // Sequence( + // "WITH", + // "SERDEPROPERTIES", + // Ref("BracketedPropertyListGrammar"), + // optional=True, + // ), + // ), + // Sequence( + // "SET", + // Ref("LocationGrammar"), + // ), + // Ref("SetOwnerGrammar"), + // Sequence( + // Sequence( + // "ALTER", + // "COLUMN", + // Ref("ColumnReferenceSegment"), + // optional=True, + // ), + // Ref("SetTagsGrammar"), + // ), + // Sequence( + // Sequence( + // "ALTER", + // "COLUMN", + // Ref("ColumnReferenceSegment"), + // optional=True, + // ), + // Ref("UnsetTagsGrammar"), + // ), + // Ref("ClusterByClauseSegment"), + // Ref("PredictiveOptimizationGrammar"), + // ), + // Dedent, + // ) + databricks.replace_grammar( + "AlterTableStatementSegment", + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + Ref::keyword("TABLE"), + Ref::new("TableReferenceSegment"), + MetaSegment::indent(), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("RENAME"), + Ref::keyword("TO"), + Ref::new("TableReferenceSegment"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("ADD"), + one_of(vec_of_erased![ + Ref::keyword("COLUMNS"), + 
Ref::keyword("COLUMN") + ]), + MetaSegment::indent(), + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::new("ColumnFieldDefinitionSegment"), + Ref::new("ColumnDefaultGrammar").optional(), + Ref::new("CommentGrammar").optional(), + Ref::new("FirstOrAfterGrammar").optional(), + Ref::new("MaskStatementSegment").optional(), + ]), + ]),]), + MetaSegment::dedent(), + ]), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![ + Ref::keyword("ALTER"), + Ref::keyword("CHANGE") + ]), + Ref::keyword("COLUMN").optional(), + Ref::new("ColumnReferenceSegment"), + one_of(vec_of_erased![ + Ref::new("CommentGrammar"), + Ref::new("FirstOrAfterGrammar"), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![Ref::keyword("SET"), Ref::keyword("DROP")]), + Ref::keyword("NOT"), + Ref::keyword("NULL"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("TYPE"), + Ref::new("DatatypeSegment"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::new("ColumnDefaultGrammar"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + Ref::keyword("DEFAULT"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("SYNC"), + Ref::keyword("IDENTITY"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::new("MaskStatementSegment"), + ]), + Sequence::new(vec_of_erased![Ref::keyword("DROP"), Ref::keyword("MASK"),]), + Ref::new("SetTagsGrammar"), + Ref::new("UnsetTagsGrammar"), + ]), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + one_of(vec_of_erased![ + Ref::keyword("COLUMN"), + Ref::keyword("COLUMNS") + ]) + .config(|config| { config.optional() }), + Ref::new("IfExistsGrammar").optional(), + one_of(vec_of_erased![ + Delimited::new(vec_of_erased![Ref::new("ColumnReferenceSegment")]), + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![Ref::new( + "ColumnReferenceSegment" + )]),]), + ]), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("RENAME"), + Ref::keyword("COLUMN"), + 
Ref::new("ColumnReferenceSegment"), + Ref::keyword("TO"), + Ref::new("ColumnReferenceSegment"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("ADD"), + Ref::new("TableConstraintSegment"), + ]), + Ref::new("DropConstraintGrammar"), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + Ref::keyword("FEATURE"), + Ref::new("ObjectReferenceSegment"), + Sequence::new(vec_of_erased![ + Ref::keyword("TRUNCATE"), + Ref::keyword("HISTORY"), + ]) + .config(|config| { config.optional() }), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("ADD"), + Ref::new("IfNotExistsGrammar").optional(), + AnyNumberOf::new(vec_of_erased![Ref::new("AlterPartitionGrammar"),]), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + Ref::new("IfExistsGrammar").optional(), + AnyNumberOf::new(vec_of_erased![Ref::new("AlterPartitionGrammar"),]), + ]), + Sequence::new(vec_of_erased![ + Ref::new("AlterPartitionGrammar"), + Ref::keyword("SET"), + Ref::new("LocationGrammar"), + ]), + Sequence::new(vec_of_erased![ + Ref::new("AlterPartitionGrammar"), + Ref::keyword("RENAME"), + Ref::keyword("TO"), + Ref::new("AlterPartitionGrammar"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("RECOVER"), + Ref::keyword("PARTITIONS"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::new("RowFilterClauseGrammar"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + Ref::keyword("ROW"), + Ref::keyword("FILTER"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::new("TablePropertiesGrammar"), + ]), + Ref::new("UnsetTablePropertiesGrammar"), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::keyword("SERDE"), + Ref::new("QuotedLiteralSegment"), + Sequence::new(vec_of_erased![ + Ref::keyword("WITH"), + Ref::keyword("SERDEPROPERTIES"), + Ref::new("BracketedPropertyListGrammar"), + ]) + .config(|config| { config.optional() }), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::new("LocationGrammar"), + ]), + 
Ref::new("SetOwnerGrammar"), + Sequence::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + Ref::keyword("COLUMN"), + Ref::new("ColumnReferenceSegment"), + ]) + .config(|config| { config.optional() }), + Ref::new("SetTagsGrammar"), + ]), + Sequence::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + Ref::keyword("COLUMN"), + Ref::new("ColumnReferenceSegment"), + ]) + .config(|config| { config.optional() }), + Ref::new("UnsetTagsGrammar"), + ]), + Ref::new("ClusterByClauseSegment"), + Ref::new("PredictiveOptimizationGrammar"), + ]), + MetaSegment::dedent(), + ]) + .to_matchable() + .into(), + ); + databricks.add([( + "VolumeReferenceSegment".into(), + Ref::new("ObjectReferenceSegment").to_matchable().into(), + )]); + + // An `ALTER VOLUME` statement. + // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-alter-volume.html + databricks.add([( + "AlterVolumeStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("ALTER"), + Ref::keyword("VOLUME"), + Ref::new("VolumeReferenceSegment"), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("RENAME"), + Ref::keyword("TO"), + Ref::new("VolumeReferenceSegment"), + ]), + Ref::new("SetOwnerGrammar"), + Ref::new("SetTagsGrammar"), + Ref::new("UnsetTagsGrammar"), + ]), + ]) + .to_matchable() + .into(), + )]); databricks.replace_grammar( "StatementSegment", @@ -432,7 +836,10 @@ pub fn dialect() -> Dialect { .match_grammar() .unwrap() .copy( - Some(vec_of_erased![Ref::new("AlterCatalogStatementSegment"),]), + Some(vec_of_erased![ + Ref::new("AlterCatalogStatementSegment"), + Ref::new("AlterVolumeStatementSegment"), + ]), None, None, None, diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.yml index 13c021adc..84d9de44a 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.yml +++ 
b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_table.yml @@ -10,61 +10,40 @@ file: - table_reference: - naked_identifier: StudentInfo - statement_terminator: ; -- statement: - - alter_table_statement: - - keyword: ALTER - - keyword: TABLE - - table_reference: - - naked_identifier: default - - dot: . - - naked_identifier: StudentInfo - - keyword: PARTITION - - bracketed: - - start_bracket: ( - - column_reference: - - naked_identifier: age - - comparison_operator: - - raw_comparison_operator: = - - quoted_literal: '''10''' - - end_bracket: ) - - keyword: RENAME - - keyword: TO - - keyword: PARTITION - - bracketed: - - start_bracket: ( - - column_reference: - - naked_identifier: age - - comparison_operator: - - raw_comparison_operator: = - - quoted_literal: '''15''' - - end_bracket: ) -- statement_terminator: ; -- statement: - - alter_table_statement: - - keyword: ALTER - - keyword: TABLE - - table_reference: - - naked_identifier: StudentInfo - - keyword: ADD - - keyword: columns - - bracketed: - - start_bracket: ( - - column_definition: - - column_reference: - - naked_identifier: LastName - - data_type: - - primitive_type: - - keyword: string - - comma: ',' - - column_definition: - - column_reference: - - naked_identifier: DOB - - data_type: - - primitive_type: - - keyword: timestamp - - end_bracket: ) -- statement_terminator: ; - file: + - word: ALTER + - word: TABLE + - word: default + - dot: . 
+ - word: StudentInfo + - word: PARTITION + - start_bracket: ( + - word: age + - raw_comparison_operator: = + - single_quote: '''10''' + - end_bracket: ) + - word: RENAME + - word: TO + - word: PARTITION + - start_bracket: ( + - word: age + - raw_comparison_operator: = + - single_quote: '''15''' + - end_bracket: ) + - semicolon: ; + - word: ALTER + - word: TABLE + - word: StudentInfo + - word: ADD + - word: columns + - start_bracket: ( + - word: LastName + - word: string + - comma: ',' + - word: DOB + - word: timestamp + - end_bracket: ) + - semicolon: ; - word: ALTER - word: TABLE - word: StudentInfo diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.yml index 5c6fa05c9..ad6ab7d6e 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_volume.yml @@ -1,85 +1,116 @@ file: -- unparsable: - - word: ALTER - - word: VOLUME - - word: some_vol - - word: RENAME - - word: TO - - word: some_new_vol - - semicolon: ; - - word: ALTER - - word: VOLUME - - word: some_vol - - word: OWNER - - word: TO - - back_quote: '`alf@melmak.et`' - - semicolon: ; - - word: ALTER - - word: VOLUME - - word: some_vol - - word: OWNER - - word: TO - - word: my_group - - semicolon: ; - - word: ALTER - - word: VOLUME - - word: some_vol - - word: SET - - word: OWNER - - word: TO - - back_quote: '`alf@melmak.et`' - - semicolon: ; - - word: ALTER - - word: VOLUME - - word: some_vol - - word: SET - - word: OWNER - - word: TO - - word: my_group - - semicolon: ; - - word: ALTER - - word: VOLUME - - word: some_vol - - word: SET - - word: TAGS - - start_bracket: ( - - single_quote: '''tag1''' - - raw_comparison_operator: = - - single_quote: '''value1''' - - end_bracket: ) - - semicolon: ; - - word: ALTER - - word: VOLUME - - word: some_vol - - word: SET - - word: TAGS - - start_bracket: ( - - single_quote: 
'''tag2''' - - raw_comparison_operator: = - - single_quote: '''value2''' - - comma: ',' - - single_quote: '''tag3''' - - raw_comparison_operator: = - - single_quote: '''value3''' - - end_bracket: ) - - semicolon: ; - - word: ALTER - - word: VOLUME - - word: some_vol - - word: UNSET - - word: TAGS - - start_bracket: ( - - single_quote: '''tag1''' - - end_bracket: ) - - semicolon: ; - - word: ALTER - - word: VOLUME - - word: some_vol - - word: UNSET - - word: TAGS - - start_bracket: ( - - single_quote: '''tag2''' - - comma: ',' - - single_quote: '''tag3''' - - end_bracket: ) - - semicolon: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: RENAME + - keyword: TO + - object_reference: + - naked_identifier: some_new_vol +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: SET + - keyword: OWNER + - keyword: TO + - quoted_identifier: '`alf@melmak.et`' +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: SET + - keyword: OWNER + - keyword: TO + - naked_identifier: my_group +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: SET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag1''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: 
'''value1''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: SET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag2''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value2''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''tag3''' + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''value3''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: UNSET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag1''' + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: ALTER + - keyword: VOLUME + - object_reference: + - naked_identifier: some_vol + - keyword: UNSET + - keyword: TAGS + - bracketed: + - start_bracket: ( + - property_name_identifier: + - quoted_identifier: '''tag2''' + - comma: ',' + - property_name_identifier: + - quoted_identifier: '''tag3''' + - end_bracket: ) +- statement_terminator: ; From 122f7c12bc9112c6cb0bee56828fdfc92a63f6bb Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Sat, 21 Dec 2024 16:49:49 +0100 Subject: [PATCH 13/19] temp [ci skip] --- crates/lib-dialects/src/databricks.rs | 64 +++++++ .../dialects/databricks/comment_on.yml | 161 ++++++++++-------- .../dialects/databricks/create_catalog.yml | 47 ++--- 3 files changed, 180 insertions(+), 92 deletions(-) diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index 8438982c5..f93789ea0 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -801,6 +801,53 @@ pub fn dialect() -> Dialect { .into(), ); + // `COMMENT ON` statement. 
+ // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-comment.html + databricks.add([( + "CommentOnStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("COMMENT"), + Ref::keyword("ON"), + one_of(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("CATALOG"), + Ref::new("CatalogReferenceSegment"), + ]), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![ + Ref::keyword("DATABASE"), + Ref::keyword("SCHEMA") + ]), + Ref::new("DatabaseReferenceSegment"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("TABLE"), + Ref::new("TableReferenceSegment"), + ]), + Sequence::new(vec_of_erased![ + Ref::keyword("VOLUME"), + Ref::new("VolumeReferenceSegment"), + ]), + Sequence::new(vec_of_erased![ + one_of(vec_of_erased![ + Ref::keyword("CONNECTION"), + Ref::keyword("PROVIDER"), + Ref::keyword("RECIPIENT"), + Ref::keyword("SHARE"), + ]), + Ref::new("ObjectReferenceSegment"), + ]), + ]), + Ref::keyword("IS"), + one_of(vec_of_erased![ + Ref::new("QuotedLiteralSegment"), + Ref::keyword("NULL"), + ]), + ]) + .to_matchable() + .into(), + )]); + databricks.add([( "VolumeReferenceSegment".into(), Ref::new("ObjectReferenceSegment").to_matchable().into(), @@ -829,6 +876,21 @@ pub fn dialect() -> Dialect { .into(), )]); + // A `CREATE CATALOG` statement. 
+ // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-catalog.html + databricks.add([( + "CreateCatalogStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("CREATE"), + Ref::keyword("CATALOG"), + Ref::new("IfNotExistsGrammar").optional(), + Ref::new("CatalogReferenceSegment"), + Ref::new("CommentGrammar").optional(), + ]) + .to_matchable() + .into(), + )]); + databricks.replace_grammar( "StatementSegment", raw_sparksql @@ -839,6 +901,8 @@ pub fn dialect() -> Dialect { Some(vec_of_erased![ Ref::new("AlterCatalogStatementSegment"), Ref::new("AlterVolumeStatementSegment"), + Ref::new("CommentOnStatementSegment"), + Ref::new("CreateCatalogStatementSegment"), ]), None, None, diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml index 2e09a2b92..8c004055e 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml @@ -1,72 +1,91 @@ file: -- unparsable: - - word: COMMENT - - word: ON - - word: CATALOG - - word: my_catalog - - word: IS - - single_quote: '''This is my catalog''' - - semicolon: ; - - word: COMMENT - - word: ON - - word: CONNECTION - - word: mysql_connection - - word: IS - - single_quote: '''this is a mysql connection''' - - semicolon: ; - - word: COMMENT - - word: ON - - word: SCHEMA - - word: my_schema - - word: IS - - single_quote: '''This is my schema''' - - semicolon: ; - - word: COMMENT - - word: ON - - word: DATABASE - - word: my_other_schema - - word: IS - - single_quote: '''This is my other schema''' - - semicolon: ; - - word: COMMENT - - word: ON - - word: TABLE - - word: my_table - - word: IS - - single_quote: '''This is my table''' - - semicolon: ; - - word: COMMENT - - word: ON - - word: TABLE - - word: my_table - - word: IS - - word: 'NULL' - - semicolon: ; - - word: COMMENT - - word: ON - - word: SHARE - 
- word: my_share - - word: IS - - single_quote: '''A good share''' - - semicolon: ; - - word: COMMENT - - word: ON - - word: RECIPIENT - - word: my_recipient - - word: IS - - single_quote: '''A good recipient''' - - semicolon: ; - - word: COMMENT - - word: ON - - word: PROVIDER - - word: my_provider - - word: IS - - single_quote: '''A good provider''' - - semicolon: ; - - word: COMMENT - - word: ON - - word: VOLUME - - word: my_volume - - word: IS - - single_quote: '''Huge volume''' - - semicolon: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: CATALOG + - object_reference: + - naked_identifier: my_catalog + - keyword: IS + - quoted_literal: '''This is my catalog''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: CONNECTION + - object_reference: + - naked_identifier: mysql_connection + - keyword: IS + - quoted_literal: '''this is a mysql connection''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: SCHEMA + - database_reference: + - naked_identifier: my_schema + - keyword: IS + - quoted_literal: '''This is my schema''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: DATABASE + - database_reference: + - naked_identifier: my_other_schema + - keyword: IS + - quoted_literal: '''This is my other schema''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: TABLE + - table_reference: + - naked_identifier: my_table + - keyword: IS + - quoted_literal: '''This is my table''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: TABLE + - table_reference: + - naked_identifier: my_table + - keyword: IS + - keyword: 'NULL' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: SHARE + - object_reference: + - naked_identifier: my_share + - keyword: IS + - quoted_literal: '''A good share''' +- statement_terminator: ; +- 
statement: + - keyword: COMMENT + - keyword: ON + - keyword: RECIPIENT + - object_reference: + - naked_identifier: my_recipient + - keyword: IS + - quoted_literal: '''A good recipient''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: PROVIDER + - object_reference: + - naked_identifier: my_provider + - keyword: IS + - quoted_literal: '''A good provider''' +- statement_terminator: ; +- statement: + - keyword: COMMENT + - keyword: ON + - keyword: VOLUME + - object_reference: + - naked_identifier: my_volume + - keyword: IS + - quoted_literal: '''Huge volume''' +- statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.yml index d6177eeb2..bd6cb1070 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_catalog.yml @@ -1,22 +1,27 @@ file: -- unparsable: - - word: CREATE - - word: CATALOG - - word: customer_cat - - semicolon: ; - - word: CREATE - - word: CATALOG - - word: IF - - word: NOT - - word: EXISTS - - word: customer_cat - - semicolon: ; - - word: CREATE - - word: CATALOG - - word: IF - - word: NOT - - word: EXISTS - - word: customer_cat - - word: COMMENT - - single_quote: '''This is customer catalog''' - - semicolon: ; +- statement: + - keyword: CREATE + - keyword: CATALOG + - object_reference: + - naked_identifier: customer_cat +- statement_terminator: ; +- statement: + - keyword: CREATE + - keyword: CATALOG + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - object_reference: + - naked_identifier: customer_cat +- statement_terminator: ; +- statement: + - keyword: CREATE + - keyword: CATALOG + - keyword: IF + - keyword: NOT + - keyword: EXISTS + - object_reference: + - naked_identifier: customer_cat + - keyword: COMMENT + - quoted_literal: '''This is customer catalog''' +- statement_terminator: 
; From 51a44e3d7e37e18b7a5435555759acc3b84f57d5 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Sat, 21 Dec 2024 18:54:21 +0100 Subject: [PATCH 14/19] drop catalog statements [ci skip] --- crates/lib-dialects/src/databricks.rs | 16 ++++++++++ .../dialects/databricks/drop_catalog.yml | 29 ++++++++++--------- 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index f93789ea0..235ea52ab 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -891,6 +891,21 @@ pub fn dialect() -> Dialect { .into(), )]); + // A `DROP CATALOG` statement. + // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-drop-catalog.html + databricks.add([( + "DropCatalogStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + Ref::keyword("CATALOG"), + Ref::new("IfExistsGrammar").optional(), + Ref::new("CatalogReferenceSegment"), + Ref::new("DropBehaviorGrammar").optional(), + ]) + .to_matchable() + .into(), + )]); + databricks.replace_grammar( "StatementSegment", raw_sparksql @@ -900,6 +915,7 @@ pub fn dialect() -> Dialect { .copy( Some(vec_of_erased![ Ref::new("AlterCatalogStatementSegment"), + Ref::new("DropCatalogStatementSegment"), Ref::new("AlterVolumeStatementSegment"), Ref::new("CommentOnStatementSegment"), Ref::new("CreateCatalogStatementSegment"), diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.yml index febeead2f..ee3e98eb2 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_catalog.yml @@ -1,14 +1,17 @@ file: -- unparsable: - - word: DROP - - word: CATALOG - - word: vaccine - - word: CASCADE - - semicolon: ; - - word: DROP - - word: CATALOG - - word: IF - - word: EXISTS - 
- word: vaccine - - word: RESTRICT - - semicolon: ; +- statement: + - keyword: DROP + - keyword: CATALOG + - object_reference: + - naked_identifier: vaccine + - keyword: CASCADE +- statement_terminator: ; +- statement: + - keyword: DROP + - keyword: CATALOG + - keyword: IF + - keyword: EXISTS + - object_reference: + - naked_identifier: vaccine + - keyword: RESTRICT +- statement_terminator: ; From cd29cf6634a241141e87d1032f4d2eb2489868a2 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Sat, 21 Dec 2024 19:11:45 +0100 Subject: [PATCH 15/19] feat implemented set variable [ci skip] --- crates/lib-dialects/src/databricks.rs | 40 +++ crates/lib-dialects/src/sparksql_keywords.rs | 2 + .../dialects/databricks/set_variable.yml | 237 +++++++++++------- 3 files changed, 193 insertions(+), 86 deletions(-) diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index 235ea52ab..7ebed3cee 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -906,6 +906,45 @@ pub fn dialect() -> Dialect { .into(), )]); + // A `SET VARIABLE` statement used to set session variables. 
+ // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-set-variable.html + // set var v1=val, v2=val2; + // # set var v1=val, v2=val2; + let kv_pair = Sequence::new(vec_of_erased![Delimited::new(vec_of_erased![ + Ref::new("VariableNameIdentifierSegment"), + Ref::new("EqualsSegment"), + one_of(vec_of_erased![ + Ref::keyword("DEFAULT"), + one_of(vec_of_erased![ + Bracketed::new(vec_of_erased![Ref::new("ExpressionSegment")]), + Ref::new("ExpressionSegment"), + ]), + ]), + ])]); + // set var (v1,v2) = (values(100,200)) + let bracketed_kv_pair = Sequence::new(vec_of_erased![ + Bracketed::new(vec_of_erased![Ref::new("VariableNameIdentifierSegment")]), + Ref::new("EqualsSegment"), + Bracketed::new(vec_of_erased![one_of(vec_of_erased![ + Ref::new("SelectStatementSegment"), + Ref::new("ValuesClauseSegment"), + ]),]), + ]); + databricks.add([( + "SetVariableStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + one_of(vec_of_erased![ + Ref::keyword("VAR"), + Ref::keyword("VARIABLE"), + ]), + one_of(vec_of_erased![kv_pair.clone(), bracketed_kv_pair.clone(),]) + .config(|config| config.allow_gaps = true), + ]) + .to_matchable() + .into(), + )]); + databricks.replace_grammar( "StatementSegment", raw_sparksql @@ -919,6 +958,7 @@ pub fn dialect() -> Dialect { Ref::new("AlterVolumeStatementSegment"), Ref::new("CommentOnStatementSegment"), Ref::new("CreateCatalogStatementSegment"), + Ref::new("SetVariableStatementSegment"), ]), None, None, diff --git a/crates/lib-dialects/src/sparksql_keywords.rs b/crates/lib-dialects/src/sparksql_keywords.rs index 849e2f7ba..eb2b42f58 100644 --- a/crates/lib-dialects/src/sparksql_keywords.rs +++ b/crates/lib-dialects/src/sparksql_keywords.rs @@ -270,6 +270,8 @@ pub(crate) const UNRESERVED_KEYWORDS: &[&str] = &[ "UPDATE", "USE", "VALUES", + "VAR", + "VARIABLE", "VIEW", "VIEWS", "WRITE", diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.yml 
b/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.yml index d8c2ffe5c..14a053790 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/set_variable.yml @@ -1,89 +1,154 @@ file: - statement: - - set_statement: - - keyword: SET - - property_name_identifier: - - properties_naked_identifier: VAR - - naked_identifier: var1 -- file: - - raw_comparison_operator: = - - numeric_literal: '5' - - semicolon: ; - - word: SET - - word: VARIABLE - - word: var1 - - raw_comparison_operator: = - - start_bracket: ( - - word: SELECT - - word: max - - start_bracket: ( - - word: c1 - - end_bracket: ) - - word: FROM - - word: VALUES - - start_bracket: ( - - numeric_literal: '1' - - end_bracket: ) - - comma: ',' - - start_bracket: ( - - numeric_literal: '2' - - end_bracket: ) - - word: AS - - word: t - - start_bracket: ( - - word: c1 - - end_bracket: ) - - end_bracket: ) - - semicolon: ; - - word: SET - - word: VAR - - word: var1 - - raw_comparison_operator: = - - word: DEFAULT - - semicolon: ; - - word: SET - - word: VAR - - start_bracket: ( - - word: var1 - - comma: ',' - - word: var2 - - comma: ',' - - word: var3 - - end_bracket: ) - - raw_comparison_operator: = - - start_bracket: ( - - word: VALUES - - start_bracket: ( - - numeric_literal: '100' - - comma: ',' - - single_quote: '''x123''' - - comma: ',' - - word: DEFAULT - - end_bracket: ) - - end_bracket: ) - - semicolon: ; - - word: SET - - word: VARIABLE - - back_quote: '`foo`' - - raw_comparison_operator: = - - word: select - - single_quote: '''bar''' - - semicolon: ; - - word: set - - word: var - - word: tz - - raw_comparison_operator: = - - word: current_timezone - - start_bracket: ( - - end_bracket: ) - - semicolon: ; - - word: set - - word: var - - word: x1 - - raw_comparison_operator: = - - numeric_literal: '12' + - keyword: SET + - keyword: VAR + - expression: + - column_reference: + - naked_identifier: var1 + - 
comparison_operator: + - raw_comparison_operator: = + - numeric_literal: '5' +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: VARIABLE + - expression: + - column_reference: + - naked_identifier: var1 + - comparison_operator: + - raw_comparison_operator: = + - bracketed: + - start_bracket: ( + - select_statement: + - select_clause: + - keyword: SELECT + - select_clause_element: + - function: + - function_name: + - function_name_identifier: max + - bracketed: + - start_bracket: ( + - expression: + - column_reference: + - naked_identifier: c1 + - end_bracket: ) + - from_clause: + - keyword: FROM + - from_expression: + - from_expression_element: + - table_expression: + - values_clause: + - keyword: VALUES + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '1' + - end_bracket: ) + - comma: ',' + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '2' + - end_bracket: ) + - alias_expression: + - keyword: AS + - naked_identifier: t + - bracketed: + - start_bracket: ( + - identifier_list: + - naked_identifier: c1 + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: VAR + - expression: + - column_reference: + - naked_identifier: var1 + - comparison_operator: + - raw_comparison_operator: = + - column_reference: + - naked_identifier: DEFAULT +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: VAR + - expression: + - bracketed: + - start_bracket: ( + - column_reference: + - naked_identifier: var1 + - comma: ',' + - column_reference: + - naked_identifier: var2 + - comma: ',' + - column_reference: + - naked_identifier: var3 + - end_bracket: ) + - comparison_operator: + - raw_comparison_operator: = + - bracketed: + - start_bracket: ( + - values_clause: + - keyword: VALUES + - bracketed: + - start_bracket: ( + - expression: + - numeric_literal: '100' + - comma: ',' + - expression: + - quoted_literal: '''x123''' + - comma: ',' + - expression: + - 
column_reference: + - naked_identifier: DEFAULT + - end_bracket: ) + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: VARIABLE + - expression: + - column_reference: + - quoted_identifier: '`foo`' + - comparison_operator: + - raw_comparison_operator: = + - select_statement: + - select_clause: + - keyword: select + - select_clause_element: + - quoted_literal: '''bar''' +- statement_terminator: ; +- statement: + - keyword: set + - keyword: var + - expression: + - column_reference: + - naked_identifier: tz + - comparison_operator: + - raw_comparison_operator: = + - function: + - function_name: + - function_name_identifier: current_timezone + - bracketed: + - start_bracket: ( + - end_bracket: ) +- statement_terminator: ; +- statement: + - keyword: set + - keyword: var + - expression: + - column_reference: + - naked_identifier: x1 + - comparison_operator: + - raw_comparison_operator: = + - numeric_literal: '12' - comma: ',' - - word: x2 - - raw_comparison_operator: = - - single_quote: '''helloworld''' - - semicolon: ; + - expression: + - column_reference: + - naked_identifier: x2 + - comparison_operator: + - raw_comparison_operator: = + - quoted_literal: '''helloworld''' +- statement_terminator: ; From bc207d1d6a771c8dacde51b9f4d940e21c9f6731 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Sat, 21 Dec 2024 19:14:55 +0100 Subject: [PATCH 16/19] feat: do the set time zone [ci skip] --- crates/lib-dialects/src/databricks.rs | 19 +++++ .../dialects/databricks/set_time_zone.yml | 78 ++++++++++--------- 2 files changed, 61 insertions(+), 36 deletions(-) diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index 7ebed3cee..9ffcf3a03 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -906,6 +906,24 @@ pub fn dialect() -> Dialect { .into(), )]); + // A `SET TIME ZONE` statement. 
+ // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-aux-conf-mgmt-set-timezone.html + databricks.add([( + "SetTimeZoneStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("SET"), + Ref::keyword("TIME"), + Ref::keyword("ZONE"), + one_of(vec_of_erased![ + Ref::keyword("LOCAL"), + Ref::new("QuotedLiteralSegment"), + Ref::new("IntervalExpressionSegment") + ]), + ]) + .to_matchable() + .into(), + )]); + // A `SET VARIABLE` statement used to set session variables. // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-aux-set-variable.html // set var v1=val, v2=val2; @@ -959,6 +977,7 @@ pub fn dialect() -> Dialect { Ref::new("CommentOnStatementSegment"), Ref::new("CreateCatalogStatementSegment"), Ref::new("SetVariableStatementSegment"), + Ref::new("SetTimeZoneStatementSegment"), ]), None, None, diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.yml index 65f850895..c3af4484c 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/set_time_zone.yml @@ -1,38 +1,44 @@ file: - statement: - - set_statement: - - keyword: SET - - property_name_identifier: - - properties_naked_identifier: TIME - - naked_identifier: ZONE -- file: - - word: LOCAL - - semicolon: ; - - word: SET - - word: TIME - - word: ZONE - - single_quote: '''America/Los_Angeles''' - - semicolon: ; - - word: SET - - word: TIME - - word: ZONE - - single_quote: '''+08:00''' - - semicolon: ; - - word: SET - - word: TIME - - word: ZONE - - word: INTERVAL - - numeric_literal: '1' - - word: HOUR - - numeric_literal: '30' - - word: MINUTES - - semicolon: ; - - word: SET - - word: TIME - - word: ZONE - - word: INTERVAL - - single_quote: '''08:30:00''' - - word: HOUR - - word: TO - - word: SECOND - - semicolon: ; + - keyword: SET + - keyword: TIME + - keyword: ZONE + - keyword: LOCAL 
+- statement_terminator: ; +- statement: + - keyword: SET + - keyword: TIME + - keyword: ZONE + - quoted_literal: '''America/Los_Angeles''' +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: TIME + - keyword: ZONE + - quoted_literal: '''+08:00''' +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: TIME + - keyword: ZONE + - interval_expression: + - keyword: INTERVAL + - interval_literal: + - numeric_literal: '1' + - date_part: HOUR + - interval_literal: + - numeric_literal: '30' + - date_part: MINUTES +- statement_terminator: ; +- statement: + - keyword: SET + - keyword: TIME + - keyword: ZONE + - interval_expression: + - keyword: INTERVAL + - interval_literal: + - signed_quoted_literal: '''08:30:00''' + - date_part: HOUR + - keyword: TO + - date_part: SECOND +- statement_terminator: ; From 1e0133d8c02b77b676e9fa43b88fe745431e300c Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Sat, 21 Dec 2024 19:21:31 +0100 Subject: [PATCH 17/19] feat: partly implemented use database [ci skip] --- crates/lib-dialects/src/databricks.rs | 25 +++++++++++++++ .../dialects/databricks/alter_database.yml | 32 +++++++++---------- .../dialects/databricks/comment_on.yml | 4 +-- .../dialects/databricks/create_database.yml | 14 ++++---- .../dialects/databricks/show_tables.yml | 8 ++--- .../dialects/databricks/show_views.yml | 8 ++--- .../dialects/databricks/show_volumes.yml | 12 +++---- .../dialects/databricks/use_catalog.yml | 2 +- .../dialects/databricks/use_database.yml | 32 +++++++++++-------- 9 files changed, 84 insertions(+), 53 deletions(-) diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index 9ffcf3a03..1c2854c0b 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -963,6 +963,31 @@ pub fn dialect() -> Dialect { .into(), )]); + databricks.add([ + ( + "DatabaseReferenceSegment".into(), + 
Ref::new("ObjectReferenceSegment").to_matchable().into(), + ) + ]); + // A `USE DATABASE` statement. + // https://docs.databricks.com/sql/language-manual/sql-ref-syntax-ddl-usedb.html + databricks.replace_grammar( + "UseDatabaseStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("USE"), + one_of(vec_of_erased![ + Ref::keyword("DATABASE"), + Ref::keyword("SCHEMA") + ]) + .config(|config| { + config.optional(); + },), + Ref::new("DatabaseReferenceSegment"), + ]) + .to_matchable() + .into(), + ); + databricks.replace_grammar( "StatementSegment", raw_sparksql diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml index 5b326887f..70da4cddb 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/alter_database.yml @@ -3,7 +3,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: SCHEMA - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: OWNER - keyword: TO @@ -13,7 +13,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: SCHEMA - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: OWNER - keyword: TO @@ -23,7 +23,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: SCHEMA - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: SET - keyword: OWNER @@ -34,7 +34,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: SCHEMA - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: SET - keyword: OWNER @@ -45,7 +45,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: DATABASE - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: OWNER - keyword: TO @@ -55,7 +55,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: DATABASE - - 
database_reference: + - object_reference: - naked_identifier: some_cat - keyword: SET - keyword: OWNER @@ -66,7 +66,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: DATABASE - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: OWNER - keyword: TO @@ -76,7 +76,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: DATABASE - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: SET - keyword: OWNER @@ -87,7 +87,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: SCHEMA - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: SET - keyword: TAGS @@ -104,7 +104,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: DATABASE - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: SET - keyword: TAGS @@ -127,7 +127,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: DATABASE - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: UNSET - keyword: TAGS @@ -141,7 +141,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: SCHEMA - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: UNSET - keyword: TAGS @@ -158,7 +158,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: SCHEMA - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: ENABLE - keyword: PREDICTIVE @@ -168,7 +168,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: DATABASE - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: INHERIT - keyword: PREDICTIVE @@ -178,7 +178,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: SCHEMA - - database_reference: + - object_reference: - naked_identifier: some_cat - keyword: DISABLE - keyword: PREDICTIVE @@ -188,7 +188,7 @@ file: - alter_database_statement: - keyword: ALTER - keyword: SCHEMA - - 
database_reference: + - object_reference: - naked_identifier: some_cat - keyword: SET - keyword: DBPROPERTIES diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml index 8c004055e..9aa2ef3b8 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/comment_on.yml @@ -21,7 +21,7 @@ file: - keyword: COMMENT - keyword: ON - keyword: SCHEMA - - database_reference: + - object_reference: - naked_identifier: my_schema - keyword: IS - quoted_literal: '''This is my schema''' @@ -30,7 +30,7 @@ file: - keyword: COMMENT - keyword: ON - keyword: DATABASE - - database_reference: + - object_reference: - naked_identifier: my_other_schema - keyword: IS - quoted_literal: '''This is my other schema''' diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.yml index e293ff50d..89458acc9 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/create_database.yml @@ -6,7 +6,7 @@ file: - keyword: IF - keyword: NOT - keyword: EXISTS - - database_reference: + - object_reference: - naked_identifier: database_name - keyword: COMMENT - quoted_literal: '"database_comment"' @@ -30,7 +30,7 @@ file: - keyword: IF - keyword: NOT - keyword: EXISTS - - database_reference: + - object_reference: - naked_identifier: database_name - keyword: COMMENT - quoted_literal: '"database_comment"' @@ -51,7 +51,7 @@ file: - create_database_statement: - keyword: CREATE - keyword: DATABASE - - database_reference: + - object_reference: - naked_identifier: customer_db - statement_terminator: ; - statement: @@ -61,7 +61,7 @@ file: - keyword: IF - keyword: NOT - keyword: EXISTS - - database_reference: + - object_reference: - naked_identifier: 
customer_db - statement_terminator: ; - statement: @@ -71,7 +71,7 @@ file: - keyword: IF - keyword: NOT - keyword: EXISTS - - database_reference: + - object_reference: - naked_identifier: customer_db - keyword: COMMENT - quoted_literal: '''This is customer database''' @@ -98,7 +98,7 @@ file: - create_database_statement: - keyword: CREATE - keyword: DATABASE - - database_reference: + - object_reference: - naked_identifier: inventory_db - keyword: COMMENT - quoted_literal: '''This database is used to maintain Inventory''' @@ -110,7 +110,7 @@ file: - keyword: IF - keyword: NOT - keyword: EXISTS - - database_reference: + - object_reference: - naked_identifier: database_name - file: - word: MANAGED diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.yml index 2bef920b6..c6c4b70c1 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_tables.yml @@ -9,7 +9,7 @@ file: - keyword: SHOW - keyword: TABLES - keyword: FROM - - database_reference: + - object_reference: - naked_identifier: userdb - statement_terminator: ; - statement: @@ -17,7 +17,7 @@ file: - keyword: SHOW - keyword: TABLES - keyword: IN - - database_reference: + - object_reference: - naked_identifier: userdb - statement_terminator: ; - statement: @@ -25,7 +25,7 @@ file: - keyword: SHOW - keyword: TABLES - keyword: FROM - - database_reference: + - object_reference: - naked_identifier: default - keyword: LIKE - quoted_literal: '''sam*''' @@ -35,7 +35,7 @@ file: - keyword: SHOW - keyword: TABLES - keyword: FROM - - database_reference: + - object_reference: - naked_identifier: default - file: - single_quote: '''sam*''' diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml index 2a569b67a..22e563a44 100644 --- 
a/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_views.yml @@ -11,7 +11,7 @@ file: - keyword: SHOW - keyword: VIEWS - keyword: FROM - - database_reference: + - object_reference: - naked_identifier: userdb - statement_terminator: ; - statement: @@ -20,7 +20,7 @@ file: - keyword: SHOW - keyword: VIEWS - keyword: IN - - database_reference: + - object_reference: - naked_identifier: global_temp - statement_terminator: ; - statement: @@ -29,7 +29,7 @@ file: - keyword: SHOW - keyword: VIEWS - keyword: FROM - - database_reference: + - object_reference: - naked_identifier: default - keyword: LIKE - quoted_literal: '''sam*''' @@ -48,7 +48,7 @@ file: - keyword: SHOW - keyword: VIEWS - keyword: FROM - - database_reference: + - object_reference: - naked_identifier: default - quoted_literal: '''sam*''' - statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml index a1b696b73..5e2f6d32f 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/show_volumes.yml @@ -9,7 +9,7 @@ file: - keyword: SHOW - keyword: VOLUMES - keyword: IN - - database_reference: + - object_reference: - naked_identifier: sampledb - statement_terminator: ; - statement: @@ -17,7 +17,7 @@ file: - keyword: SHOW - keyword: VOLUMES - keyword: FROM - - database_reference: + - object_reference: - naked_identifier: sampledb - statement_terminator: ; - statement: @@ -38,7 +38,7 @@ file: - keyword: SHOW - keyword: VOLUMES - keyword: IN - - database_reference: + - object_reference: - naked_identifier: sampledb - keyword: LIKE - quoted_literal: '''regex*''' @@ -48,7 +48,7 @@ file: - keyword: SHOW - keyword: VOLUMES - keyword: IN - - database_reference: + - object_reference: - naked_identifier: sampledb - quoted_literal: 
'''regex*''' - statement_terminator: ; @@ -57,7 +57,7 @@ file: - keyword: SHOW - keyword: VOLUMES - keyword: FROM - - database_reference: + - object_reference: - naked_identifier: sampledb - keyword: LIKE - quoted_literal: '''regex*''' @@ -67,7 +67,7 @@ file: - keyword: SHOW - keyword: VOLUMES - keyword: FROM - - database_reference: + - object_reference: - naked_identifier: sampledb - quoted_literal: '''regex*''' - statement_terminator: ; diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.yml index 3b1cc413b..b8580b68c 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/use_catalog.yml @@ -2,7 +2,7 @@ file: - statement: - use_statement: - keyword: USE - - database_reference: + - object_reference: - naked_identifier: CATALOG - file: - word: catalog_name diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.yml index 19a636ed7..508979b44 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/use_database.yml @@ -2,35 +2,41 @@ file: - statement: - use_statement: - keyword: USE - - database_reference: + - object_reference: - naked_identifier: database_name - statement_terminator: ; - statement: - use_statement: - keyword: USE - - database_reference: + - object_reference: - naked_identifier: userdb - statement_terminator: ; - statement: - use_statement: - keyword: USE - - database_reference: + - object_reference: - naked_identifier: userdb1 - statement_terminator: ; +- statement: + - use_database_statement: + - keyword: USE + - keyword: DATABASE + - object_reference: + - naked_identifier: database_name +- statement_terminator: ; +- statement: + - use_database_statement: + - keyword: USE + - 
keyword: SCHEMA + - object_reference: + - naked_identifier: database_name +- statement_terminator: ; - statement: - use_statement: - keyword: USE - - database_reference: - - naked_identifier: DATABASE + - object_reference: + - naked_identifier: IDENTIFIER - file: - - word: database_name - - semicolon: ; - - word: USE - - word: SCHEMA - - word: database_name - - semicolon: ; - - word: USE - - word: IDENTIFIER - start_bracket: ( - single_quote: '''database_name''' - end_bracket: ) From 661bbdc0d45206f4b43994d95dc4075dc71702d2 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Sat, 21 Dec 2024 19:29:48 +0100 Subject: [PATCH 18/19] temp [ci skip] --- crates/lib-dialects/src/databricks.rs | 62 ++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/crates/lib-dialects/src/databricks.rs b/crates/lib-dialects/src/databricks.rs index 1c2854c0b..5c906467f 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -988,6 +988,64 @@ pub fn dialect() -> Dialect { .into(), ); + // The parameters for a function ie. `(column type COMMENT 'comment')`. + databricks.add([( + "FunctionParameterListGrammarWithComments".into(), + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::new("FunctionParameterGrammar"), + AnyNumberOf::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::keyword("DEFAULT"), + Ref::new("LiteralGrammar"), + ]) + .config(|config| config.optional()), + Ref::new("CommentClauseSegment").optional(), + ]), + ]), + ])]) + .to_matchable() + .into(), + )]); + + // A `CREATE FUNCTION` statement. 
+ // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-create-sql-function.html + databricks.add([( + "CreateDatabricksFunctionStatementSegment".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("CREATE"), + Ref::new("OrReplaceGrammar").optional(), + Ref::new("TemporaryGrammar").optional(), + Ref::keyword("FUNCTION"), + Ref::new("IfNotExistsGrammar").optional(), + Ref::new("FunctionNameSegment"), + Ref::new("FunctionParameterListGrammarWithComments"), + Sequence::new(vec_of_erased![ + Ref::keyword("RETURNS"), + one_of(vec_of_erased![ + Ref::new("DatatypeSegment"), + Sequence::new(vec_of_erased![ + Ref::keyword("TABLE"), + Sequence::new(vec_of_erased![ + Bracketed::new(vec_of_erased![Delimited::new(vec_of_erased![ + Sequence::new(vec_of_erased![ + Ref::new("ColumnReferenceSegment"), + Ref::new("DatatypeSegment"), + Ref::new("CommentGrammar").optional(), + ]), + ]),]), + ]) + .config(|config| { config.optional() }), + ]), + ]) + .config(|config| { config.optional() }), + ]) + .config(|config| { config.optional() }), + Ref::new("FunctionDefinitionGrammar"), + + ]).to_matchable().into(), + )]); + databricks.replace_grammar( "StatementSegment", raw_sparksql @@ -1003,7 +1061,9 @@ pub fn dialect() -> Dialect { Ref::new("CreateCatalogStatementSegment"), Ref::new("SetVariableStatementSegment"), Ref::new("SetTimeZoneStatementSegment"), - ]), + Ref::new("CreateDatabricksFunctionStatementSegment"), + Ref::new("FunctionParameterListGrammarWithComments"), + ]), None, None, None, From 24d3a3a8eb371578c7351c1d8ca51dfad8820392 Mon Sep 17 00:00:00 2001 From: Ben King <9087625+benfdking@users.noreply.github.com> Date: Sat, 21 Dec 2024 20:01:28 +0100 Subject: [PATCH 19/19] drop volume logic [ci skip] --- crates/lib-dialects/src/databricks.rs | 15 +++++++++++ .../dialects/databricks/drop_volume.yml | 25 +++++++++++-------- 2 files changed, 29 insertions(+), 11 deletions(-) diff --git a/crates/lib-dialects/src/databricks.rs 
b/crates/lib-dialects/src/databricks.rs index 5c906467f..d8aa2a39a 100644 --- a/crates/lib-dialects/src/databricks.rs +++ b/crates/lib-dialects/src/databricks.rs @@ -1046,6 +1046,20 @@ pub fn dialect() -> Dialect { ]).to_matchable().into(), )]); + // Drop Volume Statement. + // https://docs.databricks.com/en/sql/language-manual/sql-ref-syntax-ddl-drop-volume.html + databricks.add([( + "DropVolumeStatement".into(), + Sequence::new(vec_of_erased![ + Ref::keyword("DROP"), + Ref::keyword("VOLUME"), + Ref::new("IfExistsGrammar").optional(), + Ref::new("VolumeReferenceSegment"), + ]) + .to_matchable() + .into(), + )]); + databricks.replace_grammar( "StatementSegment", raw_sparksql @@ -1063,6 +1077,7 @@ pub fn dialect() -> Dialect { Ref::new("SetTimeZoneStatementSegment"), Ref::new("CreateDatabricksFunctionStatementSegment"), Ref::new("FunctionParameterListGrammarWithComments"), + Ref::new("DropVolumeStatement"), ]), None, None, diff --git a/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.yml b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.yml index 79aeb3251..b21d0c759 100644 --- a/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.yml +++ b/crates/lib-dialects/test/fixtures/dialects/databricks/drop_volume.yml @@ -1,12 +1,15 @@ file: -- unparsable: - - word: DROP - - word: VOLUME - - word: vaccine_volume - - semicolon: ; - - word: DROP - - word: VOLUME - - word: IF - - word: EXISTS - - word: vaccine_volume - - semicolon: ; +- statement: + - keyword: DROP + - keyword: VOLUME + - object_reference: + - naked_identifier: vaccine_volume +- statement_terminator: ; +- statement: + - keyword: DROP + - keyword: VOLUME + - keyword: IF + - keyword: EXISTS + - object_reference: + - naked_identifier: vaccine_volume +- statement_terminator: ;