diff --git a/.github/workflows/pr_testing.yml b/.github/workflows/pr_testing.yml index 841e6dcdc..29f00cae8 100644 --- a/.github/workflows/pr_testing.yml +++ b/.github/workflows/pr_testing.yml @@ -16,6 +16,11 @@ on: description: "Python 3.12" type: boolean default: true + run-all: + description: "Run All Tests" + type: boolean + required: false + default: false run-python: description: "Run Main PyDough Tests" type: boolean @@ -101,8 +106,9 @@ jobs: # https://docs.github.com/en/actions/learn-github-actions/expressions#contains # contains is case-insensitive if: | + (github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run all]')) || (github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run ci]')) || - (github.event_name == 'workflow_dispatch' && inputs.run-python) + (github.event_name == 'workflow_dispatch' && (inputs.run-all || inputs.run-python)) runs-on: ubuntu-latest strategy: matrix: @@ -138,8 +144,9 @@ jobs: name: Snowflake Tests needs: [get-msg, get-py-ver-matrix] if: | + (github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run all]')) || (github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run sf]')) || - (github.event_name == 'workflow_dispatch' && inputs.run-sf) + (github.event_name == 'workflow_dispatch' && (inputs.run-all || inputs.run-sf)) uses: ./.github/workflows/sf_testing.yml secrets: SF_USERNAME: ${{ secrets.SF_USERNAME }} @@ -154,8 +161,9 @@ jobs: name: MySQL Tests needs: [get-msg, get-py-ver-matrix] if: | + (github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run all]')) || (github.event_name == 'pull_request' && contains(needs.get-msg.outputs.commitMsg, '[run mysql]')) || - (github.event_name == 'workflow_dispatch' && inputs.run-mysql) + (github.event_name == 'workflow_dispatch' && (inputs.run-all || inputs.run-mysql)) uses: ./.github/workflows/mysql_testing.yml # Path to MySQL workflow file secrets: MYSQL_USERNAME: ${{ secrets.MYSQL_USERNAME }} diff --git a/README.md b/README.md index 77dde1958..ac167691e 100644 --- a/README.md +++ b/README.md @@ -97,8 +97,49 @@ script must be run so that the `defog.db` file is located in the `tests` directo ## Running CI Tests -To run our CI tests on your PR, you must include the flag `[run CI]` in latest -commit message. +When submitting a PR, you can control which CI tests run by adding special flags +to your **latest commit message**. + +**Note:** All flags are **case-insensitive**. + +- To run **PyDough CI tests**, add: `[run CI]` (only runs **SQLite tests**, no other SQL dialects) +- To run **PyDough and all dialect tests**, add: `[run all]` +- To run **specific dialect tests**, use the corresponding flag as described below. + +### Running Snowflake Tests on CI +To run **Snowflake CI tests**, add the flag `[run SF]` to your commit message. + +**Running Snowflake tests locally:** + +1. Install the Snowflake Connector for Python with Pandas support + ```bash + pip install "snowflake-connector-python[pandas]" + ``` + +2. Set your Snowflake credentials as environment variables: + ```bash + export SF_USERNAME="your_username" + export SF_PASSWORD="your_password" + export SF_ACCOUNT="your_account" + ``` + +### Running MySQL Tests on CI +To run **MySQL CI tests**, add the flag `[run mysql]` to your commit message. + +**Running MySQL tests locally:** + +1. Make sure you have [**Docker Desktop**](https://www.docker.com/get-started/) + installed and running. + +2. 
Install the MySQL Connector for Python + ```bash + pip install mysql-connector-python + ``` + +3. Set your MySQL credentials as environment variables: + ```bash + export MYSQL_USERNAME="your_username" + export MYSQL_PASSWORD="your_password" ## Runtime Dependencies diff --git a/demos/README.md b/demos/README.md index 9e1d9095d..7a69e7f3b 100644 --- a/demos/README.md +++ b/demos/README.md @@ -24,4 +24,5 @@ Once the introduction notebook is complete, you can explore the other notebooks: - [4_tpch.ipynb](notebooks/4_tpch.ipynb) provides PyDough translations for most of the TPC-H benchmark queries. - [5_what_if.ipynb](notebooks/5_what_if.ipynb) demonstrates how to do WHAT-IF analysis with PyDough. - [MySQL_TPCH.ipynb](notebooks/MySQL_TPCH.ipynb) demonstrates how to connect a MySQL database with PyDough. +- [SF_TPCH_q1.ipynb](notebooks/SF_TPCH_q1.ipynb) demonstrates how to connect a Snowflake database with PyDough. diff --git a/demos/notebooks/1_introduction.ipynb b/demos/notebooks/1_introduction.ipynb index 931b4c604..35ee0ad78 100644 --- a/demos/notebooks/1_introduction.ipynb +++ b/demos/notebooks/1_introduction.ipynb @@ -294,7 +294,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.13.5" + "version": "3.12.2" } }, "nbformat": 4, diff --git a/demos/notebooks/MySQL_TPCH.ipynb b/demos/notebooks/MySQL_TPCH.ipynb index b4c416c60..6858813cf 100644 --- a/demos/notebooks/MySQL_TPCH.ipynb +++ b/demos/notebooks/MySQL_TPCH.ipynb @@ -28,7 +28,7 @@ "> You can also test with our **pre-built MySQL TPC-H database** available on **Docker Hub**.\n", ">\n", "> #### πŸ“‹ Requirements\n", - "> - Make sure you have **Docker** installed.\n", + "> - Make sure you have **Docker Desktop** installed and running.\n", ">\n", "> #### πŸ“¦ Pull and Run the Container\n", "> ```bash\n", diff --git a/demos/notebooks/SF_TPCH_q1.ipynb b/demos/notebooks/SF_TPCH_q1.ipynb new file mode 100644 index 000000000..478f5650a --- /dev/null +++ b/demos/notebooks/SF_TPCH_q1.ipynb @@ -0,0 +1,569 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "> πŸ“¦ **Note:** Before running this notebook, make sure the **Snowflake Python connector with pandas support** is installed.\n", + "\n", + "You can install it using one of the following:\n", + "\n", + "- If you're working inside the repo:\n", + " ```bash\n", + " pip install -e \".[snowflake]\"\n", + " ```\n", + "\n", + "- Or install the connector directly\n", + " ```bash\n", + " pip install \"snowflake-connector-python[pandas]\"\n", + " ```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### πŸ“¦ Importing Required Libraries" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pydough\n", + "import datetime\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### πŸ” Loading Credentials and Connecting to Snowflake\n", + "\n", + "This block performs two main tasks:\n", + "\n", + "1. **Load credentials from a local JSON file**:\n", + " - The `creds.json` file contains your Snowflake login details like username, password, account name, database, schema, and warehouse.\n", + " - These are read using Python’s built-in `json` module and stored in variables.\n", + "\n", + "2. 
**Connect to Snowflake using PyDough**:\n", + " - `pydough.active_session.load_metadata_graph(...)` loads a metadata graph that maps your Snowflake schema (used for query planning or optimizations).\n", + " - `connect_database(...)` uses the loaded credentials to establish a live connection to your Snowflake database.\n", + "\n", + "πŸ“Œ Make sure:\n", + "- The `creds.json` file exists and contains all the required keys.\n", + "- The metadata graph path points to a valid JSON file that represents your schema.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "DatabaseContext(connection=, dialect=)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import json\n", + "\n", + "# Step 1: Load credentials from a JSON file\n", + "path_to_creds = \"./creds.json\"\n", + "with open(path_to_creds) as f:\n", + " creds = json.load(f)\n", + "\n", + "sf_username = creds[\"SF_USERNAME\"]\n", + "sf_password = creds[\"SF_PASSWORD\"]\n", + "sf_account = creds[\"SF_ACCOUNT\"]\n", + "sf_tpch_db = creds[\"SF_DATABASE\"]\n", + "sf_tpch_schema = creds[\"SF_SCHEMA\"]\n", + "sf_warehouse = creds[\"SF_WH\"]\n", + "\n", + "# Step 2: Load a sample metadata graph and connect PyDough to Snowflake\n", + "pydough.active_session.load_metadata_graph(\"../../tests/test_metadata/snowflake_sample_graphs.json\", \"TPCH\")\n", + "pydough.active_session.connect_database(\"snowflake\", \n", + " user=sf_username,\n", + " password=sf_password,\n", + " account=sf_account,\n", + " warehouse=sf_warehouse,\n", + " database=sf_tpch_db,\n", + " schema=sf_tpch_schema,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### πŸ”Œ Enabling PyDough's Jupyter Magic Commands\n", + "\n", + "This line loads the `pydough.jupyter_extensions` module, which adds **custom magic commands** (like `%%pydough`) to the notebook.\n", + "\n", + "These magic commands allow you to:\n", + "- Write PyDough directly in notebook cells using `%%pydough`\n", + "- Automatically render results\n", + "\n", + "This is a Jupyter-specific feature β€” the `%load_ext` command dynamically loads these extensions into your current notebook session.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext pydough.jupyter_extensions" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### πŸ“Š TPC-H Query 1 Using PyDough DSL\n", + "\n", + "This cell runs **TPC-H Query 1** using PyDough's Python-style DSL instead of raw SQL.\n", + "\n", + "The query computes summary statistics (like sums, averages, and counts) for orders, grouped by return flag and line status, and filtered by a shipping date cutoff.\n", + "\n", + "Finally, `pydough.to_df(output)` converts and prints the result as a Pandas DataFrame for easy inspection and analysis in Python.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
L_RETURNFLAGL_LINESTATUSSUM_QTYSUM_BASE_PRICESUM_DISC_PRICESUM_CHARGEAVG_QTYAVG_PRICEAVG_DISCCOUNT_ORDER
0AF37734107.056586554400.7353758257134.870055909065222.82769225.5220058538273.129734620.049985301478493
1NF991417.01487504710.381413082168.05411469649223.19437525.5164719238284.467760850.0500934338854
2NO76633518.0114935210409.19109189591897.4720113561024263.01378225.5020196438248.015609060.050000263004998
3RF37719753.056568041380.9053741292684.604055889619119.83193225.5057936138250.854626100.050009411478870
\n", + "
" + ], + "text/plain": [ + " L_RETURNFLAG L_LINESTATUS SUM_QTY SUM_BASE_PRICE SUM_DISC_PRICE \\\n", + "0 A F 37734107.0 56586554400.73 53758257134.8700 \n", + "1 N F 991417.0 1487504710.38 1413082168.0541 \n", + "2 N O 76633518.0 114935210409.19 109189591897.4720 \n", + "3 R F 37719753.0 56568041380.90 53741292684.6040 \n", + "\n", + " SUM_CHARGE AVG_QTY AVG_PRICE AVG_DISC COUNT_ORDER \n", + "0 55909065222.827692 25.52200585 38273.12973462 0.04998530 1478493 \n", + "1 1469649223.194375 25.51647192 38284.46776085 0.05009343 38854 \n", + "2 113561024263.013782 25.50201964 38248.01560906 0.05000026 3004998 \n", + "3 55889619119.831932 25.50579361 38250.85462610 0.05000941 1478870 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "%%pydough\n", + "# TPCH Q1\n", + "output = (\n", + " lines.WHERE((ship_date <= datetime.date(1998, 12, 1)))\n", + " .PARTITION(name=\"groups\", by=(return_flag, status))\n", + " .CALCULATE(\n", + " L_RETURNFLAG=return_flag,\n", + " L_LINESTATUS=status,\n", + " SUM_QTY=SUM(lines.quantity),\n", + " SUM_BASE_PRICE=SUM(lines.extended_price),\n", + " SUM_DISC_PRICE=SUM(lines.extended_price * (1 - lines.discount)),\n", + " SUM_CHARGE=SUM(\n", + " lines.extended_price * (1 - lines.discount) * (1 + lines.tax)\n", + " ),\n", + " AVG_QTY=AVG(lines.quantity),\n", + " AVG_PRICE=AVG(lines.extended_price),\n", + " AVG_DISC=AVG(lines.discount),\n", + " COUNT_ORDER=COUNT(lines),\n", + " )\n", + " .ORDER_BY(L_RETURNFLAG.ASC(), L_LINESTATUS.ASC())\n", + ")\n", + "# Step 3: Execute code\n", + "pydough.to_df(output)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another way to connect to the Snowflake database is by passing a connection object directly. Here's an example:" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
L_RETURNFLAGL_LINESTATUSSUM_QTYSUM_BASE_PRICESUM_DISC_PRICESUM_CHARGEAVG_QTYAVG_PRICEAVG_DISCCOUNT_ORDER
0AF37734107.056586554400.7353758257134.870055909065222.82769225.5220058538273.129734620.049985301478493
1NF991417.01487504710.381413082168.05411469649223.19437525.5164719238284.467760850.0500934338854
2NO76633518.0114935210409.19109189591897.4720113561024263.01378225.5020196438248.015609060.050000263004998
3RF37719753.056568041380.9053741292684.604055889619119.83193225.5057936138250.854626100.050009411478870
\n", + "
" + ], + "text/plain": [ + " L_RETURNFLAG L_LINESTATUS SUM_QTY SUM_BASE_PRICE SUM_DISC_PRICE \\\n", + "0 A F 37734107.0 56586554400.73 53758257134.8700 \n", + "1 N F 991417.0 1487504710.38 1413082168.0541 \n", + "2 N O 76633518.0 114935210409.19 109189591897.4720 \n", + "3 R F 37719753.0 56568041380.90 53741292684.6040 \n", + "\n", + " SUM_CHARGE AVG_QTY AVG_PRICE AVG_DISC COUNT_ORDER \n", + "0 55909065222.827692 25.52200585 38273.12973462 0.04998530 1478493 \n", + "1 1469649223.194375 25.51647192 38284.46776085 0.05009343 38854 \n", + "2 113561024263.013782 25.50201964 38248.01560906 0.05000026 3004998 \n", + "3 55889619119.831932 25.50579361 38250.85462610 0.05000941 1478870 " + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import snowflake.connector as sf_connector\n", + "\n", + "# Step 1: Load credentials from a JSON file\n", + "path_to_creds = \"./creds.json\"\n", + "with open(path_to_creds) as f:\n", + " creds = json.load(f)\n", + "\n", + "sf_username = creds[\"SF_USERNAME\"]\n", + "sf_password = creds[\"SF_PASSWORD\"]\n", + "sf_account = creds[\"SF_ACCOUNT\"]\n", + "sf_tpch_db = creds[\"SF_DATABASE\"]\n", + "sf_tpch_schema = creds[\"SF_SCHEMA\"]\n", + "sf_warehouse = creds[\"SF_WH\"]\n", + "\n", + "# Step 2: Create a Snowflake connection object\n", + "connection: sf_connector.connection.SnowflakeConnection = sf_connector.connect(\n", + " user=sf_username,\n", + " password=sf_password,\n", + " account=sf_account,\n", + " warehouse=sf_warehouse,\n", + " database=sf_tpch_db,\n", + " schema=sf_tpch_schema\n", + " )\n", + "\n", + "# Step 3: Load a sample metadata graph and connect PyDough to Snowflake\n", + "pydough.active_session.load_metadata_graph(\"../../tests/test_metadata/snowflake_sample_graphs.json\", \"TPCH\")\n", + "conn_db = pydough.active_session.connect_database(\"snowflake\", connection=connection)\n", + "pydough.active_session.database = conn_db\n", + "\n", + "# Step 4: Execute code\n", + "pydough.to_df(output)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, use the same connection outside PyDoughto inspect the last query executed" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Last query text: SELECT\n", + " l_returnflag AS L_RETURNFLAG,\n", + " l_linestatus AS L_LINESTATUS,\n", + " COALESCE(SUM(l_quantity), 0) AS SUM_QTY,\n", + " COALESCE(SUM(l_extendedprice), 0) AS SUM_BASE_PRICE,\n", + " COALESCE(SUM(l_extendedprice * (\n", + " 1 - l_discount\n", + " )), 0) AS SUM_DISC_PRICE,\n", + " COALESCE(SUM(l_extendedprice * (\n", + " 1 - l_discount\n", + " ) * (\n", + " 1 + l_tax\n", + " )), 0) AS SUM_CHARGE,\n", + " AVG(l_quantity) AS AVG_QTY,\n", + " AVG(l_extendedprice) AS AVG_PRICE,\n", + " AVG(l_discount) AS AVG_DISC,\n", + " COUNT(*) AS COUNT_ORDER\n", + "FROM tpch_sf1.lineitem\n", + "WHERE\n", + " l_shipdate <= CAST('1998-12-01' AS DATE)\n", + "GROUP BY\n", + " 1,\n", + " 2\n", + "ORDER BY\n", + " 1 NULLS FIRST,\n", + " 2 NULLS FIRST\n" + ] + } + ], + "source": [ + "cur = conn_db.connection.cursor\n", + "\n", + "\n", + "last_query_id = cur.sfqid\n", + "cur.execute(f\"\"\"\n", + " SELECT query_text\n", + " FROM TABLE(INFORMATION_SCHEMA.QUERY_HISTORY())\n", + " WHERE query_id = '{last_query_id}'\n", + "\"\"\")\n", + "last_query_text = cur.fetchone()[0]\n", + "print(\"Last query text:\", last_query_text)" + ] + }, + { + "cell_type": "markdown", + 
"metadata": {}, + "source": [ + "Run additional queries if needed:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Snowflake version: 9.24.1\n", + "Today's date: 2025-08-26\n" + ] + } + ], + "source": [ + "# Example: run another test query\n", + "cur.execute(\"SELECT CURRENT_VERSION()\")\n", + "version = cur.fetchone()[0]\n", + "print(\"Snowflake version:\", version)\n", + "\n", + "# Example: Get today's date\n", + "cur.execute(\"SELECT CURRENT_DATE()\")\n", + "today = cur.fetchone()[0]\n", + "print(\"Today's date:\", today)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Finally, close the cursor and connection:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "cur.close()\n", + "connection.close()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.2" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/documentation/usage.md b/documentation/usage.md index 51fa4cab7..00d231438 100644 --- a/documentation/usage.md +++ b/documentation/usage.md @@ -242,6 +242,7 @@ The following configs are used in the behavior of `DAYOFWEEK`, `DATETIME`, and ` - `DAYOFWEEK` : A function that returns the number of days since the start of the week. Start of week is relative to the `start_of_week` config. - `DATETIME` : This function also supports the `start of week` unit, which is relative to the `start_of_week` config. - `DATEDIFF` : This function also supports difference between two dates in terms of weeks, which is relative to the `start_of_week` config. +- `DATE_TRUNC` : This function also supports truncating a date to the start of the week, which is relative to the `start_of_week` config. The value must be one of the following `DayOfWeek` enum values: @@ -254,6 +255,10 @@ The value must be one of the following `DayOfWeek` enum values: - `DayOfWeek.SATURDAY` The `DayOfWeek` enum is defined in the `pydough.configs` module. + +**Note:** In Snowflake, PyDough does not automatically detect changes to `WEEK_START` session parameter. Please configure the `start_of_week` in your PyDough configurations. + + 6. `start_week_as_zero` (default=True): if True, then the first day of the week is considered to be 0. If False, then the first day of the week is considered to be 1. This config is used by `DAYOFWEEK` function. ```py @@ -335,8 +340,27 @@ Just like the knowledge graph & miscellaneous configurations, the database conte Below is a list of all supported values for the database name: - `sqlite`: uses a SQLite database. [See here](https://docs.python.org/3/library/sqlite3.html#sqlite3.connect) for details on the connection API and what keyword arguments can be passed in. + - `mysql`: uses a MySQL database. [See here](https://dev.mysql.com/doc/connector-python/en/connector-python-example-connecting.html) for details on the connection API and what keyword arguments can be passed in. +- `snowflake`: uses a Snowflake database. 
[See here](https://docs.snowflake.com/en/user-guide/python-connector.html#connecting-to-snowflake) for details on the connection API and what keyword arguments can be passed in. + +> Note: If you installed PyDough via pip, you can install optional connectors using pip extras: +> +> ```bash +> pip install pydough[mysql] # Install MySQL connector +> pip install pydough[snowflake] # Install Snowflake connector +> pip install "pydough[mysql,snowflake]" # Install both at once +> ``` + +Here’s a quick reference table showing which connector is needed for each dialect: + +| Dialect | Connector Needed | +|-----------|----------------------------------------| +| `sqlite` | Already included with PyDough | +| `mysql` | `mysql-connector-python` | +| `snowflake` | `snowflake-connector-python[pandas]` | + Below are examples of how to access the context and switch it out for a newly created one, either by manually setting it or by using `session.load_database`. These examples assume that there are two different sqlite database files located at `db_files/education.db` and `db_files/shakespeare.db`. ```py @@ -361,6 +385,20 @@ It is important to ensure that the correct database context is being used for se - The context's database connection is used to execute queries once translated to SQL. #### Examples with different supported database connectors with PyDough +- Snowflake: You can connect to a Snowflake database using `load_metadata_graph` and `connect_database` APIs. For example: + ```py + pydough.active_session.load_metadata_graph("../../tests/test_metadata/snowflake_sample_graphs.json", "TPCH"), + pydough.active_session.connect_database("snowflake", + user=snowflake_username, + password=snowflake_password, + account=snowflake_account, + warehouse=snowflake_warehouse, + database=snowflake_database, + schema=snowflake_schema + ) + ``` +You can find a full example of using Snowflake database with PyDough in [this usage guide](./../demos/notebooks/SF_TPCH_q1.ipynb). + - MySQL: You can connect to a mysql database using `load_metadata_graph` and `connect_database` APIs. For example: ```py pydough.active_session.load_metadata_graph("../../tests/test_metadata/sample_graphs.json", "TPCH"), diff --git a/pydough/conversion/hybrid_translator.py b/pydough/conversion/hybrid_translator.py index f7a1895b8..e4d4c1a75 100644 --- a/pydough/conversion/hybrid_translator.py +++ b/pydough/conversion/hybrid_translator.py @@ -90,7 +90,10 @@ def __init__(self, configs: PyDoughConfigs, dialect: DatabaseDialect): # If True, rewrites MEDIAN calls into an average of the 1-2 median rows # or rewrites QUANTILE calls to select the first qualifying row, # both derived from window functions, otherwise leaves as-is. - self.rewrite_median_quantile: bool = dialect not in {DatabaseDialect.ANSI} + self.rewrite_median_quantile: bool = dialect not in { + DatabaseDialect.ANSI, + DatabaseDialect.SNOWFLAKE, + } @staticmethod def get_subcollection_join_keys( diff --git a/pydough/database_connectors/README.md b/pydough/database_connectors/README.md index 5660b3a5a..76c760e44 100644 --- a/pydough/database_connectors/README.md +++ b/pydough/database_connectors/README.md @@ -16,6 +16,7 @@ The database connectors module provides functionality to manage database connect - Supported dialects: - `ANSI`: Represents the ANSI SQL dialect. - `SQLITE`: Represents the SQLite SQL dialect. + - `SNOWFLAKE`: Represents the Snowflake SQL dialect. - `MYSQL`: Represents the MySQL dialect. 
- `DatabaseContext`: Dataclass that manages the database connection and the corresponding dialect. - Fields: @@ -30,6 +31,7 @@ The database connectors module provides functionality to manage database connect - `load_database_context`: Loads the database context with the appropriate connection and dialect. - `load_sqlite_connection`: Loads a SQLite database connection. +- `load_snowflake_connection`: Loads a Snowflake connection. - `load_mysql_connection`: Loads a MySQL database connection. ## Usage diff --git a/pydough/database_connectors/__init__.py b/pydough/database_connectors/__init__.py index 198f820fa..f8fcea836 100644 --- a/pydough/database_connectors/__init__.py +++ b/pydough/database_connectors/__init__.py @@ -5,12 +5,14 @@ "empty_connection", "load_database_context", "load_mysql_connection", + "load_snowflake_connection", "load_sqlite_connection", ] from .builtin_databases import ( load_database_context, load_mysql_connection, + load_snowflake_connection, load_sqlite_connection, ) from .database_connector import DatabaseConnection, DatabaseContext, DatabaseDialect diff --git a/pydough/database_connectors/builtin_databases.py b/pydough/database_connectors/builtin_databases.py index 1cc7586b6..9417c4a95 100644 --- a/pydough/database_connectors/builtin_databases.py +++ b/pydough/database_connectors/builtin_databases.py @@ -11,6 +11,7 @@ __all__ = [ "load_database_context", "load_mysql_connection", + "load_snowflake_connection", "load_sqlite_connection", ] @@ -28,13 +29,16 @@ def load_database_context(database_name: str, **kwargs) -> DatabaseContext: Returns: The database context object. """ - supported_databases = {"sqlite", "mysql"} + supported_databases = {"sqlite", "snowflake", "mysql"} connection: DatabaseConnection dialect: DatabaseDialect match database_name.lower(): case "sqlite": connection = load_sqlite_connection(**kwargs) dialect = DatabaseDialect.SQLITE + case "snowflake": + connection = load_snowflake_connection(**kwargs) + dialect = DatabaseDialect.SNOWFLAKE case "mysql": connection = load_mysql_connection(**kwargs) dialect = DatabaseDialect.MYSQL @@ -60,6 +64,53 @@ def load_sqlite_connection(**kwargs) -> DatabaseConnection: return DatabaseConnection(connection) +def load_snowflake_connection(**kwargs) -> DatabaseConnection: + """ + Loads a Snowflake database connection. + If a connection object is provided in the keyword arguments, + it will be used directly. Otherwise, the connection will be created + using the provided keyword arguments. + Args: + **kwargs: + The Snowflake connection or its connection parameters. + This includes the required parameters for connecting to Snowflake, + such as `user`, `password`, and `account`. Optional parameters + like `database`, `schema`, and `warehouse` can also be provided. + Raises: + ImportError: If the Snowflake connector is not installed. + ValueError: If required connection parameters are missing. + + Returns: + DatabaseConnection: A database connection object for Snowflake. + """ + try: + import snowflake.connector + except ImportError: + raise ImportError( + "Snowflake connector is not installed. Please install it with `pip install snowflake-connector-python`." + ) + + connection: snowflake.connector.connection.SnowflakeConnection + if connection := kwargs.pop("connection", None): + # If a connection object is provided, return it wrapped in DatabaseConnection + return DatabaseConnection(connection) + # Snowflake connection requires specific parameters: + # user, password, account. 
+ # Raise an error if any of these are missing. + # NOTE: database, schema, and warehouse are optional and + # will default to the user's settings. + # See: https://docs.snowflake.com/en/developer-guide/python-connector/python-connector-api#label-snowflake-connector-methods-connect + required_keys = ["user", "password", "account"] + if not all(key in kwargs for key in required_keys): + raise ValueError( + "Snowflake connection requires the following arguments: " + + ", ".join(required_keys) + ) + # Create a Snowflake connection using the provided keyword arguments + connection = snowflake.connector.connect(**kwargs) + return DatabaseConnection(connection) + + def load_mysql_connection(**kwargs) -> DatabaseConnection: """ Loads a MySQL database connection. This is done by providing a wrapper diff --git a/pydough/database_connectors/database_connector.py b/pydough/database_connectors/database_connector.py index c19c114dd..dce6ebb55 100644 --- a/pydough/database_connectors/database_connector.py +++ b/pydough/database_connectors/database_connector.py @@ -7,10 +7,11 @@ from dataclasses import dataclass from enum import Enum +from typing import TYPE_CHECKING, cast import pandas as pd -from .db_types import DBConnection, DBCursor +from .db_types import DBConnection, DBCursor, SnowflakeCursor __all__ = ["DatabaseConnection", "DatabaseContext", "DatabaseDialect"] @@ -26,9 +27,11 @@ class DatabaseConnection: # sqlite3 contains the connection specification and is packaged # with Python. _connection: DBConnection + _cursor: DBCursor | None def __init__(self, connection: DBConnection) -> None: self._connection = connection + self._cursor = None def execute_query_df(self, sql: str) -> pd.DataFrame: """Create a cursor object using the connection and execute the query, @@ -43,18 +46,31 @@ def execute_query_df(self, sql: str) -> pd.DataFrame: Returns: list[pt.Any]: A list of rows returned by the query. """ - cursor: DBCursor = self._connection.cursor() + self._cursor = self._connection.cursor() try: - cursor.execute(sql) + self.cursor.execute(sql) except Exception as e: print(f"ERROR WHILE EXECUTING QUERY:\n{sql}") raise e - column_names: list[str] = [description[0] for description in cursor.description] - # No need to close the cursor, as its closed by del. - # TODO: (gh #174) Cache the cursor? - # TODO: (gh #175) enable typed DataFrames. - data = cursor.fetchall() - return pd.DataFrame(data, columns=column_names) + + # This is only for MyPy to pass and know about fetch_pandas_all() + # NOTE: Code does not run in type checking mode, so we need to + # check at run-time if the cursor has the method. + if TYPE_CHECKING: + _ = cast(SnowflakeCursor, self.cursor).fetch_pandas_all + # At run-time check and run the fetch. + if hasattr(self.cursor, "fetch_pandas_all"): + return self.cursor.fetch_pandas_all() + else: + # Assume sqlite3 + column_names: list[str] = [ + description[0] for description in self.cursor.description + ] + # No need to close the cursor, as its closed by del. + # TODO: (gh #174) Cache the cursor? + # TODO: (gh #175) enable typed DataFrames. + data = self.cursor.fetchall() + return pd.DataFrame(data, columns=column_names) # TODO: Consider adding a streaming API for large queries. It's not yet clear # how this will be available at a user API level. @@ -70,6 +86,15 @@ def connection(self) -> DBConnection: """ return self._connection + @property + def cursor(self) -> DBCursor: + """Get the database cursor. + + Returns: + DBCursor: The database cursor PyDough is managing. 
+ """ + return self._cursor + class DatabaseDialect(Enum): """Enum for the supported database dialects. @@ -77,6 +102,7 @@ class DatabaseDialect(Enum): ANSI = "ansi" SQLITE = "sqlite" + SNOWFLAKE = "snowflake" MYSQL = "mysql" @staticmethod diff --git a/pydough/database_connectors/db_types.py b/pydough/database_connectors/db_types.py index c1c7c8bd8..a01f52cda 100644 --- a/pydough/database_connectors/db_types.py +++ b/pydough/database_connectors/db_types.py @@ -3,7 +3,7 @@ use `if TYPE_CHECKING:` to import database-specific modules in a way that allows static type checkers to understand the types without triggering runtime imports. This avoids runtime errors when some optional dependencies -are not installed. +(e.g., `snowflake-connector-python`) are not installed. """ from typing import TYPE_CHECKING, Any, TypeAlias @@ -15,11 +15,18 @@ # without requiring these modules at runtime unless they are actually used. import sqlite3 + SQLiteConn: TypeAlias = sqlite3.Connection + SQLiteCursor: TypeAlias = sqlite3.Cursor + + import snowflake.connector + import snowflake.connector.cursor + + SnowflakeConn: TypeAlias = snowflake.connector.Connection + SnowflakeCursor: TypeAlias = snowflake.connector.cursor.SnowflakeCursor + import mysql.connector import mysql.connector.cursor - SQLiteConn: TypeAlias = sqlite3.Connection - SQLiteCursor: TypeAlias = sqlite3.Cursor MySQLConn: TypeAlias = mysql.connector.MySQLConnection MySQLCursor: TypeAlias = mysql.connector.cursor.MySQLCursor @@ -32,17 +39,17 @@ # Dialect1_Cursor: TypeAlias = dialect1_module.Cursor # 4. Define the type aliases for database connections and cursors - DBConnection: TypeAlias = SQLiteConn | MySQLConn # | Dialect1_Conn - DBCursor: TypeAlias = SQLiteCursor | MySQLCursor # | Dialect1_Cursor + DBConnection: TypeAlias = SQLiteConn | SnowflakeConn | MySQLConn + DBCursor: TypeAlias = SQLiteCursor | SnowflakeCursor | MySQLCursor else: DBConnection: TypeAlias = Any DBCursor: TypeAlias = Any SQLiteConn: TypeAlias = Any SQLiteCursor: TypeAlias = Any + SnowflakeCursor: TypeAlias = Any + SnowflakeConn: TypeAlias = Any MySQLConn: TypeAlias = Any MySQLCursor: TypeAlias = Any - # Dialect1_Conn: TypeAlias = Any - # Dialect1_Cursor: TypeAlias = Any # This allows us to use these type aliases in the rest of the code # without worrying about whether the specific database modules are available. 
@@ -53,8 +60,8 @@ "MySQLCursor", "SQLiteConn", "SQLiteCursor", - # "Dialect1_Conn", - # "Dialect1_Cursor", + "SnowflakeConn", + "SnowflakeCursor", ] # The type aliases are used to provide a consistent interface for database connections # and cursors across different database backends, allowing for easier diff --git a/pydough/sqlglot/__init__.py b/pydough/sqlglot/__init__.py index 4bdf4f9d1..8edc1b483 100644 --- a/pydough/sqlglot/__init__.py +++ b/pydough/sqlglot/__init__.py @@ -15,7 +15,11 @@ convert_relation_to_sql, execute_df, ) -from .sqlglot_helpers import get_glot_name, set_glot_alias, unwrap_alias +from .sqlglot_helpers import ( + get_glot_name, + set_glot_alias, + unwrap_alias, +) from .sqlglot_identifier_finder import find_identifiers, find_identifiers_in_list from .sqlglot_relational_expression_visitor import SQLGlotRelationalExpressionVisitor from .sqlglot_relational_visitor import SQLGlotRelationalVisitor diff --git a/pydough/sqlglot/execute_relational.py b/pydough/sqlglot/execute_relational.py index c1d6a49a3..e9147bce4 100644 --- a/pydough/sqlglot/execute_relational.py +++ b/pydough/sqlglot/execute_relational.py @@ -9,6 +9,7 @@ from sqlglot import parse_one from sqlglot.dialects import Dialect as SQLGlotDialect from sqlglot.dialects import MySQL as MySQLDialect +from sqlglot.dialects import Snowflake as SnowflakeDialect from sqlglot.dialects import SQLite as SQLiteDialect from sqlglot.errors import SqlglotError from sqlglot.expressions import Alias, Column, Select, Table, With @@ -24,7 +25,6 @@ from sqlglot.optimizer.optimize_joins import optimize_joins from sqlglot.optimizer.qualify import qualify from sqlglot.optimizer.scope import traverse_scope, walk_in_scope -from sqlglot.optimizer.simplify import simplify from pydough.configs import PyDoughConfigs from pydough.database_connectors import ( @@ -40,6 +40,7 @@ from .override_merge_subqueries import merge_subqueries from .override_pushdown_predicates import pushdown_predicates from .override_pushdown_projections import pushdown_projections +from .override_simplify import simplify from .override_unnest_subqueries import unnest_subqueries from .sqlglot_relational_visitor import SQLGlotRelationalVisitor @@ -100,14 +101,18 @@ def apply_sqlglot_optimizer( # Apply each rule explicitly with appropriate kwargs + kwargs = { + "quote_identifiers": False, + "isolate_tables": True, + "validate_qualify_columns": False, + } + # Exclude Snowflake dialect to avoid some issues + # related to name qualification + if not isinstance(dialect, SnowflakeDialect): + kwargs["dialect"] = dialect + # Rewrite sqlglot AST to have normalized and qualified tables and columns. - glot_expr = qualify( - glot_expr, - dialect=dialect, - quote_identifiers=False, - isolate_tables=True, - validate_qualify_columns=False, - ) + glot_expr = qualify(glot_expr, **kwargs) # Rewrite sqlglot AST to remove unused columns projections. glot_expr = pushdown_projections(glot_expr) @@ -391,6 +396,8 @@ def convert_dialect_to_sqlglot(dialect: DatabaseDialect) -> SQLGlotDialect: return SQLGlotDialect() elif dialect == DatabaseDialect.SQLITE: return SQLiteDialect() + elif dialect == DatabaseDialect.SNOWFLAKE: + return SnowflakeDialect() elif dialect == DatabaseDialect.MYSQL: return MySQLDialect() else: diff --git a/pydough/sqlglot/override_simplify.py b/pydough/sqlglot/override_simplify.py new file mode 100644 index 000000000..c1b695a17 --- /dev/null +++ b/pydough/sqlglot/override_simplify.py @@ -0,0 +1,227 @@ +""" +Overridden version of the simplify.py file from sqlglot. 
+""" + +from __future__ import annotations + +import datetime +import typing as t + +from sqlglot import Dialect, exp +from sqlglot.helper import merge_ranges, while_changing +from sqlglot.optimizer.simplify import ( + DATETRUNC_BINARY_COMPARISONS, + DATETRUNC_COMPARISONS, + DATETRUNCS, + FINAL, + UnsupportedUnit, + _datetrunc_eq_expression, + _datetrunc_range, + _is_datetrunc_predicate, + absorb_and_eliminate, + catch, + connector_depth, + date_floor, + date_literal, + extract_date, + extract_type, + flatten, + logger, + propagate_constants, + remove_complements, + remove_where_true, + rewrite_between, + simplify_coalesce, + simplify_concat, + simplify_conditionals, + simplify_connectors, + simplify_equality, + simplify_literals, + simplify_not, + simplify_parens, + simplify_startswith, + sort_comparison, + uniq_sort, +) + +if t.TYPE_CHECKING: + from sqlglot.dialects.dialect import DialectType + + DateTruncBinaryTransform = t.Callable[ + [exp.Expression, datetime.date, str, Dialect, exp.DataType], + exp.Expression | None, + ] + + +def simplify( + expression: exp.Expression, + constant_propagation: bool = False, + dialect: DialectType = None, + max_depth: int | None = None, +): + """ + Rewrite sqlglot AST to simplify expressions. + + Example: + >>> import sqlglot + >>> expression = sqlglot.parse_one("TRUE AND TRUE") + >>> simplify(expression).sql() + 'TRUE' + + Args: + expression: expression to simplify + constant_propagation: whether the constant propagation rule should be used + max_depth: Chains of Connectors (AND, OR, etc) exceeding `max_depth` will be skipped + Returns: + sqlglot.Expression: simplified expression + """ + + dialect = Dialect.get_or_raise(dialect) + + def _simplify(expression, root=True): + if ( + max_depth + and isinstance(expression, exp.Connector) + and not isinstance(expression.parent, exp.Connector) + ): + depth = connector_depth(expression) + if depth > max_depth: + logger.info( + f"Skipping simplification because connector depth {depth} exceeds max {max_depth}" + ) + return expression + + if expression.meta.get(FINAL): + return expression + + # group by expressions cannot be simplified, for example + # select x + 1 + 1 FROM y GROUP BY x + 1 + 1 + # the projection must exactly match the group by key + group = expression.args.get("group") + + if group and hasattr(expression, "selects"): + groups = set(group.expressions) + group.meta[FINAL] = True + + for e in expression.selects: + for node in e.walk(): + if node in groups: + e.meta[FINAL] = True + break + + having = expression.args.get("having") + if having: + for node in having.walk(): + if node in groups: + having.meta[FINAL] = True + break + + # Pre-order transformations + node = expression + node = rewrite_between(node) + node = uniq_sort(node, root) + node = absorb_and_eliminate(node, root) + node = simplify_concat(node) + node = simplify_conditionals(node) + + if constant_propagation: + node = propagate_constants(node, root) + + exp.replace_children(node, lambda e: _simplify(e, False)) + + # Post-order transformations + node = simplify_not(node) + node = flatten(node) + node = simplify_connectors(node, root) + node = remove_complements(node, root) + node = simplify_coalesce(node) + node.parent = expression.parent + node = simplify_literals(node, root) + node = simplify_equality(node) + node = simplify_parens(node) + node = simplify_datetrunc(node, dialect) + node = sort_comparison(node) + node = simplify_startswith(node) + + if root: + expression.replace(node) + return node + + expression = 
while_changing(expression, _simplify) + remove_where_true(expression) + return expression + + +@catch(ModuleNotFoundError, UnsupportedUnit) +def simplify_datetrunc(expression: exp.Expression, dialect: Dialect) -> exp.Expression: + """Simplify expressions like `DATE_TRUNC('year', x) >= CAST('2021-01-01' AS DATE)`""" + comparison = expression.__class__ + + if isinstance(expression, DATETRUNCS): + this = expression.this + trunc_type = extract_type(this) + date = extract_date(this) + #### Start of PyDough Change #### + # If date is datetime.datetime, it should NOT enter the if statement + # because `date_floor` only works correctly on datetime.date + if date and not isinstance(date, datetime.datetime) and expression.unit: + return date_literal( + date_floor(date, expression.unit.name.lower(), dialect), trunc_type + ) + #### End of PyDough Change #### + elif comparison not in DATETRUNC_COMPARISONS: + return expression + + if isinstance(expression, exp.Binary): + l, r = expression.left, expression.right # noqa: E741 + + if not _is_datetrunc_predicate(l, r): + return expression + + l = t.cast(exp.DateTrunc, l) # noqa: E741 + trunc_arg = l.this + unit = l.unit.name.lower() + date = extract_date(r) + + if not date: + return expression + + return ( + DATETRUNC_BINARY_COMPARISONS[comparison]( + trunc_arg, date, unit, dialect, extract_type(r) + ) + or expression + ) + + if isinstance(expression, exp.In): + l = expression.this # noqa: E741 + rs = expression.expressions + + if rs and all(_is_datetrunc_predicate(l, r) for r in rs): + l = t.cast(exp.DateTrunc, l) # noqa: E741 + unit = l.unit.name.lower() + + ranges = [] + for r in rs: + date = extract_date(r) + if not date: + return expression + drange = _datetrunc_range(date, unit, dialect) + if drange: + ranges.append(drange) + + if not ranges: + return expression + + ranges = merge_ranges(ranges) + target_type = extract_type(*rs) + + return exp.or_( + *[ + _datetrunc_eq_expression(l, drange, target_type) + for drange in ranges + ], + copy=False, + ) + + return expression diff --git a/pydough/sqlglot/sqlglot_relational_expression_visitor.py b/pydough/sqlglot/sqlglot_relational_expression_visitor.py index a19e35f07..bb9f627ad 100644 --- a/pydough/sqlglot/sqlglot_relational_expression_visitor.py +++ b/pydough/sqlglot/sqlglot_relational_expression_visitor.py @@ -223,6 +223,9 @@ def visit_window_expression(self, window_expression: WindowCallExpression) -> No order_exprs.append(sqlglot_expressions.convert("1")) this: SQLGlotExpression window_spec: sqlglot_expressions.WindowSpec | None = None + input_types: list[PyDoughType] = [ + arg.data_type for arg in window_expression.inputs + ] match window_expression.op.function_name: case "PERCENTILE": # Extract the number of buckets to use for the percentile @@ -263,7 +266,9 @@ def visit_window_expression(self, window_expression: WindowCallExpression) -> No ) this = func(**lag_args) case "RELSUM": - this = sqlglot_expressions.Sum.from_arg_list(arg_exprs) + this = self._bindings.convert_call_to_sqlglot( + pydop.SUM, arg_exprs, input_types + ) window_spec = self.get_window_spec(window_expression.kwargs) case "RELAVG": this = sqlglot_expressions.Avg.from_arg_list(arg_exprs) diff --git a/pydough/sqlglot/transform_bindings/README.md b/pydough/sqlglot/transform_bindings/README.md index 9ec33760a..5623d92fa 100644 --- a/pydough/sqlglot/transform_bindings/README.md +++ b/pydough/sqlglot/transform_bindings/README.md @@ -8,6 +8,7 @@ The Transform Bindings module provides the following notable APIs: - `BaseTransformBindings`: 
The base class for converting function calls from relational expressions into the SQLGlot AST, used for generic ANSI SQL. - `SQLiteTransformBindings`: Subclass of `BaseTransformBindings` for the SQLite dialect, providing SQLite-specific implementations. +- `SnowflakeTransformBindings`: Subclass of `BaseTransformBindings` for the Snowflake dialect, providing Snowflake-specific implementations. - `MySQLTransformBindings`: Subclass of `BaseTransformBindings` for the MySQL dialect, providing MySQL-specific implementations. - `bindings_from_dialect`: Factory function that returns the appropriate binding instance for a specific database dialect. diff --git a/pydough/sqlglot/transform_bindings/__init__.py b/pydough/sqlglot/transform_bindings/__init__.py index 5f7f161c9..9e06d3fce 100644 --- a/pydough/sqlglot/transform_bindings/__init__.py +++ b/pydough/sqlglot/transform_bindings/__init__.py @@ -7,6 +7,7 @@ "BaseTransformBindings", "MySQLTransformBindings", "SQLiteTransformBindings", + "SnowflakeTransformBindings", "bindings_from_dialect", ] @@ -17,6 +18,7 @@ from .base_transform_bindings import BaseTransformBindings from .mysql_transform_bindings import MySQLTransformBindings +from .sf_transform_bindings import SnowflakeTransformBindings from .sqlite_transform_bindings import SQLiteTransformBindings if TYPE_CHECKING: @@ -45,6 +47,8 @@ def bindings_from_dialect( return BaseTransformBindings(configs, visitor) case DatabaseDialect.SQLITE: return SQLiteTransformBindings(configs, visitor) + case DatabaseDialect.SNOWFLAKE: + return SnowflakeTransformBindings(configs, visitor) case DatabaseDialect.MYSQL: return MySQLTransformBindings(configs, visitor) case _: diff --git a/pydough/sqlglot/transform_bindings/base_transform_bindings.py b/pydough/sqlglot/transform_bindings/base_transform_bindings.py index 2690efe6c..5a1ff17a9 100644 --- a/pydough/sqlglot/transform_bindings/base_transform_bindings.py +++ b/pydough/sqlglot/transform_bindings/base_transform_bindings.py @@ -87,7 +87,6 @@ def dialect_dow_mapping(self) -> dict[str, int]: standard_func_bindings: dict[ pydop.PyDoughExpressionOperator, sqlglot_expressions.Func ] = { - pydop.SUM: sqlglot_expressions.Sum, pydop.AVG: sqlglot_expressions.Avg, pydop.MIN: sqlglot_expressions.Min, pydop.MAX: sqlglot_expressions.Max, @@ -214,6 +213,8 @@ def convert_call_to_sqlglot( return self.convert_sign(args, types) case pydop.ROUND: return self.convert_round(args, types) + case pydop.SUM: + return self.convert_sum(args, types) case pydop.CEIL: return self.convert_ceil(args, types) case pydop.FLOOR: @@ -292,6 +293,14 @@ def make_datetime_arg(self, expr: SQLGlotExpression) -> SQLGlotExpression: return self.handle_datetime_base_arg(expr) return expr + def convert_sum( + self, args: SQLGlotExpression, types: list[PyDoughType] + ) -> SQLGlotExpression: + """ + Converts a SUM function call to its SQLGlot equivalent. 
+ """ + return sqlglot_expressions.Sum.from_arg_list(args) + def convert_find( self, args: list[SQLGlotExpression], @@ -753,7 +762,7 @@ def convert_slice( ), expression=sql_zero, ), - sql_empty_str, # If length ≀ 0, return empty string + sql_zero, # If length ≀ 0, return empty string # Otherwise calculate actual length sqlglot_expressions.Sub( this=stop_idx_adjusted_glot, @@ -795,7 +804,7 @@ def convert_slice( ), expression=sql_zero, ), - sql_empty_str, # If length ≀ 0, return empty string + sql_zero, # If length ≀ 0, return empty string sqlglot_expressions.Sub( # Otherwise calculate actual length this=stop_idx_adjusted_glot, expression=start_idx_adjusted_glot, @@ -1362,10 +1371,17 @@ def apply_datetime_truncation( Returns: The SQLGlot expression to truncate `base`. """ - return sqlglot_expressions.DateTrunc( - this=self.make_datetime_arg(base), - unit=sqlglot_expressions.Var(this=unit.value), - ) + match unit: + case DateTimeUnit.HOUR | DateTimeUnit.MINUTE | DateTimeUnit.SECOND: + return sqlglot_expressions.TimestampTrunc( + this=self.make_datetime_arg(base), + unit=sqlglot_expressions.Var(this=unit.value.lower()), + ) + case _: + return sqlglot_expressions.DateTrunc( + this=self.make_datetime_arg(base), + unit=sqlglot_expressions.Var(this=unit.value.lower()), + ) def apply_datetime_offset( self, base: SQLGlotExpression, amt: int, unit: DateTimeUnit diff --git a/pydough/sqlglot/transform_bindings/sf_transform_bindings.py b/pydough/sqlglot/transform_bindings/sf_transform_bindings.py new file mode 100644 index 000000000..1796a329b --- /dev/null +++ b/pydough/sqlglot/transform_bindings/sf_transform_bindings.py @@ -0,0 +1,164 @@ +""" +Definition of SQLGlot transformation bindings for the Snowflake dialect. +""" + +__all__ = ["SnowflakeTransformBindings"] + + +import sqlglot.expressions as sqlglot_expressions +from sqlglot.expressions import Expression as SQLGlotExpression + +import pydough.pydough_operators as pydop +from pydough.types import PyDoughType +from pydough.types.boolean_type import BooleanType + +from .base_transform_bindings import BaseTransformBindings +from .sqlglot_transform_utils import DateTimeUnit + + +class SnowflakeTransformBindings(BaseTransformBindings): + """ + Subclass of BaseTransformBindings for the Snowflake dialect. + """ + + PYDOP_TO_SNOWFLAKE_FUNC: dict[pydop.PyDoughExpressionOperator, str] = { + pydop.STARTSWITH: "STARTSWITH", + pydop.ENDSWITH: "ENDSWITH", + pydop.CONTAINS: "CONTAINS", + pydop.LPAD: "LPAD", + pydop.RPAD: "RPAD", + pydop.SIGN: "SIGN", + pydop.SMALLEST: "LEAST", + pydop.LARGEST: "GREATEST", + pydop.GETPART: "SPLIT_PART", + } + """ + Mapping of PyDough operators to equivalent Snowflake SQL function names + These are used to generate anonymous function calls in SQLGlot + """ + + def convert_call_to_sqlglot( + self, + operator: pydop.PyDoughExpressionOperator, + args: list[SQLGlotExpression], + types: list[PyDoughType], + ) -> SQLGlotExpression: + if operator in self.PYDOP_TO_SNOWFLAKE_FUNC: + return sqlglot_expressions.Anonymous( + this=self.PYDOP_TO_SNOWFLAKE_FUNC[operator], expressions=args + ) + + return super().convert_call_to_sqlglot(operator, args, types) + + def convert_sum( + self, arg: SQLGlotExpression, types: list[PyDoughType] + ) -> SQLGlotExpression: + """ + Converts a SUM function call to its SQLGlot equivalent. + This method checks the type of the argument to determine whether to use + COUNT_IF (for BooleanType) or SUM (for other types). + Arguments: + `arg` : The argument to the SUM function. + `types` : The types of the arguments. 
+ """ + match types[0]: + # If the argument is of BooleanType, it uses COUNT_IF to count true values. + case BooleanType(): + return sqlglot_expressions.CountIf(this=arg[0]) + case _: + # For other types, use SUM directly + return sqlglot_expressions.Sum(this=arg[0]) + + def convert_extract_datetime( + self, + args: list[SQLGlotExpression], + types: list[PyDoughType], + unit: DateTimeUnit, + ) -> SQLGlotExpression: + # Update argument type to fit datetime + dt_expr: SQLGlotExpression = self.handle_datetime_base_arg(args[0]) + func_expr: SQLGlotExpression + match unit: + case DateTimeUnit.YEAR: + func_expr = sqlglot_expressions.Year(this=dt_expr) + case DateTimeUnit.QUARTER: + func_expr = sqlglot_expressions.Quarter(this=dt_expr) + case DateTimeUnit.MONTH: + func_expr = sqlglot_expressions.Month(this=dt_expr) + case DateTimeUnit.DAY: + func_expr = sqlglot_expressions.Day(this=dt_expr) + case DateTimeUnit.HOUR | DateTimeUnit.MINUTE | DateTimeUnit.SECOND: + func_expr = sqlglot_expressions.Anonymous( + this=unit.value.upper(), expressions=[dt_expr] + ) + return func_expr + + def apply_datetime_truncation( + self, base: SQLGlotExpression, unit: DateTimeUnit + ) -> SQLGlotExpression: + if unit is DateTimeUnit.WEEK: + # 1. Get shifted_weekday (# of days since the start of week) + # 2. Subtract shifted_weekday DAYS from the datetime + # 3. Truncate the result to the nearest day + shifted_weekday: SQLGlotExpression = self.days_from_start_of_week(base) + date_sub: SQLGlotExpression = sqlglot_expressions.DateSub( + this=base, + expression=shifted_weekday, + unit=sqlglot_expressions.Var(this="DAY"), + ) + return sqlglot_expressions.DateTrunc( + this=date_sub, + unit=sqlglot_expressions.Var(this="DAY"), + ) + else: + # For other units, use the standard SQLGlot truncation + return super().apply_datetime_truncation(base, unit) + + def convert_datediff( + self, + args: list[SQLGlotExpression], + types: list[PyDoughType], + ) -> SQLGlotExpression: + assert len(args) == 3 + if not isinstance(args[0], sqlglot_expressions.Literal): + raise ValueError( + f"Unsupported argument {args[0]} for DATEDIFF.It should be a string." + ) + elif not args[0].is_string: + raise ValueError( + f"Unsupported argument {args[0]} for DATEDIFF.It should be a string." + ) + unit: DateTimeUnit | None = DateTimeUnit.from_string(args[0].this) + if unit is DateTimeUnit.WEEK: + args = [ + args[0], + self.make_datetime_arg(args[1]), + self.make_datetime_arg(args[2]), + ] + # 1. For both dates, get # of shifted of days since the start of week + shifted_start: SQLGlotExpression = self.days_from_start_of_week(args[1]) + shifted_end: SQLGlotExpression = self.days_from_start_of_week(args[2]) + + # 2. Subtract shifted_weekday DAYS from the datetime + + date_sub_start: SQLGlotExpression = sqlglot_expressions.DateSub( + this=args[1], + expression=shifted_start, + unit=sqlglot_expressions.Var(this="DAY"), + ) + + date_sub_end: SQLGlotExpression = sqlglot_expressions.DateSub( + this=args[2], + expression=shifted_end, + unit=sqlglot_expressions.Var(this="DAY"), + ) + + # 3. 
Call DATEDIFF in weeks with the shifted dates + return sqlglot_expressions.DateDiff( + unit=sqlglot_expressions.Var(this=unit.value), + this=date_sub_end, + expression=date_sub_start, + ) + else: + # For other units, use base implementation + return super().convert_datediff(args, types) diff --git a/pyproject.toml b/pyproject.toml index c838208b9..c94b0296d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,9 +27,17 @@ dependencies = ["pytz", "sqlglot==26.7.0", "pandas>=2.0.0", "jupyterlab"] Repository = "https://github.com/bodo-ai/PyDough" [tool.uv] -dev-dependencies = ["pre-commit", "pytest", "ruff==0.6.7", "pytest-repeat"] +dev-dependencies = [ + "pre-commit", + "pytest", + "ruff==0.6.7", + "pytest-repeat", + "pydough[snowflake]", + "pydough[mysql]", +] [project.optional-dependencies] +snowflake = ["snowflake-connector-python[pandas]"] mysql = ["mysql-connector-python"] [build-system] @@ -47,13 +55,13 @@ lint.extend-select = [ "C4", # flake8-comprehensions "TID", # flake8-tidy-imports ] -# Don't run ruff on any ipython notebooks as they may have -# names that don't statically resolve. -exclude = ["**/*.ipynb"] lint.ignore = [ "UP038", ] +# Don't run ruff on any ipython notebooks as they may have +# names that don't statically resolve. +exclude = ["**/*.ipynb"] [tool.mypy] # Don't run mypy on any ipython notebooks diff --git a/pytest.ini b/pytest.ini index 166ec3474..8deed2a05 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,5 @@ [pytest] markers = execute: marks tests that do runtime execution (deselect with '-m "not execute"') + snowflake: marks tests that require Snowflake credentials mysql: marks tests that require MySQL credentials diff --git a/tests/conftest.py b/tests/conftest.py index 5dbf43055..d30000c67 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -136,6 +136,14 @@ def sample_graph_path() -> str: return f"{os.path.dirname(__file__)}/test_metadata/sample_graphs.json" +@pytest.fixture(scope="session") +def sf_sample_graph_path() -> str: + """ + Tuple of the path to the JSON file containing the Snowflake sample graphs. + """ + return f"{os.path.dirname(__file__)}/test_metadata/snowflake_sample_graphs.json" + + @pytest.fixture(scope="session") def udf_graph_path() -> str: """ @@ -211,6 +219,43 @@ def impl(name: str) -> GraphMetadata: return impl +@pytest.fixture(scope="session") +def get_sf_sample_graph( + sf_sample_graph_path: str, + valid_sample_graph_names: set[str], +) -> graph_fetcher: + """ + A function that takes in the name of a graph from the supported sample + Snowflake graph names and returns the metadata for that PyDough graph. + """ + + @cache + def impl(name: str) -> GraphMetadata: + if name not in valid_sample_graph_names: + raise Exception(f"Unrecognized graph name '{name}'") + return pydough.parse_json_metadata_from_file( + file_path=sf_sample_graph_path, graph_name=name + ) + + return impl + + +@pytest.fixture(scope="session") +def get_sf_defog_graphs() -> graph_fetcher: + """ + Returns the graphs for the defog database in Snowflake. 
+ """ + + @cache + def impl(name: str) -> GraphMetadata: + path: str = ( + f"{os.path.dirname(__file__)}/test_metadata/snowflake_defog_graphs.json" + ) + return pydough.parse_json_metadata_from_file(file_path=path, graph_name=name) + + return impl + + @pytest.fixture(scope="session") def get_udf_graph( udf_graph_path: str, valid_udf_graph_names: set[str] @@ -319,6 +364,7 @@ def sqlite_dialects(request) -> DatabaseDialect: params=[ pytest.param(DatabaseDialect.ANSI, id="ansi"), pytest.param(DatabaseDialect.SQLITE, id="sqlite"), + pytest.param(DatabaseDialect.SNOWFLAKE, id="snowflake"), pytest.param(DatabaseDialect.MYSQL, id="mysql"), ] ) @@ -494,6 +540,75 @@ def sqlite_technograph_connection() -> DatabaseContext: return DatabaseContext(DatabaseConnection(connection), DatabaseDialect.SQLITE) +SF_ENVS = ["SF_USERNAME", "SF_PASSWORD", "SF_ACCOUNT"] +""" + Snowflake environment variables required for connection. + SF_USERNAME: The username for the Snowflake account. + SF_PASSWORD: The password for the Snowflake account. + SF_ACCOUNT: The account identifier for the Snowflake account. +""" + + +def is_snowflake_env_set() -> bool: + """ + Check if the Snowflake environment variables are set. + + Returns: + bool: True if all required Snowflake environment variables are set, False otherwise. + """ + return all(os.getenv(env) for env in SF_ENVS) + + +@pytest.fixture +def sf_conn_db_context() -> Callable[[str, str], DatabaseContext]: + """ + This fixture is used to connect to the Snowflake TPCH database using + a connection object. + Return a DatabaseContext for the Snowflake TPCH database. + """ + + def _impl(database_name: str, schema_name: str) -> DatabaseContext: + if not is_snowflake_env_set(): + pytest.skip("Skipping Snowflake tests: environment variables not set.") + import snowflake.connector as sf_connector + + warehouse = "DEMO_WH" + password = os.getenv("SF_PASSWORD") + username = os.getenv("SF_USERNAME") + account = os.getenv("SF_ACCOUNT") + connection: sf_connector.connection.SnowflakeConnection = sf_connector.connect( + user=username, + password=password, + account=account, + warehouse=warehouse, + database=database_name, + schema=schema_name, + ) + # Run DEFOG_DAILY_UPDATE() only if data is older than 1 day + with connection.cursor() as cur: + cur.execute(""" + DECLARE last_mod DATE; + + BEGIN + -- Get table last modified date + SELECT DATE(LAST_ALTERED) INTO last_mod + FROM INFORMATION_SCHEMA.TABLES + WHERE table_catalog='DEFOG' + AND table_schema = 'BROKER' + AND table_name = 'SBDAILYPRICE'; + + -- If last modified is before today, call the procedure + IF (last_mod < CURRENT_DATE()) THEN + CALL DEFOG.BROKER.DEFOG_DAILY_UPDATE(); + END IF; + END; + """) + + return load_database_context("snowflake", connection=connection) + + return _impl + + MYSQL_ENVS = ["MYSQL_USERNAME", "MYSQL_PASSWORD"] """ The MySQL environment variables required for connection. 
@@ -584,11 +699,15 @@ def mysql_docker_setup() -> None: except subprocess.CalledProcessError as e: pytest.fail(f"Failed to set up MySQL Docker container: {e}") + # Check import is successful + try: + import mysql.connector as mysql_connector + except ImportError as e: + raise RuntimeError("mysql-connector-python is not installed") from e + # Wait for MySQL to be ready for _ in range(30): try: - import mysql.connector as mysql_connector - conn = mysql_connector.connect( host=MYSQL_HOST, port=MYSQL_PORT, @@ -598,7 +717,8 @@ def mysql_docker_setup() -> None: ) conn.close() break - except mysql_connector.Error: + except mysql_connector.Error as e: + print("Error occurred while connecting to MySQL:", e) time.sleep(1) else: subprocess.run(["docker", "rm", "-f", MYSQL_DOCKER_CONTAINER]) @@ -663,6 +783,32 @@ def _impl(database_name: str) -> DatabaseContext: return _impl +@pytest.fixture +def sf_params_tpch_db_context() -> DatabaseContext: + """ + This fixture is used to connect to the Snowflake TPCH database using + parameters instead of a connection object. + Return a DatabaseContext for the Snowflake TPCH database. + """ + if not is_snowflake_env_set(): + pytest.skip("Skipping Snowflake tests: environment variables not set.") + sf_tpch_db = "SNOWFLAKE_SAMPLE_DATA" + sf_tpch_schema = "TPCH_SF1" + warehouse = "DEMO_WH" + password = os.getenv("SF_PASSWORD") + username = os.getenv("SF_USERNAME") + account = os.getenv("SF_ACCOUNT") + return load_database_context( + "snowflake", + user=username, + password=password, + account=account, + warehouse=warehouse, + database=sf_tpch_db, + schema=sf_tpch_schema, + ) + + @pytest.fixture(scope="session") def mysql_params_tpch_db_context( require_mysql_env, mysql_docker_setup diff --git a/tests/gen_data/init_defog_mysql.sql b/tests/gen_data/init_defog_mysql.sql index 02aa196e5..a97ad2066 100644 --- a/tests/gen_data/init_defog_mysql.sql +++ b/tests/gen_data/init_defog_mysql.sql @@ -553,12 +553,17 @@ INSERT INTO `sales` (`_id`, `car_id`, `salesperson_id`, `customer_id`, `sale_pri (14, 2, 3, 1, 23200.00, SUBDATE(CURDATE(), INTERVAL 21 DAY)), (15, 8, 6, 12, 43500.00, SUBDATE(CURDATE(), INTERVAL 3 DAY)), (16, 10, 4, 2, 29500.00, SUBDATE(CURDATE(), INTERVAL 5 DAY)), - (17, 3, 2, 3, 46000.00, SUBDATE(CURDATE(), INTERVAL 7 DAY) + INTERVAL 1 DAY), - (18, 3, 2, 7, 47500.00, SUBDATE(CURDATE(), INTERVAL 7 DAY)), - (19, 3, 2, 10, 46500.00, SUBDATE(CURDATE(), INTERVAL 7 DAY) - INTERVAL 1 DAY), - (20, 4, 1, 3, 48000.00, SUBDATE(CURDATE(), INTERVAL 56 DAY) + INTERVAL 1 DAY), - (21, 4, 1, 7, 45000.00, SUBDATE(CURDATE(), INTERVAL 56 DAY)), - (22, 4, 1, 10, 49000.00, SUBDATE(CURDATE(), INTERVAL 56 DAY) - INTERVAL 1 DAY); + -- Expression to truncate the current date to the most recent Monday in MySQL: + -- `DT - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY` + -- Reason: MySQL's DAYOFWEEK() returns 1 for Sunday, 2 for Monday, ..., 7 for + -- Saturday. We subtract 2 so that Monday becomes 0, Tuesday becomes 1, etc. + -- and Sunday becomes -1 (which becomes 6 when taken modulo 7). 
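+ -- Worked example (illustrative): if CURRENT_DATE falls on a Wednesday,
+ -- DAYOFWEEK(CURRENT_DATE) = 4, so (4 - 2) % 7 = 2 and the expression steps
+ -- back 2 days to that week's Monday; on a Monday the offset is (2 - 2) % 7 = 0
+ -- and the date is unchanged. The rows below then subtract whole weeks
+ -- (7 or 56 days) and +/- 1 day from that Monday to place sales at known
+ -- week boundaries.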
+ (17, 3, 2, 3, 46000.00, CURRENT_DATE - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY - INTERVAL 7 DAY + INTERVAL 1 DAY), + (18, 3, 2, 7, 47500.00, CURRENT_DATE - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY - INTERVAL 7 DAY), + (19, 3, 2, 10, 46500.00, CURRENT_DATE - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY - INTERVAL 7 DAY - INTERVAL 1 DAY), + (20, 4, 1, 3, 48000.00, CURRENT_DATE - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY - INTERVAL 56 DAY + INTERVAL 1 DAY), + (21, 4, 1, 7, 45000.00, CURRENT_DATE - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY - INTERVAL 56 DAY), + (22, 4, 1, 10, 49000.00, CURRENT_DATE - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY - INTERVAL 56 DAY - INTERVAL 1 DAY); INSERT INTO `inventory_snapshots` (`_id`, `snapshot_date`, `car_id`, `is_in_inventory`) VALUES (1, '2023-03-15', 1, TRUE), @@ -603,12 +608,13 @@ INSERT INTO `payments_received` (`_id`, `sale_id`, `payment_date`, `payment_amou (15, 14, SUBDATE(CURDATE(), INTERVAL 1 DAY), 17200.00, 'financing'), (16, 15, SUBDATE(CURDATE(), INTERVAL 1 DAY), 37500.00, 'credit_card'), (17, 16, SUBDATE(CURDATE(), INTERVAL 5 DAY), 26500.00, 'debit_card'), - (18, 17, SUBDATE(CURDATE(), INTERVAL 7 DAY) + INTERVAL 1 DAY, 115000.00, 'financing'), - (19, 18, SUBDATE(CURDATE(), INTERVAL 7 DAY), 115000.00, 'credit_card'), - (20, 19, SUBDATE(CURDATE(), INTERVAL 7 DAY) - INTERVAL 1 DAY, 115000.00, 'debit_card'), - (21, 20, SUBDATE(CURDATE(), INTERVAL 56 DAY) + INTERVAL 1 DAY, 115000.00, 'cash'), - (22, 21, SUBDATE(CURDATE(), INTERVAL 56 DAY), 115000.00, 'check'), - (23, 22, SUBDATE(CURDATE(), INTERVAL 56 DAY) - INTERVAL 1 DAY, 115000.00, 'credit_card'); + (18, 17, CURRENT_DATE - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY - INTERVAL 7 DAY + INTERVAL 1 DAY, 115000.00, 'financing'), + (19, 18, CURRENT_DATE - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY - INTERVAL 7 DAY, 115000.00, 'credit_card'), + (20, 19, CURRENT_DATE - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY - INTERVAL 7 DAY - INTERVAL 1 DAY, 115000.00, 'debit_card'), + (21, 20, CURRENT_DATE - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY - INTERVAL 56 DAY + INTERVAL 1 DAY, 115000.00, 'cash'), + (22, 21, CURRENT_DATE - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY - INTERVAL 56 DAY, 115000.00, 'check'), + (23, 22, CURRENT_DATE - INTERVAL ((DAYOFWEEK(CURRENT_DATE) - 2) % 7) DAY - INTERVAL 56 DAY - INTERVAL 1 DAY, 115000.00, 'credit_card'); + INSERT INTO `payments_made` (`_id`, `vendor_name`, `payment_date`, `payment_amount`, `payment_method`, `invoice_number`, `invoice_date`, `due_date`) VALUES (1, 'Car Manufacturer Inc', '2023-03-01', 150000.00, 'bank_transfer', 'INV-001', '2023-02-25', '2023-03-25'), diff --git a/tests/gen_data/init_defog_sf.sql b/tests/gen_data/init_defog_sf.sql new file mode 100644 index 000000000..b4f2434bb --- /dev/null +++ b/tests/gen_data/init_defog_sf.sql @@ -0,0 +1,816 @@ +------------------------------------------------------------------------------- +-- Modified version of the defog.ai schema setup Defog database +-- CREATE DATABASE DEFOG; +------------------------------------------------------------------------------- + + +------------------------------------------------------------------------------- +-- For the BROKER SCHEMA +-- https://github.com/defog-ai/defog-data/blob/main/defog_data/broker/broker.sql + +-- Snowflake dialect compatibility replace: +-- - NOW() to CURRENT_DATE +-- - Cast timestamp strings e.g. 
+-- '2023-04-01 09:30:00' -> '2023-04-01 09:30:00'::timestamp +------------------------------------------------------------------------------- +CREATE SCHEMA BROKER; + +-- Dimension tables +CREATE TABLE sbCustomer ( + sbCustId varchar(20) PRIMARY KEY, + sbCustName varchar(100) NOT NULL, + sbCustEmail varchar(100) NOT NULL, + sbCustPhone varchar(20), + sbCustAddress1 varchar(200), + sbCustAddress2 varchar(200), + sbCustCity varchar(50), + sbCustState varchar(20), + sbCustCountry varchar(50), + sbCustPostalCode varchar(20), + sbCustJoinDate date NOT NULL, + sbCustStatus varchar(20) NOT NULL -- possible values: active, inactive, suspended, closed +); + +CREATE TABLE sbTicker ( + sbTickerId varchar(20) PRIMARY KEY, + sbTickerSymbol varchar(10) NOT NULL, + sbTickerName varchar(100) NOT NULL, + sbTickerType varchar(20) NOT NULL, -- possible values: stock, etf, mutualfund + sbTickerExchange varchar(50) NOT NULL, + sbTickerCurrency varchar(10) NOT NULL, + sbTickerDb2x varchar(20), -- 2 letter exchange code + sbTickerIsActive boolean NOT NULL +); + +-- Fact tables +CREATE TABLE sbDailyPrice ( + sbDpTickerId varchar(20) NOT NULL, + sbDpDate date NOT NULL, + sbDpOpen numeric(10,2) NOT NULL, + sbDpHigh numeric(10,2) NOT NULL, + sbDpLow numeric(10,2) NOT NULL, + sbDpClose numeric(10,2) NOT NULL, + sbDpVolume bigint NOT NULL, + sbDpEpochMs bigint NOT NULL, -- epoch milliseconds for timestamp + sbDpSource varchar(50) +); + +CREATE TABLE sbTransaction ( + sbTxId varchar(50) PRIMARY KEY, + sbTxCustId varchar(20) NOT NULL, + sbTxTickerId varchar(20) NOT NULL, + sbTxDateTime timestamp NOT NULL, + sbTxType varchar(20) NOT NULL, -- possible values: buy, sell + sbTxShares numeric(10,2) NOT NULL, + sbTxPrice numeric(10,2) NOT NULL, + sbTxAmount numeric(10,2) NOT NULL, + sbTxCcy varchar(10), -- transaction currency + sbTxTax numeric(10,2) NOT NULL, + sbTxCommission numeric(10,2) NOT NULL, + sbTxKpx varchar(10), -- internal code + sbTxSettlementDateStr varchar(25), -- settlement date as string in yyyyMMdd HH:mm:ss format. 
NULL if not settled + sbTxStatus varchar(10) NOT NULL -- possible values: success, fail, pending +); + + +-- sbCustomer +INSERT INTO sbCustomer (sbCustId, sbCustName, sbCustEmail, sbCustPhone, sbCustAddress1, sbCustCity, sbCustState, sbCustCountry, sbCustPostalCode, sbCustJoinDate, sbCustStatus) VALUES +('C001', 'john doe', 'john.doe@email.com', '555-123-4567', '123 Main St', 'Anytown', 'CA', 'USA', '90001', '2020-01-01', 'active'), +('C002', 'Jane Smith', 'jane.smith@email.com', '555-987-6543', '456 Oak Rd', 'Someville', 'NY', 'USA', '10002', '2019-03-15', 'active'), +('C003', 'Bob Johnson', 'bob.johnson@email.com', '555-246-8135', '789 Pine Ave', 'Mytown', 'TX', 'USA', '75000', '2022-06-01', 'inactive'), +('C004', 'Samantha Lee', 'samantha.lee@email.com', '555-135-7902', '246 Elm St', 'Yourtown', 'CA', 'USA', '92101', '2018-09-22', 'suspended'), +('C005', 'Michael Chen', 'michael.chen@email.com', '555-864-2319', '159 Cedar Ln', 'Anothertown', 'FL', 'USA', '33101', '2021-02-28', 'active'), +('C006', 'Emily Davis', 'emily.davis@email.com', '555-753-1904', '753 Maple Dr', 'Mytown', 'TX', 'USA', '75000', '2020-07-15', 'active'), +('C007', 'David Kim', 'david.kim@email.com', '555-370-2648', '864 Oak St', 'Anothertown', 'FL', 'USA', '33101', '2022-11-05', 'active'), +('C008', 'Sarah Nguyen', 'sarah.nguyen@email.com', '555-623-7419', '951 Pine Rd', 'Yourtown', 'CA', 'USA', '92101', '2019-04-01', 'closed'), +('C009', 'William Garcia', 'william.garcia@email.com', '555-148-5326', '258 Elm Ave', 'Anytown', 'CA', 'USA', '90001', '2021-08-22', 'active'), +('C010', 'Jessica Hernandez', 'jessica.hernandez@email.com', '555-963-8520', '147 Cedar Blvd', 'Someville', 'NY', 'USA', '10002', '2020-03-10', 'inactive'), +('C011', 'Alex Rodriguez', 'alex.rodriguez@email.com', '555-246-1357', '753 Oak St', 'Newtown', 'NJ', 'USA', '08801', '2023-01-15', 'active'), +('C012', 'Olivia Johnson', 'olivia.johnson@email.com', '555-987-6543', '321 Elm St', 'Newtown', 'NJ', 'USA', '08801', '2023-01-05', 'active'), +('C013', 'Ethan Davis', 'ethan.davis@email.com', '555-246-8135', '654 Oak Ave', 'Someville', 'NY', 'USA', '10002', '2023-02-12', 'active'), +('C014', 'Ava Wilson', 'ava.wilson@email.com', '555-135-7902', '987 Pine Rd', 'Anytown', 'CA', 'USA', '90001', '2023-03-20', 'active'), +('C015', 'Emma Brown', 'emma.brown@email.com', '555-987-6543', '789 Oak St', 'Newtown', 'NJ', 'USA', '08801', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '5 months', 'active'), +('C016', 'sophia martinez', 'sophia.martinez@email.com', '555-246-8135', '159 Elm Ave', 'Anytown', 'CA', 'USA', '90001', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '4 months', 'active'), +('C017', 'Jacob Taylor', 'jacob.taylor@email.com', '555-135-7902', '753 Pine Rd', 'Someville', 'NY', 'USA', '10002', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '3 months', 'active'), +('C018', 'Michael Anderson', 'michael.anderson@email.com', '555-864-2319', '321 Cedar Ln', 'Yourtown', 'CA', 'USA', '92101', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months', 'active'), +('C019', 'Isabella Thompson', 'isabella.thompson@email.com', '555-753-1904', '987 Maple Dr', 'Anothertown', 'FL', 'USA', '33101', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month', 'active'), +('C020', 'Maurice Lee', 'maurice.lee@email.com', '555-370-2648', '654 Oak St', 'Mytown', 'TX', 'USA', '75000', DATE_TRUNC('month', CURRENT_DATE), 'active'); + + +-- sbTicker +INSERT INTO sbTicker (sbTickerId, sbTickerSymbol, sbTickerName, sbTickerType, sbTickerExchange, sbTickerCurrency, sbTickerDb2x, 
sbTickerIsActive) VALUES +('T001', 'AAPL', 'Apple Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T002', 'MSFT', 'Microsoft Corporation', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T003', 'AMZN', 'Amazon.com, Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T004', 'TSLA', 'Tesla, Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T005', 'GOOGL', 'Alphabet Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T006', 'FB', 'Meta Platforms, Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T007', 'BRK.B', 'Berkshire Hathaway Inc.', 'stock', 'NYSE', 'USD', 'NY', true), +('T008', 'JPM', 'JPMorgan Chase & Co.', 'stock', 'NYSE', 'USD', 'NY', true), +('T009', 'V', 'Visa Inc.', 'stock', 'NYSE', 'USD', 'NY', true), +('T010', 'PG', 'Procter & Gamble Company', 'stock', 'NYSE', 'USD', 'NY', true), +('T011', 'SPY', 'SPDR S&P 500 ETF Trust', 'etf', 'NYSE Arca', 'USD', 'NX', true), +('T012', 'QQQ', 'Invesco QQQ Trust', 'etf', 'NASDAQ', 'USD', 'NQ', true), +('T013', 'VTI', 'Vanguard Total Stock Market ETF', 'etf', 'NYSE Arca', 'USD', 'NX', true), +('T014', 'VXUS', 'Vanguard Total International Stock ETF', 'etf', 'NASDAQ', 'USD', 'NQ', true), +('T015', 'VFINX', 'Vanguard 500 Index Fund', 'mutualfund', 'Vanguard', 'USD', 'VG', true), +('T016', 'VTSAX', 'Vanguard Total Stock Market Index Fund', 'mutualfund', 'Vanguard', 'USD', 'VG', true), +('T017', 'VIGAX', 'Vanguard Growth Index Fund', 'mutualfund', 'Vanguard', 'USD', 'VG', true), +('T018', 'GOOG', 'Alphabet Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T019', 'VTI', 'Vanguard Total Stock Market ETF', 'etf', 'NYSE Arca', 'USD', 'NX', true), +('T020', 'VTSAX', 'Vanguard Total Stock Market Index Fund', 'mutualfund', 'Vanguard', 'USD', 'VG', true), +('T021', 'NFLX', 'Netflix, Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true); + +-- sbDailyPrice +INSERT INTO sbDailyPrice (sbDpTickerId, sbDpDate, sbDpOpen, sbDpHigh, sbDpLow, sbDpClose, sbDpVolume, sbDpEpochMs, sbDpSource) VALUES +('T001', '2023-04-01', 150.00, 152.50, 148.75, 151.25, 75000000, 1680336000000, 'NYSE'), +('T002', '2023-04-01', 280.00, 282.75, 279.50, 281.00, 35000000, 1680336000000, 'NASDAQ'), +('T003', '2023-04-01', 3200.00, 3225.00, 3180.00, 3210.00, 4000000, 1680336000000, 'NASDAQ'), +('T004', '2023-04-01', 180.00, 185.00, 178.50, 184.25, 20000000, 1680336000000, 'NASDAQ'), +('T005', '2023-04-01', 2500.00, 2525.00, 2475.00, 2510.00, 1500000, 1680336000000, 'NASDAQ'), +('T006', '2023-04-01', 200.00, 205.00, 198.00, 202.50, 15000000, 1680336000000, 'NASDAQ'), +('T007', '2023-04-01', 400000.00, 402500.00, 398000.00, 401000.00, 10000, 1680336000000, 'NYSE'), +('T008', '2023-04-01', 130.00, 132.50, 128.75, 131.00, 12000000, 1680336000000, 'NYSE'), +('T009', '2023-04-01', 220.00, 222.50, 218.00, 221.00, 8000000, 1680336000000, 'NYSE'), +('T010', '2023-04-01', 140.00, 142.00, 139.00, 141.50, 6000000, 1680336000000, 'NYSE'), +('T001', '2023-04-02', 151.50, 153.00, 150.00, 152.00, 70000000, 1680422400000, 'NYSE'), +('T002', '2023-04-02', 281.25, 283.50, 280.00, 282.75, 32000000, 1680422400000, 'NASDAQ'), +('T003', '2023-04-02', 3212.00, 3230.00, 3200.00, 3225.00, 3800000, 1680422400000, 'NASDAQ'), +('T004', '2023-04-02', 184.50, 187.00, 183.00, 186.00, 18000000, 1680422400000, 'NASDAQ'), +('T005', '2023-04-02', 2512.00, 2530.00, 2500.00, 2520.00, 1400000, 1680422400000, 'NASDAQ'), +('T006', '2023-04-02', 203.00, 206.50, 201.00, 205.00, 14000000, 1680422400000, 'NASDAQ'), +('T007', '2023-04-02', 401500.00, 403000.00, 400000.00, 402000.00, 9500, 1680422400000, 'NYSE'), +('T008', '2023-04-02', 131.25, 133.00, 130.00, 
132.50, 11000000, 1680422400000, 'NYSE'), +('T009', '2023-04-02', 221.50, 223.00, 220.00, 222.00, 7500000, 1680422400000, 'NYSE'), +('T010', '2023-04-02', 141.75, 143.00, 140.50, 142.25, 5500000, 1680422400000, 'NYSE'), +('T001', '2023-04-03', 152.25, 154.00, 151.00, 153.50, 65000000, 1680508800000, 'NYSE'), +('T002', '2023-04-03', 283.00, 285.00, 281.50, 284.00, 30000000, 1680508800000, 'NASDAQ'), +('T003', '2023-04-03', 3227.00, 3240.00, 3220.00, 3235.00, 3600000, 1680508800000, 'NASDAQ'), +('T004', '2023-04-03', 186.25, 188.50, 185.00, 187.75, 16000000, 1680508800000, 'NASDAQ'), +('T005', '2023-04-03', 2522.00, 2540.00, 2515.00, 2535.00, 1300000, 1680508800000, 'NASDAQ'), +('T006', '2023-04-03', 205.50, 208.00, 203.50, 207.00, 13000000, 1680508800000, 'NASDAQ'), +('T007', '2023-04-03', 402500.00, 404000.00, 401000.00, 403500.00, 9000, 1680508800000, 'NYSE'), +('T008', '2023-04-03', 132.75, 134.50, 131.50, 133.75, 10000000, 1680508800000, 'NYSE'), +('T009', '2023-04-03', 222.25, 224.00, 221.00, 223.50, 7000000, 1680508800000, 'NYSE'), +('T010', '2023-04-03', 142.50, 144.00, 141.50, 143.25, 5000000, 1680508800000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '8 days', 204.00, 204.50, 202.75, 203.25, 8000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '8 days') * 1000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '6 days', 205.00, 207.50, 203.75, 206.25, 8000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '6 days') * 1000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '5 days', 206.50, 208.00, 205.00, 207.00, 7500000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '5 days') * 1000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '4 days', 207.25, 209.00, 206.50, 208.50, 7000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '4 days') * 1000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '3 days', 208.75, 210.50, 207.75, 209.75, 6500000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '3 days') * 1000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '2 days', 210.00, 211.75, 209.25, 211.00, 6000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '2 days') * 1000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '1 day', 211.25, 213.00, 210.50, 212.25, 5500000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '1 day') * 1000, 'NYSE'), +('T019', CURRENT_DATE, 212.50, 214.25, 211.75, 213.50, 5000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000, 'NYSE'), +('T020', CURRENT_DATE - INTERVAL '6 days', 82.00, 83.00, 81.50, 82.50, 1000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '6 days') * 1000, 'Vanguard'), +('T020', CURRENT_DATE - INTERVAL '5 days', 82.60, 83.60, 82.10, 83.10, 950000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '5 days') * 1000, 'Vanguard'), +('T020', CURRENT_DATE - INTERVAL '4 days', 83.20, 84.20, 82.70, 83.70, 900000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '4 days') * 1000, 'Vanguard'), +('T020', CURRENT_DATE - INTERVAL '3 days', 83.80, 84.80, 83.30, 84.30, 850000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '3 days') * 1000, 'Vanguard'), +('T020', CURRENT_DATE - INTERVAL '2 days', 84.40, 85.40, 83.90, 84.90, 800000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '2 days') * 1000, 'Vanguard'), +('T020', CURRENT_DATE - INTERVAL '1 day', 85.00, 86.00, 84.50, 85.50, 750000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '1 day') * 1000, 'Vanguard'), +('T020', CURRENT_DATE, 85.60, 86.60, 85.10, 86.10, 700000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000, 'Vanguard'), +('T021', CURRENT_DATE - INTERVAL '6 days', 300.00, 305.00, 297.50, 302.50, 10000000, EXTRACT(EPOCH 
FROM CURRENT_TIMESTAMP - INTERVAL '6 days') * 1000, 'NASDAQ'), +('T021', CURRENT_DATE - INTERVAL '5 days', 303.00, 308.00, 300.50, 305.50, 9500000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '5 days') * 1000, 'NASDAQ'), +('T021', CURRENT_DATE - INTERVAL '4 days', 306.00, 311.00, 303.50, 308.50, 9000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '4 days') * 1000, 'NASDAQ'), +('T021', CURRENT_DATE - INTERVAL '3 days', 309.00, 314.00, 306.50, 311.50, 8500000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '3 days') * 1000, 'NASDAQ'), +('T021', CURRENT_DATE - INTERVAL '2 days', 312.00, 317.00, 309.50, 314.50, 8000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '2 days') * 1000, 'NASDAQ'), +('T021', CURRENT_DATE - INTERVAL '1 day', 315.00, 320.00, 312.50, 317.50, 7500000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '1 day') * 1000, 'NASDAQ'), +('T021', CURRENT_DATE, 318.00, 323.00, 315.50, 320.50, 7000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000, 'NASDAQ'); + +-- sbTransaction +INSERT INTO sbTransaction (sbTxId, sbTxCustId, sbTxTickerId, sbTxDateTime, sbTxType, sbTxShares, sbTxPrice, sbTxAmount, sbTxCcy, sbTxTax, sbTxCommission, sbTxKpx, sbTxSettlementDateStr, sbTxStatus) VALUES +('TX001', 'C001', 'T001', '2023-04-01 09:30:00'::timestamp, 'buy', 100, 150.00, 15000.00, 'USD', 75.00, 10.00, 'KP001', '20230401 09:30:00', 'success'), +('TX002', 'C002', 'T002', '2023-04-01 10:15:00'::timestamp, 'sell', 50, 280.00, 14000.00, 'USD', 70.00, 10.00, 'KP002', '20230401 10:15:00', 'success'), +('TX003', 'C003', 'T003', '2023-04-01 11:00:00'::timestamp, 'buy', 10, 3200.00, 32000.00, 'USD', 160.00, 20.00, 'KP003', '20230401 11:00:00', 'success'), +('TX004', 'C003', 'T004', '2023-04-01 11:45:00'::timestamp, 'sell', 25, 180.00, 4500.00, 'USD', 22.50, 5.00, 'KP004', '20230401 11:45:00', 'success'), +('TX005', 'C005', 'T005', '2023-04-01 12:30:00'::timestamp, 'buy', 5, 2500.00, 12500.00, 'USD', 62.50, 15.00, 'KP005', '20230401 12:30:00', 'success'), +('TX006', 'C002', 'T006', '2023-04-01 13:15:00'::timestamp, 'sell', 75, 200.00, 15000.00, 'USD', 75.00, 10.00, 'KP006', '20230401 13:15:00', 'success'), +('TX007', 'C003', 'T007', '2023-04-01 14:00:00'::timestamp, 'buy', 1, 400000.00, 400000.00, 'USD', 2000.00, 100.00, 'KP007', '20230401 14:00:00', 'success'), +('TX008', 'C003', 'T008', '2023-04-01 14:45:00'::timestamp, 'sell', 100, 130.00, 13000.00, 'USD', 65.00, 10.00, 'KP008', '20230401 14:45:00', 'success'), +('TX009', 'C009', 'T009', '2023-04-01 15:30:00'::timestamp, 'buy', 50, 220.00, 11000.00, 'USD', 55.00, 10.00, 'KP009', '20230401 15:30:00', 'success'), +('TX010', 'C002', 'T010', '2023-04-01 16:15:00'::timestamp, 'sell', 80, 140.00, 11200.00, 'USD', 56.00, 10.00, 'KP010', '20230401 16:15:00', 'success'), +('TX011', 'C001', 'T001', '2023-04-02 09:30:00'::timestamp, 'sell', 50, 151.50, 7575.00, 'USD', 37.88, 5.00, 'KP011', '20230402 09:30:00', 'success'), +('TX012', 'C002', 'T002', '2023-04-02 10:15:00'::timestamp, 'buy', 30, 281.25, 8437.50, 'USD', 42.19, 7.50, 'KP012', '20230402 10:15:00', 'fail'), +('TX013', 'C003', 'T003', '2023-04-02 11:00:00'::timestamp, 'sell', 5, 3212.00, 16060.00, 'USD', 80.30, 15.00, 'KP013', '20230402 11:00:00', 'success'), +('TX014', 'C004', 'T004', '2023-04-02 11:45:00'::timestamp, 'buy', 15, 184.50, 2767.50, 'USD', 13.84, 5.00, 'KP014', '20230402 11:45:00', 'success'), +('TX015', 'C005', 'T005', '2023-04-02 12:30:00'::timestamp, 'sell', 2, 2512.00, 5024.00, 'USD', 25.12, 10.00, 'KP015', '20230402 12:30:00', 'success'), +('TX016', 'C006', 'T006', 
'2023-04-02 13:15:00'::timestamp, 'buy', 50, 203.00, 10150.00, 'USD', 50.75, 10.00, 'KP016', '20230402 13:15:00', 'success'), +('TX017', 'C007', 'T007', '2023-04-02 14:00:00'::timestamp, 'sell', 1, 401500.00, 401500.00, 'USD', 2007.50, 100.00, 'KP017', '20230402 14:00:00', 'success'), +('TX018', 'C008', 'T008', '2023-04-02 14:45:00'::timestamp, 'buy', 75, 131.25, 9843.75, 'USD', 49.22, 7.50, 'KP018', '20230402 14:45:00', 'success'), +('TX019', 'C009', 'T009', '2023-04-02 15:30:00'::timestamp, 'sell', 25, 221.50, 5537.50, 'USD', 27.69, 5.00, 'KP019', '20230402 15:30:00', 'success'), +('TX020', 'C010', 'T010', '2023-04-02 16:15:00'::timestamp, 'buy', 60, 141.75, 8505.00, 'USD', 42.53, 7.50, 'KP020', '20230402 16:15:00', 'success'), +('TX021', 'C001', 'T001', '2023-04-03 09:30:00'::timestamp, 'buy', 75, 152.25, 11418.75, 'USD', 57.09, 10.00, 'KP021', '20230403 09:30:00', 'fail'), +('TX022', 'C002', 'T002', '2023-04-03 10:15:00'::timestamp, 'sell', 40, 283.00, 11320.00, 'USD', 56.60, 10.00, 'KP022', '20230403 10:15:00', 'success'), +('TX023', 'C003', 'T003', '2023-04-03 11:00:00'::timestamp, 'buy', 8, 3227.00, 25816.00, 'USD', 129.08, 20.00, 'KP023', '20230403 11:00:00', 'success'), +('TX024', 'C004', 'T004', '2023-04-03 11:45:00'::timestamp, 'sell', 20, 186.25, 3725.00, 'USD', 18.63, 5.00, 'KP024', '20230403 11:45:00', 'success'), +('TX025', 'C005', 'T005', '2023-04-03 12:30:00'::timestamp, 'buy', 3, 2522.00, 7566.00, 'USD', 37.83, 15.00, 'KP025', '20230403 12:30:00', 'success'), +('TX026', 'C006', 'T006', '2023-04-03 13:15:00'::timestamp, 'sell', 60, 205.50, 12330.00, 'USD', 61.65, 10.00, 'KP026', '20230403 13:15:00', 'success'), +('TX027', 'C007', 'T007', '2023-04-03 14:00:00'::timestamp, 'buy', 1, 402500.00, 402500.00, 'USD', 2012.50, 100.00, 'KP027', '20230403 14:00:00', 'success'), +('TX028', 'C008', 'T008', '2023-04-03 14:45:00'::timestamp, 'sell', 90, 132.75, 11947.50, 'USD', 59.74, 7.50, 'KP028', '20230403 14:45:00', 'success'), +('TX029', 'C009', 'T009', '2023-04-03 15:30:00'::timestamp, 'buy', 40, 222.25, 8890.00, 'USD', 44.45, 10.00, 'KP029', '20230403 15:30:00', 'success'), +('TX030', 'C010', 'T010', '2023-04-03 16:15:00'::timestamp, 'sell', 70, 142.50, 9975.00, 'USD', 49.88, 10.00, 'KP030', '20230403 16:15:00', 'success'), +('TX031', 'C001', 'T001', CURRENT_DATE - INTERVAL '9 days', 'buy', 100, 150.00, 15000.00, 'USD', 75.00, 10.00, 'KP031', NULL, 'fail'), +('TX032', 'C002', 'T002', CURRENT_DATE - INTERVAL '8 days', 'sell', 80, 280.00, 14000.00, 'USD', 70.00, 10.00, 'KP032', TO_CHAR(CURRENT_DATE - INTERVAL '8 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX033', 'C003', 'T001', CURRENT_DATE - INTERVAL '7 days', 'buy', 120, 200.00, 24000.00, 'USD', 120.00, 15.00, 'KP033', TO_CHAR(CURRENT_DATE - INTERVAL '7 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX034', 'C004', 'T004', CURRENT_DATE - INTERVAL '6 days', 'sell', 90, 320.00, 28800.00, 'USD', 144.00, 12.00, 'KP034', TO_CHAR(CURRENT_DATE - INTERVAL '6 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX035', 'C005', 'T001', CURRENT_DATE - INTERVAL '5 days', 'buy', 150, 180.00, 27000.00, 'USD', 135.00, 20.00, 'KP035', NULL, 'fail'), +('TX036', 'C006', 'T006', CURRENT_DATE - INTERVAL '4 days', 'sell', 70, 300.00, 21000.00, 'USD', 105.00, 15.00, 'KP036', TO_CHAR(CURRENT_DATE - INTERVAL '4 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX037', 'C007', 'T007', CURRENT_DATE - INTERVAL '3 days', 'buy', 110, 220.00, 24200.00, 'USD', 121.00, 10.00, 'KP037', TO_CHAR(CURRENT_DATE - INTERVAL '3 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX038', 'C008', 'T008', 
CURRENT_DATE - INTERVAL '2 days', 'sell', 100, 350.00, 35000.00, 'USD', 175.00, 25.00, 'KP038', TO_CHAR(CURRENT_DATE - INTERVAL '2 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX039', 'C009', 'T007', CURRENT_DATE - INTERVAL '1 day', 'buy', 80, 230.00, 18400.00, 'USD', 92.00, 18.00, 'KP039', NULL, 'pending'), +('TX040', 'C001', 'T011', CURRENT_DATE - INTERVAL '10 days', 'buy', 50, 400.00, 20000.00, 'USD', 100.00, 20.00, 'KP040', TO_CHAR(CURRENT_DATE - INTERVAL '10 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX041', 'C002', 'T012', CURRENT_DATE - INTERVAL '9 days', 'sell', 30, 320.00, 9600.00, 'USD', 48.00, 15.00, 'KP041', TO_CHAR(CURRENT_DATE - INTERVAL '9 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX042', 'C003', 'T013', CURRENT_DATE - INTERVAL '8 days', 'buy', 80, 180.00, 14400.00, 'USD', 72.00, 10.00, 'KP042', TO_CHAR(CURRENT_DATE - INTERVAL '8 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX043', 'C004', 'T014', CURRENT_DATE - INTERVAL '7 days', 'sell', 60, 220.00, 13200.00, 'USD', 66.00, 12.00, 'KP043', NULL, 'pending'), +('TX044', 'C012', 'T001', '2023-01-15 10:00:00'::timestamp, 'buy', 80, 155.00, 12400.00, 'USD', 62.00, 10.00, 'KP044', '20230115 10:00:00', 'success'), +('TX045', 'C012', 'T001', '2023-01-16 10:30:00'::timestamp, 'buy', 80, 155.00, 12400.00, 'USD', 62.00, 10.00, 'KP045', '20230116 10:30:00', 'success'), +('TX046', 'C013', 'T002', '2023-02-20 11:30:00'::timestamp, 'sell', 60, 285.00, 17100.00, 'USD', 85.50, 15.00, 'KP046', '20230220 11:30:00', 'success'), +('TX047', 'C014', 'T003', '2023-03-25 14:45:00'::timestamp, 'buy', 5, 3250.00, 16250.00, 'USD', 81.25, 20.00, 'KP047', '20230325 14:45:00', 'success'), +('TX048', 'C012', 'T004', '2023-01-30 13:15:00'::timestamp, 'sell', 40, 190.00, 7600.00, 'USD', 38.00, 10.00, 'KP048', '20230130 13:15:00', 'success'), +('TX049', 'C013', 'T005', '2023-02-28 16:00:00'::timestamp, 'buy', 2, 2550.00, 5100.00, 'USD', 25.50, 15.00, 'KP049', '20230228 16:00:00', 'success'), +('TX050', 'C014', 'T006', '2023-03-30 09:45:00'::timestamp, 'sell', 30, 210.00, 6300.00, 'USD', 31.50, 10.00, 'KP050', '20230331 09:45:00', 'success'), +('TX051', 'C015', 'T001', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '5 months' + INTERVAL '1 day', 'buy', 50, 150.00, 7500.00, 'USD', 37.50, 10.00, 'KP051', TO_CHAR(DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '5 months' + INTERVAL '1 day', '%Y%m%d %H:%i:%s'), 'success'), +('TX052', 'C016', 'T002', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '4 months' + INTERVAL '2 days', 'sell', 40, 280.00, 11200.00, 'USD', 56.00, 10.00, 'KP052', TO_CHAR(DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '4 months' + INTERVAL '2 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX053', 'C017', 'T003', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '3 months' + INTERVAL '3 days', 'buy', 15, 3200.00, 48000.00, 'USD', 240.00, 20.00, 'KP053', TO_CHAR(DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '3 months' + INTERVAL '3 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX054', 'C018', 'T004', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months' + INTERVAL '4 days', 'sell', 30, 180.00, 5400.00, 'USD', 27.00, 5.00, 'KP054', TO_CHAR(DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months' + INTERVAL '4 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX055', 'C019', 'T005', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' + INTERVAL '5 days', 'buy', 10, 2500.00, 25000.00, 'USD', 125.00, 15.00, 'KP055', TO_CHAR(DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' + INTERVAL '5 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX056', 'C002', 'T006', DATE_TRUNC('month', 
CURRENT_DATE) + INTERVAL '1 day', 'sell', 20, 200.00, 4000.00, 'USD', 20.00, 10.00, 'KP056', TO_CHAR(DATE_TRUNC('month', CURRENT_DATE) + INTERVAL '1 day', '%Y%m%d %H:%i:%s'), 'success'); + +------------------------------------------------------------------------------- +-- For the Dealership SCHEMA +-- https://github.com/defog-ai/defog-data/blob/main/defog_data/car_dealership/car_dealership.sql + +-- Snowflake dialect compatibility replace: +-- - SERIAL PRIMARY KEY -> INTEGER AUTOINCREMENT PRIMARY KEY +-- - NOW() -> CURRENT_TIMESTAMP +------------------------------------------------------------------------------- +CREATE SCHEMA DEALERSHIP; + +CREATE TABLE cars ( + id INTEGER AUTOINCREMENT PRIMARY KEY, + make TEXT NOT NULL, -- manufacturer of the car + model TEXT NOT NULL, -- model name of the car + year INTEGER NOT NULL, -- year of manufacture + color TEXT NOT NULL, -- color of the car + vin_number VARCHAR(17) NOT NULL UNIQUE, -- Vehicle Identification Number + engine_type TEXT NOT NULL, -- type of engine (e.g., V6, V8, Electric) + transmission TEXT NOT NULL, -- type of transmission (e.g., Automatic, Manual) + cost NUMERIC(10, 2) NOT NULL, -- cost of the car + crtd_ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP -- timestamp when the car was added to the system +); + +CREATE TABLE salespersons ( + id INTEGER AUTOINCREMENT PRIMARY KEY, + first_name TEXT NOT NULL, + last_name TEXT NOT NULL, + email VARCHAR(255) NOT NULL UNIQUE, + phone VARCHAR(20) NOT NULL, + hire_date DATE NOT NULL, + termination_date DATE, + crtd_ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE customers ( + id INTEGER AUTOINCREMENT PRIMARY KEY, + first_name TEXT NOT NULL, + last_name TEXT NOT NULL, + email VARCHAR(255) NOT NULL UNIQUE, + phone VARCHAR(20) NOT NULL, + address TEXT NOT NULL, + city TEXT NOT NULL, + state TEXT NOT NULL, + zip_code VARCHAR(10) NOT NULL, + crtd_ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE sales ( + id INTEGER AUTOINCREMENT PRIMARY KEY, + car_id INTEGER NOT NULL REFERENCES cars(id), + salesperson_id INTEGER NOT NULL REFERENCES salespersons(id), + customer_id INTEGER NOT NULL REFERENCES customers(id), + sale_price NUMERIC(10, 2) NOT NULL, + sale_date DATE NOT NULL, + crtd_ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE inventory_snapshots ( + id INTEGER AUTOINCREMENT PRIMARY KEY, + snapshot_date DATE NOT NULL, + car_id INTEGER NOT NULL REFERENCES cars(id), + is_in_inventory BOOLEAN NOT NULL, + crtd_ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE payments_received ( + id INTEGER AUTOINCREMENT PRIMARY KEY, + sale_id INTEGER NOT NULL REFERENCES sales(id), + payment_date DATE NOT NULL, + payment_amount NUMERIC(10, 2) NOT NULL, + payment_method TEXT NOT NULL, -- values: cash, check, credit_card, debit_card, financing + crtd_ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE payments_made ( + id INTEGER AUTOINCREMENT PRIMARY KEY, + vendor_name TEXT NOT NULL, + payment_date DATE NOT NULL, + payment_amount NUMERIC(10, 2) NOT NULL, + payment_method TEXT NOT NULL, -- values: check, bank_transfer, credit_card + invoice_number VARCHAR(50) NOT NULL, + invoice_date DATE NOT NULL, + due_date DATE NOT NULL, + crtd_ts TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP +); + + +-- cars +INSERT INTO cars (id, make, model, year, color, vin_number, engine_type, transmission, cost) +VALUES + (1, 'Toyota', 'Camry', 2022, 'Silver', '4T1BF1FK3CU510984', 'V6', 'Automatic', 28500.00), + (2, 'Honda', 'Civic', 2021, 'platinum/grey', 
'2HGFC2F53MH522780', 'Inline 4', 'CVT', 22000.00), + (3, 'Ford', 'Mustang', 2023, 'blue', '1FA6P8TH4M5100001', 'V8', 'Manual', 45000.00), + (4, 'Tesla', 'Model 3', 2022, 'fuschia', '5YJ3E1EB7MF123456', 'Electric', 'Automatic', 41000.00), + (5, 'Chevrolet', 'Equinox', 2021, 'midnight blue', '2GNAXUEV1M6290124', 'Inline 4', 'Automatic', 26500.00), + (6, 'Nissan', 'Altima', 2022, 'Jet black', '1N4BL4BV4NN123456', 'V6', 'CVT', 25000.00), + (7, 'BMW', 'X5', 2023, 'Titan Silver', '5UXCR6C56M9A12345', 'V8', 'Automatic', 62000.00), + (8, 'Audi', 'A4', 2022, 'Blue', 'WAUBNAF47MA098765', 'Inline 4', 'Automatic', 39000.00), + (9, 'Lexus', 'RX350', 2021, 'Fiery red', '2T2BZMCA7MC143210', 'V6', 'Automatic', 45500.00), + (10, 'Subaru', 'Outback', 2022, 'Jade', '4S4BSANC2N3246801', 'Boxer 4', 'CVT', 28000.00), + (11, 'Mazda', 'CX-5', 2022, 'Royal Purple', 'JM3KE4DY4N0123456', 'Inline 4', 'Automatic', 29000.00), + (12, 'Hyundai', 'Tucson', 2023, 'black', 'KM8J3CAL3NU123456', 'Inline 4', 'Automatic', 32000.00), + (13, 'Kia', 'Sorento', 2021, 'ebony black', '5XYPH4A50MG987654', 'V6', 'Automatic', 32000.00), + (14, 'Jeep', 'Wrangler', 2022, 'Harbor Gray', '1C4HJXDG3NW123456', 'V6', 'Automatic', 38000.00), + (15, 'GMC', 'Sierra 1500', 2023, 'Snow White', '1GTU9CED3NZ123456', 'V8', 'Automatic', 45000.00), + (16, 'Ram', '1500', 2022, 'baby blue', '1C6SRFFT3NN123456', 'V8', 'Automatic', 42000.00), + (17, 'Mercedes-Benz', 'E-Class', 2021, 'Silver', 'W1KZF8DB1MA123456', 'Inline 6', 'Automatic', 62000.00), + (18, 'Volkswagen', 'Tiguan', 2022, 'Red', '3VV2B7AX1NM123456', 'Inline 4', 'Automatic', 32000.00), + (19, 'Volvo', 'XC90', 2023, 'black', 'YV4A22PK3N1234567', 'Inline 4', 'Automatic', 65000.00), + (20, 'Porsche', '911', 2022, 'white', 'WP0AA2A93NS123456', 'Flat 6', 'Automatic', 120000.00), + (21, 'Cadillac', 'Escalade', 2023, 'Black', '1GYS4HKJ3MR123456', 'V8', 'Automatic', 85000.00); + +-- salespersons +INSERT INTO salespersons (id, first_name, last_name, email, phone, hire_date, termination_date) +VALUES + (1, 'John', 'Doe', 'john.doe@autonation.com', '(555)-123-4567', CURRENT_DATE - INTERVAL '2 years', NULL), + (2, 'Jane', 'Smith', 'jane.smith@autonation.com', '(415)-987-6543', CURRENT_DATE - INTERVAL '3 years', NULL), + (3, 'Michael', 'Johnson', 'michael.johnson@autonation.com', '(555)-456-7890', CURRENT_DATE - INTERVAL '1 year', NULL), + (4, 'Emily', 'Brown', 'emily.brown@sonicauto.com', '(444)-111-2222', CURRENT_DATE - INTERVAL '1 year', CURRENT_DATE - INTERVAL '1 month'), + (5, 'David', 'Wilson', 'david.wilson@sonicauto.com', '(444)-333-4444', CURRENT_DATE - INTERVAL '2 years', NULL), + (6, 'Sarah', 'Taylor', 'sarah.taylor@sonicauto.com', '(123)-555-6666', '2018-09-01', '2022-09-01'), + (7, 'Daniel', 'Anderson', 'daniel.anderson@sonicauto.com', '(555)-777-8888', '2021-07-12', NULL), + (8, 'Olivia', 'Thomas', 'olivia.thomas@pensake.com', '(333)-415-0000', '2023-01-25', '2023-07-25'), + (9, 'James', 'Jackson', 'james.jackson@pensake.com', '(555)-212-3333', '2019-04-30', NULL), + (10, 'Sophia', 'White', 'sophia.white@pensake.com', '(555)-444-5555', '2022-08-18', NULL), + (11, 'Robert', 'Johnson', 'robert.johnson@pensake.com', '(001)-415-5678', CURRENT_DATE - INTERVAL '15 days', NULL), + (12, 'Jennifer', 'Davis', 'jennifer.davis@directauto.com', '(555)-345-6789', CURRENT_DATE - INTERVAL '20 days', NULL), + (13, 'Jessica', 'Rodriguez', 'jessica.rodriguez@directauto.com', '(555)-789-0123', '2022-06-01', NULL); + +-- customers +INSERT INTO customers (id, first_name, last_name, email, phone, address, city, 
state, zip_code, crtd_ts) +VALUES + -- (1, 'William', 'Davis', 'william.davis@example.com', '555-888-9999', '123 Main St', 'New York', 'NY', '10001', CURRENT_TIMESTAMP - INTERVAL '5 years');, + (2, 'Ava', 'Miller', 'ava.miller@example.com', '555-777-6666', '456 Oak Ave', 'Los Angeles', 'CA', '90001', CURRENT_TIMESTAMP - INTERVAL '4 years'), + (3, 'Benjamin', 'Wilson', 'benjamin.wilson@example.com', '555-666-5555', '789 Elm St', 'Chicago', 'IL', '60007', CURRENT_TIMESTAMP - INTERVAL '3 years'), + (4, 'Mia', 'Moore', 'mia.moore@example.com', '555-555-4444', '321 Pine Rd', 'Houston', 'TX', '77001', CURRENT_TIMESTAMP - INTERVAL '2 years'), + (5, 'Henry', 'Taylor', 'henry.taylor@example.com', '555-444-3333', '654 Cedar Ln', 'Phoenix', 'AZ', '85001', CURRENT_TIMESTAMP - INTERVAL '1 year'), + (6, 'Charlotte', 'Anderson', 'charlotte.anderson@example.com', '555-333-2222', '987 Birch Dr', 'Philadelphia', 'PA', '19019', CURRENT_TIMESTAMP - INTERVAL '5 years'), + (7, 'Alexander', 'Thomas', 'alexander.thomas@example.com', '555-222-1111', '741 Walnut St', 'San Antonio', 'TX', '78006', CURRENT_TIMESTAMP - INTERVAL '4 years'), + (8, 'Amelia', 'Jackson', 'amelia.jackson@gmail.com', '555-111-0000', '852 Maple Ave', 'San Diego', 'CA', '92101', CURRENT_TIMESTAMP - INTERVAL '3 years'), + (9, 'Daniel', 'White', 'daniel.white@youtube.com', '555-000-9999', '963 Oak St', 'Dallas', 'TX', '75001', CURRENT_TIMESTAMP - INTERVAL '2 years'), + (10, 'Abigail', 'Harris', 'abigail.harris@company.io', '555-999-8888', '159 Pine Ave', 'San Jose', 'CA', '95101', CURRENT_TIMESTAMP - INTERVAL '1 year'), + (11, 'Christopher', 'Brown', 'christopher.brown@ai.com', '555-456-7890', '753 Maple Rd', 'Miami', 'FL', '33101', CURRENT_TIMESTAMP - INTERVAL '5 months'), + (12, 'Sophia', 'Lee', 'sophia.lee@microsoft.com', '555-567-8901', '951 Oak Ln', 'Seattle', 'WA', '98101', CURRENT_TIMESTAMP - INTERVAL '6 months'), + (13, 'Michael', 'Chen', 'michael.chen@company.com', '(555)-456-7890', '123 Oak St', 'San Francisco', 'CA', '94101', CURRENT_TIMESTAMP - INTERVAL '3 months'); + +-- sales +INSERT INTO sales (id, car_id, salesperson_id, customer_id, sale_price, sale_date) +VALUES + (1, 1, 2, 3, 30500.00, '2023-03-15'), + (2, 3, 1, 5, 47000.00, '2023-03-20'), + (3, 6, 4, 2, 26500.00, '2023-03-22'), + (4, 8, 7, 9, 38000.00, '2023-03-25'), + (5, 2, 4, 7, 23500.00, '2023-03-28'), + (6, 10, 6, 1, 30000.00, '2023-04-01'), + (7, 5, 3, 6, 26800.00, '2023-04-05'), + (8, 7, 2, 10, 63000.00, '2023-04-10'), + (9, 4, 6, 8, 42500.00, '2023-04-12'), + (10, 9, 2, 4, 44500.00, '2023-04-15'), + (11, 1, 7, 11, 28900.00, CURRENT_DATE - INTERVAL '32 days'), + (12, 3, 3, 12, 46500.00, CURRENT_DATE - INTERVAL '10 days'), + (13, 6, 1, 11, 26000.00, CURRENT_DATE - INTERVAL '15 days'), + (14, 2, 3, 1, 23200.00, CURRENT_DATE - INTERVAL '21 days'), + (15, 8, 6, 12, 43500.00, CURRENT_DATE - INTERVAL '3 days'), + (16, 10, 4, 2, 29500.00, CURRENT_DATE - INTERVAL '5 days'), + (17, 3, 2, 3, 46000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' + INTERVAL '1 day'), + (18, 3, 2, 7, 47500.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week'), + (19, 3, 2, 10, 46500.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' - INTERVAL '1 day'), + (20, 4, 1, 3, 48000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week' + INTERVAL '1 day'), + (21, 4, 1, 7, 45000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week'), + (22, 4, 1, 10, 49000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week' - INTERVAL '1 day'); + + +-- inventory_snapshots +INSERT INTO 
inventory_snapshots (id, snapshot_date, car_id, is_in_inventory) +VALUES + (1, '2023-03-15', 1, TRUE), + (2, '2023-03-15', 2, TRUE), + (3, '2023-03-15', 3, TRUE), + (4, '2023-03-15', 4, TRUE), + (5, '2023-03-15', 5, TRUE), + (6, '2023-03-15', 6, TRUE), + (7, '2023-03-15', 7, TRUE), + (8, '2023-03-15', 8, TRUE), + (9, '2023-03-15', 9, TRUE), + (10, '2023-03-15', 10, TRUE), + (11, '2023-03-20', 1, FALSE), + (12, '2023-03-20', 3, FALSE), + (13, '2023-03-22', 6, FALSE), + (14, '2023-03-25', 8, FALSE), + (15, '2023-03-28', 2, FALSE), + (16, '2023-04-01', 10, FALSE), + (17, '2023-04-05', 5, FALSE), + (18, '2023-04-10', 7, FALSE), + (19, '2023-04-12', 4, FALSE), + (20, '2023-04-15', 9, FALSE), + (21, '2023-03-28', 1, TRUE), + (22, '2023-03-28', 3, TRUE), + (23, '2023-03-28', 4, FALSE); + +-- payments_received +INSERT INTO payments_received (id, sale_id, payment_date, payment_amount, payment_method) +VALUES + (1, 1, '2023-03-15', 5000.00, 'check'), + (2, 1, '2023-03-20', 22500.00, 'financing'), + (3, 2, '2023-03-20', 44000.00, 'credit_card'), + (4, 3, '2023-03-22', 24500.00, 'debit_card'), + (5, 4, '2023-03-25', 38000.00, 'financing'), + (6, 5, '2023-03-28', 21500.00, 'cash'), + (7, 6, '2023-04-01', 27000.00, 'credit_card'), + (8, 7, '2023-04-05', 26000.00, 'debit_card'), + (9, 8, '2023-04-10', 60000.00, 'financing'), + (10, 9, '2023-04-12', 40000.00, 'check'), + (11, 10, '2023-04-15', 44500.00, 'credit_card'), + (12, 11, CURRENT_DATE - INTERVAL '30 days', 28000.00, 'cash'), + (13, 12, CURRENT_DATE - INTERVAL '3 days', 43500.00, 'credit_card'), + (14, 13, CURRENT_DATE - INTERVAL '6 days', 24000.00, 'debit_card'), + (15, 14, CURRENT_DATE - INTERVAL '1 days', 17200.00, 'financing'), + (16, 15, CURRENT_DATE - INTERVAL '1 days', 37500.00, 'credit_card'), + (17, 16, CURRENT_DATE - INTERVAL '5 days', 26500.00, 'debit_card'), + (18, 17, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' + INTERVAL '1 day', 115000.00, 'financing'), + (19, 18, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week', 115000.00, 'credit_card'), + (20, 19, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' - INTERVAL '1 day', 115000.00, 'debit_card'), + (21, 20, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week' + INTERVAL '1 day', 115000.00, 'cash'), + (22, 21, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week', 115000.00, 'check'), + (23, 22, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week' - INTERVAL '1 day', 115000.00, 'credit_card'); + +-- payments_made +INSERT INTO payments_made (id, vendor_name, payment_date, payment_amount, payment_method, invoice_number, invoice_date, due_date) +VALUES + (1, 'Car Manufacturer Inc', '2023-03-01', 150000.00, 'bank_transfer', 'INV-001', '2023-02-25', '2023-03-25'), + (2, 'Auto Parts Supplier', '2023-03-10', 25000.00, 'check', 'INV-002', '2023-03-05', '2023-04-04'), + (3, 'Utility Company', '2023-03-15', 1500.00, 'bank_transfer', 'INV-003', '2023-03-01', '2023-03-31'), + (4, 'Marketing Agency', '2023-03-20', 10000.00, 'credit_card', 'INV-004', '2023-03-15', '2023-04-14'), + (5, 'Insurance Provider', '2023-03-25', 5000.00, 'bank_transfer', 'INV-005', '2023-03-20', '2023-04-19'), + (6, 'Cleaning Service', '2023-03-31', 2000.00, 'check', 'INV-006', '2023-03-25', '2023-04-24'), + (7, 'Car Manufacturer Inc', '2023-04-01', 200000.00, 'bank_transfer', 'INV-007', '2023-03-25', '2023-04-24'), + (8, 'Auto Parts Supplier', '2023-04-10', 30000.00, 'check', 'INV-008', '2023-04-05', '2023-05-05'), + (9, 'Utility Company', '2023-04-15', 1500.00, 'bank_transfer', 'INV-009', '2023-04-01', 
'2023-04-30'), + (10, 'Marketing Agency', '2023-04-20', 15000.00, 'credit_card', 'INV-010', '2023-04-15', '2023-05-15'), + (11, 'Insurance Provider', '2023-04-25', 5000.00, 'bank_transfer', 'INV-011', '2023-04-20', '2023-05-20'), + (12, 'Cleaning Service', '2023-04-30', 2000.00, 'check', 'INV-012', '2023-04-25', '2023-05-25'), + (13, 'Toyota Auto Parts', CURRENT_DATE - INTERVAL '5 days', 12500.00, 'bank_transfer', 'INV-013', CURRENT_DATE - INTERVAL '10 days', CURRENT_DATE + INTERVAL '20 days'), + (14, 'Honda Manufacturing', CURRENT_DATE - INTERVAL '3 days', 18000.00, 'check', 'INV-014', CURRENT_DATE - INTERVAL '8 days', CURRENT_DATE + INTERVAL '22 days'), + (15, 'Ford Supplier Co', CURRENT_DATE - INTERVAL '2 days', 22000.00, 'bank_transfer', 'INV-015', CURRENT_DATE - INTERVAL '7 days', CURRENT_DATE + INTERVAL '23 days'), + (16, 'Tesla Parts Inc', CURRENT_DATE - INTERVAL '1 day', 15000.00, 'credit_card', 'INV-016', CURRENT_DATE - INTERVAL '6 days', CURRENT_DATE + INTERVAL '24 days'), + (17, 'Chevrolet Auto', CURRENT_DATE, 20000.00, 'bank_transfer', 'INV-017', CURRENT_DATE - INTERVAL '5 days', CURRENT_DATE + INTERVAL '25 days'); + + +------------------------------------------------------------------------------- +-- For the EWALLET SCHEMA +-- https://github.com/defog-ai/defog-data/blob/main/defog_data/ewallet/ewallet.sql + +-- Snowflake dialect compatibility replace: +-- - SERIAL PRIMARY KEY -> INTEGER AUTOINCREMENT PRIMARY KEY +-- - NOW() -> CURRENT_TIMESTAMP +------------------------------------------------------------------------------- +CREATE SCHEMA EWALLET; + +CREATE TABLE users ( + uid BIGINT PRIMARY KEY, + username VARCHAR(50) NOT NULL, + email VARCHAR(100) NOT NULL, + phone_number VARCHAR(20), + created_at TIMESTAMP NOT NULL, + last_login_at TIMESTAMP, + user_type VARCHAR(20) NOT NULL, -- possible values: individual, business, admin + status VARCHAR(20) NOT NULL, -- possible values: active, inactive, suspended, deleted + country VARCHAR(2), -- 2-letter country code + address_billing TEXT, + address_delivery TEXT, + kyc_status VARCHAR(20), -- possible values: pending, approved, rejected + kyc_verified_at TIMESTAMP +); + +CREATE TABLE merchants ( + mid BIGINT PRIMARY KEY, + name VARCHAR(100) NOT NULL, + description TEXT, + website_url VARCHAR(200), + logo_url VARCHAR(200), + created_at TIMESTAMP NOT NULL, + country VARCHAR(2), -- 2-letter country code + state VARCHAR(50), + city VARCHAR(50), + postal_code VARCHAR(20), + address TEXT, + status VARCHAR(20) NOT NULL, -- possible values: active, inactive, suspended + category VARCHAR(50), + sub_category VARCHAR(50), + mcc INT, -- Merchant Category Code + contact_name VARCHAR(100), + contact_email VARCHAR(100), + contact_phone VARCHAR(20) +); + +CREATE TABLE coupons ( + cid BIGINT PRIMARY KEY, + merchant_id BIGINT NOT NULL REFERENCES merchants(mid), + code VARCHAR(20) NOT NULL, + description TEXT, + start_date DATE NOT NULL, + end_date DATE NOT NULL, + discount_type VARCHAR(20) NOT NULL, -- possible values: percentage, fixed_amount + discount_value DECIMAL(10,2) NOT NULL, + min_purchase_amount DECIMAL(10,2), + max_discount_amount DECIMAL(10,2), + redemption_limit INT, + status VARCHAR(20) NOT NULL, -- possible values: active, inactive, expired + created_at TIMESTAMP NOT NULL, + updated_at TIMESTAMP +); + +-- Fact Tables -- + +CREATE TABLE wallet_transactions_daily ( + txid INTEGER AUTOINCREMENT PRIMARY KEY, + sender_id BIGINT NOT NULL, + sender_type INT NOT NULL, -- 0 for user, 1 for merchant + receiver_id BIGINT NOT NULL, + receiver_type 
INT NOT NULL, -- 0 for user, 1 for merchant + amount DECIMAL(10,2) NOT NULL, + status VARCHAR(20) NOT NULL, -- possible values: pending, success, failed, refunded + type VARCHAR(20) NOT NULL, -- possible values: credit, debit + description TEXT, + coupon_id BIGINT, -- NULL if transaction doesn't involve a coupon + created_at TIMESTAMP NOT NULL, + completed_at TIMESTAMP, -- NULL if failed + transaction_ref VARCHAR(36) NOT NULL, -- randomly generated uuid4 for users' reference + gateway_name VARCHAR(50), + gateway_ref VARCHAR(50), + device_id VARCHAR(50), + ip_address VARCHAR(50), + user_agent TEXT +); + +CREATE TABLE wallet_user_balance_daily ( + user_id BIGINT, + balance DECIMAL(10,2) NOT NULL, + updated_at TIMESTAMP NOT NULL +); + +CREATE TABLE wallet_merchant_balance_daily ( + merchant_id BIGINT, + balance DECIMAL(10,2) NOT NULL, + updated_at TIMESTAMP NOT NULL +); + +CREATE TABLE notifications ( + id INTEGER AUTOINCREMENT PRIMARY KEY, + user_id INT NOT NULL REFERENCES users(uid), + message TEXT NOT NULL, + type VARCHAR(50) NOT NULL, -- possible values: transaction, promotion, security, general + status VARCHAR(20) NOT NULL, -- possible values: unread, read, archived + created_at TIMESTAMP NOT NULL, + read_at TIMESTAMP, -- NULL if not read + device_type VARCHAR(10), -- possible values: mobile_app, web_app, email, sms + device_id VARCHAR(36), + action_url TEXT -- can be external https or deeplink url within the app +); + +CREATE TABLE user_sessions ( + user_id BIGINT NOT NULL, + session_start_ts TIMESTAMP NOT NULL, + session_end_ts TIMESTAMP, + device_type VARCHAR(10), -- possible values: mobile_app, web_app, email, sms + device_id VARCHAR(36) +); + +CREATE TABLE user_setting_snapshot ( + user_id BIGINT NOT NULL, + snapshot_date DATE NOT NULL, + tx_limit_daily DECIMAL(10,2), + tx_limit_monthly DECIMAL(10,2), + membership_status INTEGER, -- 0 for bronze, 1 for silver, 2 for gold, 3 for platinum, 4 for VIP + password_hash VARCHAR(255), + api_key VARCHAR(255), + verified_devices TEXT, -- comma separated list of device ids + verified_ips TEXT, -- comma separated list of IP addresses + mfa_enabled BOOLEAN, + marketing_opt_in BOOLEAN, + created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY (user_id, snapshot_date) +); + +-- users +INSERT INTO users (uid, username, email, phone_number, created_at, user_type, status, country, address_billing, address_delivery, kyc_status) +VALUES + (1, 'john_doe', 'john.doe@email.com', '+1234567890', DATE_TRUNC('month', CURRENT_TIMESTAMP) - INTERVAL '1 month', 'individual', 'active', 'US', '123 Main St, Anytown US 12345', '123 Main St, Anytown US 12345', 'approved'), + (2, 'jane_smith', 'jane.smith@email.com', '+9876543210', DATE_TRUNC('month', CURRENT_TIMESTAMP) - INTERVAL '2 months', 'individual', 'active', 'CA', '456 Oak Rd, Toronto ON M1M2M2', '456 Oak Rd, Toronto ON M1M2M2', 'approved'), + (3, 'bizuser', 'contact@business.co', '+1234509876', '2021-06-01 09:15:00', 'business', 'active', 'FR', '12 Rue Baptiste, Paris 75001', NULL, 'approved'), + (4, 'david_miller', 'dave@personal.email', '+4477788899', '2023-03-20 18:45:00', 'individual', 'inactive', 'GB', '25 London Road, Manchester M12 4XY', '25 London Road, Manchester M12 4XY', 'pending'), + (5, 'emily_wilson', 'emily.w@gmail.com', '+8091017161', '2021-11-03 22:10:00', 'individual', 'suspended', 'AU', '72 Collins St, Melbourne VIC 3000', '19 Smith St, Brunswick VIC 3056', 'rejected'), + (6, 'techcorp', 'orders@techcorp.com', '+14165558888', '2018-05-20 11:35:00', 'business', 'active', 'US', 
'33 Technology Dr, Silicon Valley CA 94301', NULL, 'approved'), + (7, 'shopsmart', 'customerserv@shopsmart.biz', '+6585771234', '2020-09-15 06:25:00', 'business', 'inactive', 'SG', '888 Orchard Rd, #05-000, Singapore 238801', NULL, 'approved'), + (8, 'michael_brown', 'mike.brown@outlook.com', '+3912378624', '2019-07-22 16:40:00', 'individual', 'active', 'DE', 'Heidestr 17, Berlin 10557', 'Heidestr 17, Berlin 10557', 'approved'), + (9, 'alex_taylor', 'ataylo@university.edu', NULL, '2022-08-30 09:15:00', 'individual', 'active', 'NZ', '12 Mardon Rd, Wellington 6012', '5 Boulcott St, Wellington 6011', 'approved'), + (10, 'huang2143', 'huang2143@example.com', '+8612345678901', '2023-12-10 08:00:00', 'individual', 'active', 'CN', '123 Nanjing Road, Shanghai 200000', '123 Nanjing Road, Shanghai 200000', 'approved'), + (11, 'lisa_jones', 'lisa.jones@email.com', '+6123456789', '2023-09-05 15:20:00', 'individual', 'active', 'AU', '789 George St, Sydney NSW 2000', '789 George St, Sydney NSW 2000', 'approved'); + +-- merchants +INSERT INTO merchants (mid, name, description, website_url, logo_url, created_at, country, state, city, postal_code, address, status, category, sub_category, mcc, contact_name, contact_email, contact_phone) +VALUES + (1, 'TechMart', 'Leading electronics retailer', 'https://www.techmart.com', 'https://www.techmart.com/logo.png', '2015-01-15 00:00:00', 'US', 'California', 'Los Angeles', '90011', '645 Wilshire Blvd, Los Angeles CA 90011', 'active', 'retail (hardware)', 'Electronics', 5732, 'John Jacobs', 'jjacobs@techmart.com', '+15551234567'), + (2, 'FitLifeGear', 'Fitness equipment and activewear', 'https://fitlifegear.com', 'https://fitlifegear.com/brand.jpg', '2018-07-01 00:00:00', 'CA', 'Ontario', 'Toronto', 'M5V2J2', '421 Richmond St W, Toronto ON M5V2J2', 'active', 'retail (hardware)', 'Sporting Goods', 5655, 'Jane McDonald', 'jmcdonald@fitlifegear.com', '+14165559876'), + (3, 'UrbanDining', 'Local restaurants and cafes', 'https://www.urbandining.co', 'https://www.urbandining.co/logo.png', '2020-03-10 00:00:00', 'FR', NULL, 'Paris', '75011', '35 Rue du Faubourg Saint-Antoine, 75011 Paris', 'active', 'Food & Dining', 'Restaurants', 5812, 'Pierre Gagnon', 'pgagnon@urbandining.co', '+33612345678'), + (4, 'LuxStays', 'Boutique vacation rentals', 'https://luxstays.com', 'https://luxstays.com/branding.jpg', '2016-11-01 00:00:00', 'IT', NULL, 'Rome', '00187', 'Via della Conciliazione 15, Roma 00187', 'inactive', 'Travel & Hospitality', 'Accommodation', 7011, 'Marco Rossi', 'mrossi@luxstays.com', '+39061234567'), + (5, 'HandyCraft', 'Handmade arts and crafts supplies', 'https://handycraft.store', 'https://handycraft.store/hc-logo.png', '2022-06-20 00:00:00', 'ES', 'Catalonia', 'Barcelona', '08003', 'Passeig de Gracia 35, Barcelona 08003', 'active', 'Retail', 'Crafts & Hobbies', 5949, 'Ana Garcia', 'agarcia@handycraft.store', '+34612345678'), + (6, 'CodeSuite', 'SaaS productivity tools for developers', 'https://codesuite.io', 'https://codesuite.io/logo.svg', '2019-02-01 00:00:00', 'DE', NULL, 'Berlin', '10119', 'Dessauer Str 28, 10119 Berlin', 'active', 'Business Services', 'Software', 5734, 'Michael Schmidt', 'mschmidt@codesuite.io', '+49301234567'), + (7, 'ZenHomeGoods', 'Housewares and home decor items', 'https://www.zenhomegoods.com', 'https://www.zenhomegoods.com/branding.jpg', '2014-09-15 00:00:00', 'AU', 'Victoria', 'Melbourne', '3004', '159 Franklin St, Melbourne VIC 3004', 'active', 'Retail', 'Home & Garden', 5719, 'Emily Watson', 'ewatson@zenhomegoods.com', 
'+61312345678'), + (8, 'KidzPlayhouse', 'Children''s toys and games', 'https://kidzplayhouse.com', 'https://kidzplayhouse.com/logo.png', '2017-04-01 00:00:00', 'GB', NULL, 'London', 'WC2N 5DU', '119 Charing Cross Rd, London WC2N 5DU', 'suspended', 'Retail', 'Toys & Games', 5945, 'David Thompson', 'dthompson@kidzplayhouse.com', '+442071234567'), + (9, 'BeautyTrending', 'Cosmetics and beauty supplies', 'https://beautytrending.com', 'https://beautytrending.com/bt-logo.svg', '2021-10-15 00:00:00', 'NZ', NULL, 'Auckland', '1010', '129 Queen St, Auckland 1010', 'active', 'Retail', 'Health & Beauty', 5977, 'Sophie Wilson', 'swilson@beautytrending.com', '+6493012345'), + (10, 'GameRush', 'Video games and gaming accessories', 'https://gamerush.co', 'https://gamerush.co/gr-logo.png', '2023-02-01 00:00:00', 'US', 'New York', 'New York', '10001', '303 Park Ave S, New York NY 10001', 'active', 'Retail', 'Electronics', 5735, 'Michael Davis', 'mdavis@gamerush.co', '+16463012345'), + (11, 'FashionTrend', 'Trendy clothing and accessories', 'https://www.fashiontrend.com', 'https://www.fashiontrend.com/logo.png', '2019-08-10 00:00:00', 'UK', NULL, 'Manchester', 'M2 4WU', '87 Deansgate, Manchester M2 4WU', 'active', 'Retail', 'Apparel', 5651, 'Emma Thompson', 'ethompson@fashiontrend.com', '+441612345678'), + (12, 'GreenGourmet', 'Organic foods and natural products', 'https://www.greengourmet.com', 'https://www.greengourmet.com/logo.jpg', '2020-12-05 00:00:00', 'CA', 'British Columbia', 'Vancouver', 'V6B 6B1', '850 W Hastings St, Vancouver BC V6B 6B1', 'active', 'Food & Dining', 'Groceries', 5411, 'Daniel Lee', 'dlee@greengourmet.com', '+16041234567'), + (13, 'PetParadise', 'Pet supplies and accessories', 'https://petparadise.com', 'https://petparadise.com/logo.png', '2018-03-20 00:00:00', 'AU', 'New South Wales', 'Sydney', '2000', '275 Pitt St, Sydney NSW 2000', 'active', 'Retail', 'Pets', 5995, 'Olivia Johnson', 'ojohnson@petparadise.com', '+61298765432'), + (14, 'HomeTechSolutions', 'Smart home devices and gadgets', 'https://hometechsolutions.net', 'https://hometechsolutions.net/logo.png', '2022-04-15 00:00:00', 'US', 'California', 'San Francisco', '94105', '350 Mission St, San Francisco CA 94105', 'active', 'Retail', 'Home Appliances', 5734, 'Ethan Brown', 'ebrown@hometechsolutions.net', '+14159876543'), + (15, 'BookWorms', 'Books and reading accessories', 'https://bookworms.co.uk', 'https://bookworms.co.uk/logo.png', '2017-06-30 00:00:00', 'UK', NULL, 'London', 'WC2H 9JA', '66-67 Tottenham Court Rd, London WC2H 9JA', 'active', 'Retail', 'Books', 5942, 'Sophia Turner', 'sturner@bookworms.co.uk', '+442078912345'); + +-- coupons +INSERT INTO coupons (cid, merchant_id, code, description, start_date, end_date, discount_type, discount_value, min_purchase_amount, max_discount_amount, redemption_limit, status, created_at, updated_at) +VALUES + (1, 1, 'TECH20', '20% off tech and electronics', '2023-05-01', '2023-05-31', 'percentage', 20.00, 100.00, NULL, 500, 'active', '2023-04-01 09:00:00', '2023-04-15 11:30:00'), + (2, 2, 'NEWYEAR30', '30% off workout gear', '2023-01-01', '2023-01-15', 'percentage', 30.00, NULL, NULL, 1000, 'expired', '2022-12-01 12:00:00', '2023-01-16 18:45:00'), + (3, 3, 'DINEDISCOUNT', 'Get $10 off $50 order', '2023-06-01', '2023-06-30', 'fixed_amount', 10.00, 50.00, 10.00, NULL, 'active', '2023-05-15 15:30:00', NULL), + (4, 4, 'HOME15', '15% off weekly rental', '2023-07-01', '2023-08-31', 'percentage', 15.00, 1000.00, 300.00, 200, 'active', '2023-05-01 09:15:00', NULL), + (5, 5, 'HOME10', 
'$10 off $75+ purchase', '2023-04-01', '2023-04-30', 'fixed_amount', 10.00, 75.00, 10.00, 300, 'inactive', '2023-03-01 14:00:00', '2023-05-05 10:30:00'), + (6, 6, 'CODENEW25', '25% off new subscriptions', '2023-03-01', '2023-03-31', 'percentage', 25.00, NULL, NULL, NULL, 'expired', '2023-02-15 11:00:00', '2023-04-01 09:30:00'), + (7, 7, 'ZENHOME', 'Get 20% off home items', '2023-09-01', '2023-09-30', 'percentage', 20.00, 50.00, NULL, 1500, 'active', '2023-08-15 16:45:00', NULL), + (8, 8, 'GAMEKIDS', '$15 off $100+ purchase', '2022-12-01', '2022-12-31', 'fixed_amount', 15.00, 100.00, 15.00, 800, 'expired', '2022-11-01 10:30:00', '2023-01-02 13:15:00'), + (9, 9, 'GLOWUP', 'Buy 2 get 1 free on cosmetics', '2023-10-15', '2023-10-31', 'fixed_amount', 50.00, 150.00, 50.00, 300, 'active', '2023-10-01 08:00:00', NULL), + (10, 10, 'GAMERALERT', 'Get 25% off accessories', '2023-03-01', '2023-03-15', 'percentage', 25.00, NULL, 50.00, 750, 'expired', '2023-02-15 14:30:00', '2023-03-16 12:00:00'); + + +-- wallet_transactions_daily +INSERT INTO wallet_transactions_daily (txid, sender_id, sender_type, receiver_id, receiver_type, amount, status, type, description, coupon_id, created_at, completed_at, transaction_ref, gateway_name, gateway_ref, device_id, ip_address, user_agent) +VALUES + (1, 1, 0, 1, 0, 99.99, 'success', 'debit', 'Online purchase', NULL, '2023-06-01 10:15:30', '2023-06-01 10:15:45', 'ad154bf7-8185-4230-a8d8-3ef59b4e0012', 'Stripe', 'tx_123abc456def', 'mobile_8fh2k1', '192.168.0.1', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_3_1 like Mac OS X) ...'), + (2, 1, 0, 1, 1, 20.00, 'success', 'credit', 'Coupon discount', 1, '2023-06-01 10:15:30', '2023-06-01 10:15:45', 'ad154bf7-8185-4230-a8d8-3ef59b4e0012', 'Stripe', 'tx_123abc456def', 'mobile_8fh2k1', '192.168.0.1', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_3_1 like Mac OS X) ...'), + (3, 2, 0, 1, 1, 16.00, 'success', 'credit', 'Coupon discount', 1, '2023-07-01 10:18:30', '2023-06-01 10:18:45', 'kd454bf7-428d-eig2-a8d8-3ef59b4e0012', 'Stripe', 'tx_123abc789gas', 'mobile_yjp08q', '198.51.100.233', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_4 like Mac OS X) ...'), + (4, 3, 1, 9, 0, 125.50, 'success', 'debit', 'Product purchase', NULL, '2023-06-01 13:22:18', '2023-06-01 13:22:45', 'e6f510e9-ff7d-4914-81c2-f8e56bae4012', 'PayPal', 'ppx_192ks8hl', 'web_k29qjd', '216.58.195.68', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ...'), + (5, 9, 0, 3, 1, 42.75, 'pending', 'debit', 'Order #438721', 3, '2023-06-01 18:45:02', '2023-06-01 18:45:13', 'b2ca190e-a42f-4f5e-8318-f82bcc6ae64e', 'Stripe', 'tx_987zyx654wvu', 'mobile_q3mz8n', '68.85.32.201', 'Mozilla/5.0 (Linux; Android 13) ...'), + (6, 9, 0, 3, 1, 10.00, 'success', 'credit', 'Coupon discount', 3, '2023-06-01 18:45:02', '2023-06-01 18:45:13', 'b2ca190e-a42f-4f5e-8318-f82bcc6ae64e', 'Stripe', 'tx_987zyx654wvu', 'mobile_q3mz8n', '68.85.32.201', 'Mozilla/5.0 (Linux; Android 13) ...'), + (7, 2, 0, 7, 1, 89.99, 'pending', 'debit', 'Home furnishings', NULL, '2023-06-02 09:30:25', '2023-06-02 09:30:40', 'c51e10d1-db34-4d9f-b55f-43a05a5481c8', 'Checkout.com', 'ord_kzhg123', 'mobile_yjp08q', '198.51.100.233', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_4 like Mac OS X) ...'), + (8, 2, 0, 7, 1, 17.99, 'success', 'credit', 'Coupon discount', 7, '2023-06-02 09:30:25', '2023-06-02 09:30:40', 'c51e10d1-db34-4d9f-b55f-43a05a5481c8', 'Checkout.com', 'ord_kzhg123', 'mobile_yjp08q', '198.51.100.233', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_4 like Mac OS X) ...'), + (9, 6, 1, 1, 0, 29.95, 'success', 'debit', 'Software subscription', 
NULL, '2023-06-02 14:15:00', '2023-06-02 14:15:05', '25cd48e5-08c3-4d1c-b7a4-26485ea646eb', 'Braintree', 'sub_mnb456', 'web_zz91p44l', '4.14.15.90', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ...'), + (10, 4, 0, 4, 1, 2500.00, 'pending', 'debit', 'Villa rental deposit', NULL, '2023-06-02 20:45:36', NULL, 'a7659c81-0cd0-4635-af6c-cf68d2c15ab2', 'PayPal', NULL, 'mobile_34jdkl', '143.92.64.138', 'Mozilla/5.0 (Linux; Android 11; Pixel 5) ...'), + (11, 5, 0, 5, 1, 55.99, 'success', 'debit', 'Craft supplies order', NULL, '2023-06-03 11:12:20', '2023-06-03 11:12:35', 'ec74cb3b-8272-4175-a5d0-f03c2e781593', 'Adyen', 'ord_tkjs87', 'web_8902wknz', '192.64.112.188', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ...'), + (12, 9, 0, 9, 1, 75.00, 'success', 'debit', 'Beauty products', 9, '2023-06-04 08:00:00', '2023-06-04 08:00:25', '840a9854-1b07-422b-853c-636b289222a9', 'Checkout.com', 'ord_kio645', 'mobile_g3mjfz', '203.96.81.36', 'Mozilla/5.0 (Linux; Android 12; SM-S906N Build/QP1A.190711.020) ...'), + (13, 9, 0, 9, 1, 50.00, 'success', 'credit', 'Coupon discount', 9, '2023-06-04 08:00:00', '2023-06-04 08:00:25', '840a9854-1b07-422b-853c-636b289222a9', 'Checkout.com', 'ord_kio645', 'mobile_g3mjfz', '203.96.81.36', 'Mozilla/5.0 (Linux; Android 12; SM-S906N Build/QP1A.190711.020) ...'), + (14, 8, 0, 10, 1, 119.99, 'failed', 'debit', 'New game purchase', NULL, '2023-06-04 19:30:45', NULL, '32e2b29c-5c7f-4906-98c5-e8abdcbfd69a', 'Braintree', 'ord_mjs337', 'web_d8180kaf', '8.26.53.165', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ...'), + (15, 8, 0, 10, 1, 29.99, 'success', 'credit', 'Coupon discount', 10, '2023-06-04 19:30:45', '2023-06-04 19:31:10', '32e2b29c-5c7f-4906-98c5-e8abdcbfd69a', 'Braintree', 'ord_mjs337', 'web_d8180kaf', '8.26.53.165', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ...'), + (16, 10, 1, 3, 0, 87.50, 'failed', 'debit', 'Restaurant order', NULL, '2023-06-05 12:05:21', NULL, '37cf052d-0475-4ecc-bda7-73ee904bf65c', 'Checkout.com', NULL, 'mobile_x28qlj', '92.110.51.150', 'Mozilla/5.0 (Linux; Android 13; SM-S901B) ...'), + (17, 1, 0, 1, 0, 175.00, 'success', 'debit', 'Refund on order #1234', NULL, '2023-06-06 14:20:00', '2023-06-06 14:20:05', 'a331232e-a3f6-4e7f-b49f-3588bc5ff985', 'Stripe', 'rfnd_xkt521', 'web_33lq1dh', '38.75.197.8', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ...'), + (18, 7, 1, 2, 0, 599.99, 'success', 'debit', 'Yearly subscription', NULL, '2023-06-06 16:55:10', '2023-06-06 16:55:15', 'ed6f46ab-9617-4d11-9aa9-60d24bdf9bc0', 'PayPal', 'sub_pjj908', 'web_zld22f', '199.59.148.201', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ...'), + (19, 2, 0, 2, 1, 22.99, 'refunded', 'debit', 'Product return', NULL, '2023-06-07 10:10:30', '2023-06-07 10:11:05', '6c97a87d-610f-4705-ae97-55071127d9ad', 'Adyen', 'tx_zcx258', 'mobile_1av8p0', '70.121.39.25', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_4 like Mac OS X) ...'), + (20, 2, 0, 2, 1, 22.99, 'success', 'credit', 'Refund on return', NULL, '2023-06-07 10:10:30', '2023-06-07 10:11:05', '6c97a87d-610f-4705-ae97-55071127d9ad', 'Adyen', 'tx_zcx258', 'mobile_1av8p0', '70.121.39.25', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_4 like Mac OS X) ...'), + (21, 1, 0, 2, 1, 49.99, 'success', 'debit', 'Product purchase', NULL, CURRENT_TIMESTAMP - INTERVAL '5 months', CURRENT_TIMESTAMP - INTERVAL '5 months', 'tx_ref_11_1', 'Stripe', 'stripe_ref_11_1', 'device_11_1', '192.168.1.11', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 
Safari/537.36'), + (22, 4, 0, 3, 1, 99.99, 'success', 'debit', 'Service purchase', NULL, CURRENT_TIMESTAMP - INTERVAL '4 months', CURRENT_TIMESTAMP - INTERVAL '4 months', 'tx_ref_12_1', 'PayPal', 'paypal_ref_12_1', 'device_12_1', '192.168.1.12', 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1'), + (23, 4, 0, 1, 1, 149.99, 'success', 'debit', 'Subscription purchase', NULL, CURRENT_TIMESTAMP - INTERVAL '3 months', CURRENT_TIMESTAMP - INTERVAL '3 months', 'tx_ref_13_1', 'Stripe', 'stripe_ref_13_1', 'device_13_1', '192.168.1.13', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'), + (24, 2, 0, 5, 1, 199.99, 'pending', 'debit', 'Product purchase', NULL, CURRENT_TIMESTAMP - INTERVAL '2 months', CURRENT_TIMESTAMP - INTERVAL '2 months', 'tx_ref_14_1', 'PayPal', 'paypal_ref_14_1', 'device_14_1', '192.168.1.14', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'), + (25, 2, 0, 1, 1, 249.99, 'success', 'debit', 'Service purchase', NULL, CURRENT_TIMESTAMP - INTERVAL '1 month', CURRENT_TIMESTAMP - INTERVAL '1 month', 'tx_ref_15_1', 'Stripe', 'stripe_ref_15_1', 'device_15_1', '192.168.1.15', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'), + (26, 7, 1, 2, 0, 299.99, 'success', 'debit', 'Renew subscription', NULL, CURRENT_TIMESTAMP - INTERVAL '3 weeks', CURRENT_TIMESTAMP - INTERVAL '3 weeks', 'ed6f46ab-9617-4d11-9aa9-55071127d9ad', 'PayPal', 'sub_pjk832', 'web_zld22f', '199.59.148.201', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ...'); + + +-- wallet_user_balance_daily +INSERT INTO wallet_user_balance_daily (user_id, balance, updated_at) +VALUES + (1, 525.80, '2023-06-07 23:59:59'), + (2, 429.76, '2023-06-07 23:59:59'), + (3, -725.55, '2023-06-07 23:59:59'), + (4, -2500.00, '2023-06-07 23:59:59'), + (5, -55.99, '2023-06-07 23:59:59'), + (6, 0.00, '2023-06-07 23:59:59'), + (7, 0.00, '2023-06-07 23:59:59'), + (8, -599.98, '2023-06-07 23:59:59'), + (9, -183.25, '2023-06-07 23:59:59'), + (10, 0.00, '2023-06-07 23:59:59'), + (1, 2739.10, CURRENT_TIMESTAMP - INTERVAL '8 days'), + (1, 2738.12, CURRENT_TIMESTAMP - INTERVAL '6 days'), + (1, 2733.92, CURRENT_TIMESTAMP - INTERVAL '3 days'), + (2, 155.24, CURRENT_TIMESTAMP - INTERVAL '7 days'), + (3, 2775.25, CURRENT_TIMESTAMP - INTERVAL '6 days'), + (4, 2500.00, CURRENT_TIMESTAMP - INTERVAL '5 days'), + (5, 155.99, CURRENT_TIMESTAMP - INTERVAL '4 days'), + (6, 29.95, CURRENT_TIMESTAMP - INTERVAL '3 days'), + (7, 172.98, CURRENT_TIMESTAMP - INTERVAL '2 days'), + (8, 0.00, CURRENT_TIMESTAMP - INTERVAL '7 days'), + (9, 125.00, CURRENT_TIMESTAMP - INTERVAL '3 days'), + (10, 219.98, CURRENT_TIMESTAMP - INTERVAL '1 days'); + +-- wallet_merchant_balance_daily +INSERT INTO wallet_merchant_balance_daily (merchant_id, balance, updated_at) +VALUES + (1, 3897.99, '2023-06-07 23:59:59'), + (2, 155.24, '2023-06-07 23:59:59'), + (3, 2775.25, '2023-06-07 23:59:59'), + (4, 2500.00, '2023-06-07 23:59:59'), + (5, 155.99, '2023-06-07 23:59:59'), + (6, 29.95, '2023-06-07 23:59:59'), + (7, 172.98, '2023-06-07 23:59:59'), + (8, 0.00, '2023-06-07 23:59:59'), + (9, 125.00, '2023-06-07 23:59:59'), + (10, 219.98, '2023-06-07 23:59:59'), + (1, 82.10, CURRENT_DATE - INTERVAL '8 days'), + (2, 82.12, CURRENT_DATE - INTERVAL '8 days'), + (1, 82.92, CURRENT_DATE - INTERVAL '7 days'), + (2, 55.24, CURRENT_DATE - INTERVAL '7 days'), + (3, 
75.25, CURRENT_DATE - INTERVAL '7 days'), + (1, 50.00, CURRENT_DATE), + (2, 55.99, CURRENT_DATE), + (3, 29.95, CURRENT_DATE), + (4, 89.99, CURRENT_DATE), + (5, 599.99, CURRENT_DATE); + +-- notifications +DELETE FROM notifications; +INSERT INTO notifications (id, user_id, message, type, status, created_at, device_type, device_id, action_url) +VALUES +(1, 1, 'Your order #123abc has been shipped!', 'transaction', 'unread', '2023-06-01 10:16:00', 'mobile_app', 'mobile_8fh2k1', 'app://orders/123abc'), +(2, 1, 'Get 20% off your next purchase! Limited time offer.', 'promotion', 'unread', '2023-06-02 09:00:00', 'email', NULL, 'https://techmart.com/promo/TECH20'), +(3, 2, 'A package is being returned to you. Refund processing...', 'transaction', 'read', '2023-06-07 10:12:00', 'mobile_app', 'mobile_1av8p0', 'app://orders?status=returned'), +(4, 2, 'Your FitLife membership is up for renewal on 7/1', 'general', 'unread', '2023-06-05 15:30:00', 'email', NULL, 'https://fitlifegear.com/renew'), +(5, 3, 'An order from UrbanDining was unsuccessful', 'transaction', 'read', '2023-06-05 12:06:00', 'sms', NULL, 'https://urbandining.co/orders/37cf052d'), +(6, 4, 'Your rental request is pending approval', 'transaction', 'unread', '2023-06-02 20:46:00', 'mobile_app', 'mobile_34jdkl', 'app://bookings/a7659c81'), +(7, 5, 'Claim your 25% discount on craft supplies!', 'promotion', 'archived', '2023-06-01 08:00:00', 'email', NULL, 'https://handycraft.store/CRAFTY10'), +(8, 6, 'Your CodeSuite subscription will renew on 7/1', 'general', 'unread', '2023-06-01 12:00:00', 'email', NULL, 'https://codesuite.io/subscriptions'), +(9, 7, 'Thanks for shopping at ZenHomeGoods! How did we do?', 'general', 'read', '2023-06-02 09:31:00', 'mobile_app', 'mobile_yjp08q', 'https://zenhomesurvey.com/order/c51e10d1'), +(10, 8, 'Playtime! 
New games and toys have arrived', 'promotion', 'archived', '2023-06-01 18:00:00', 'email', NULL, 'https://kidzplayhouse.com/new-arrivals'), +(11, 9, 'Here''s $10 to start your glow up!', 'promotion', 'unread', '2023-06-01 10:15:00', 'email', NULL, 'https://beautytrending.com/new-customer'), +(12, 10, 'Your order #ord_mjs337 is being processed', 'transaction', 'read', '2023-06-04 19:31:30', 'web_app', 'web_d8180kaf', 'https://gamerush.co/orders/32e2b29c'), +(13, 1, 'New promotion: Get 10% off your next order!', 'promotion', 'unread', DATE_TRUNC('week', CURRENT_TIMESTAMP) - INTERVAL '1 week', 'email', NULL, 'https://techmart.com/promo/TECH10'), +(14, 1, 'Your order #456def has been delivered', 'transaction', 'unread', DATE_TRUNC('week', CURRENT_TIMESTAMP) - INTERVAL '2 weeks', 'mobile_app', 'mobile_8fh2k1', 'app://orders/456def'), +(15, 2, 'Reminder: Your FitLife membership expires in 7 days', 'general', 'unread', DATE_TRUNC('week', CURRENT_TIMESTAMP) - INTERVAL '3 weeks', 'email', NULL, 'https://fitlifegear.com/renew'), +(16, 2, 'Weekend Flash Sale: 25% off all activewear!', 'promotion', 'unread', DATE_TRUNC('week', CURRENT_TIMESTAMP) - INTERVAL '1 week' + INTERVAL '2 days', 'mobile_app', 'mobile_yjp08q', 'app://shop/activewear'); + +-- user_sessions +INSERT INTO user_sessions (user_id, session_start_ts, session_end_ts, device_type, device_id) +VALUES +(1, '2023-06-01 09:45:22', '2023-06-01 10:20:35', 'mobile_app', 'mobile_8fh2k1'), +(1, '2023-06-02 13:30:00', '2023-06-02 14:15:15', 'web_app', 'web_33lq1dh'), +(1, '2023-06-06 14:19:00', '2023-06-06 14:22:10', 'web_app', 'web_33lq1dh'), +(1, '2023-06-07 23:49:12', '2023-06-08 00:00:00', 'web_app', 'web_33lq1dh'), +(2, '2023-06-02 08:55:08', '2023-06-02 09:45:42', 'mobile_app', 'mobile_yjp08q'), +(2, '2023-06-07 10:09:15', '2023-06-07 10:12:25', 'mobile_app', 'mobile_1av8p0'), +(3, '2023-06-01 13:15:33', '2023-06-01 13:28:01', 'web_app', 'web_k29qjd'), +(3, '2023-06-05 12:00:00', '2023-06-05 12:10:22', 'mobile_app', 'mobile_x28qlj'), +(4, '2023-06-02 20:30:12', '2023-06-02 21:15:48', 'mobile_app', 'mobile_34jdkl'), +(5, '2023-06-03 10:45:30', '2023-06-03 11:20:28', 'web_app', 'web_8902wknz'), +(6, '2023-06-02 14:00:00', '2023-06-02 15:10:05', 'web_app', 'web_zz91p44l'), +(7, '2023-06-06 16:45:22', '2023-06-06 17:10:40', 'web_app', 'web_zld22f'), +(8, '2023-06-04 19:25:15', '2023-06-04 19:40:20', 'web_app', 'web_d8180kaf'), +(8, '2023-06-01 17:30:00', '2023-06-01 18:15:35', 'mobile_app', 'mobile_q3mz8n'), +(9, '2023-06-04 07:45:30', '2023-06-04 08:15:27', 'mobile_app', 'mobile_g3mjfz'), +(10, '2023-06-02 14:10:15', '2023-06-02 14:40:58', 'web_app', 'web_zz91p44l'), +(5, CURRENT_TIMESTAMP - INTERVAL '32 days', CURRENT_TIMESTAMP - INTERVAL '32 days' + INTERVAL '15 min', 'web_app', 'web_8902wknz'), +(6, CURRENT_TIMESTAMP - INTERVAL '8 days', CURRENT_TIMESTAMP - INTERVAL '8 days' + INTERVAL '15 min', 'web_app', 'web_zz91p44l'), +(7, CURRENT_TIMESTAMP - INTERVAL '5 days', CURRENT_TIMESTAMP - INTERVAL '5 days' + INTERVAL '15 min', 'web_app', 'web_zz91p44l'), +(8, CURRENT_TIMESTAMP - INTERVAL '3 days', CURRENT_TIMESTAMP - INTERVAL '3 days' + INTERVAL '15 min', 'web_app', 'web_d8180kaf'), +(9, CURRENT_TIMESTAMP - INTERVAL '1 days', CURRENT_TIMESTAMP - INTERVAL '1 days' + INTERVAL '15 min', 'mobile_app', 'mobile_g3mjfz'), +(10, CURRENT_TIMESTAMP - INTERVAL '2 days', CURRENT_TIMESTAMP - INTERVAL '2 days' + INTERVAL '15 min', 'web_app', 'web_zz91p44l'), +(5, CURRENT_TIMESTAMP - INTERVAL '2 days', CURRENT_TIMESTAMP - INTERVAL '2 days' + INTERVAL '15 min', 
'web_app', 'web_8902wknz') +; + +-- user_setting_snapshot +INSERT INTO user_setting_snapshot (user_id, snapshot_date, tx_limit_daily, tx_limit_monthly, membership_status, password_hash, api_key, verified_devices, verified_ips, mfa_enabled, marketing_opt_in, created_at) +VALUES +(1, '2023-06-07', 1000.00, 5000.00, 2, 'bcryptHash($2yz9!&ka1)', '9d61c49b-8977-4914-a36b-80d1445e38fa', 'mobile_8fh2k1', '192.168.0.1', true, false, '2023-06-07 00:00:00'), +(2, '2023-06-07', 500.00, 2500.00, 1, 'bcryptHash(qpwo9874zyGk!)', NULL, 'mobile_yjp08q, mobile_1av8p0', '198.51.100.233, 70.121.39.25', false, true, '2023-06-07 00:00:00'), +(3, '2023-06-07', 2000.00, 10000.00, 3, 'bcryptHash(Fr3nchPa1n!@98zy)', 'e785f611-fdd8-4c2d-a870-e104358712e5', 'web_k29qjd, mobile_x28qlj', '216.58.195.68, 92.110.51.150', true, false, '2023-06-07 00:00:00'), +(4, '2023-06-07', 5000.00, 20000.00, 4, 'bcryptHash(Vacay2023*&!Rm)', NULL, 'mobile_34jdkl', '143.92.64.138', false, true, '2023-06-07 00:00:00'), +(5, '2023-06-07', 100.00, 500.00, 0, 'bcryptHash(cRaf7yCr8zy)', NULL, 'web_8902wknz', '192.64.112.188', false, false, '2023-06-07 00:00:00'), +(6, '2023-06-07', 50.00, 500.00, 1, 'bcryptHash(C0d3Rul3z!99)', '6c03c175-9ac9-4854-b064-a3fff2c62e31', 'web_zz91p44l', '4.14.15.90', true, true, '2023-06-07 00:00:00'), +(7, '2023-06-07', 250.00, 1000.00, 2, 'bcryptHash(zEnH0me&Pw7)', NULL, NULL, NULL, false, true, '2023-06-07 00:00:00'), +(8, '2023-06-07', 200.00, 1000.00, 0, 'bcryptHash(K1dzPlay!&Rt8)', NULL, 'web_d8180kaf, mobile_q3mz8n', '8.26.53.165, 68.85.32.201', false, false, '2023-06-07 00:00:00'), +(9, '2023-06-07', 150.00, 1000.00, 2, 'bcryptHash(Gl0wUp7!9zy)', NULL, 'mobile_g3mjfz', '203.96.81.36', true, true, '2023-06-07 00:00:00'), +(10, '2023-06-07', 300.00, 2000.00, 1, 'bcryptHash(GamzRu1ez*&99!)', NULL, 'web_d8180kaf', '8.26.53.165', false, true, '2023-06-07 00:00:00'), +(1, '2023-06-01', 502.00, 1000.00, 2, 'bcryptHash($2yz9!&ka1)', '9d61c49b-8977-4914-a36b-80d1445e38fa', 'mobile_8fh2k1', '192.168.0.1', false, true, '2023-06-01 06:00:00'), +(2, '2023-06-01', 500.00, 2500.00, 1, 'bcryptHash(qpwo9874zyGk!)', NULL, 'mobile_yjp08q', '198.51.100.233, 70.121.39.25', true, false, '2023-06-01 09:00:00'); \ No newline at end of file diff --git a/tests/gen_data/init_defog_sqlite.sql b/tests/gen_data/init_defog_sqlite.sql index 8c81a3f48..94db5d0af 100644 --- a/tests/gen_data/init_defog_sqlite.sql +++ b/tests/gen_data/init_defog_sqlite.sql @@ -270,8 +270,8 @@ CREATE TABLE user_setting_snapshot (user_id INTEGER NOT NULL, snapshot_date DATE verified_devices TEXT /* comma separated list of device ids */, verified_ips TEXT /* comma separated list of IP addresses */, mfa_enabled INTEGER, marketing_opt_in INTEGER, created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP); INSERT INTO users (uid, username, email, phone_number, created_at, user_type, status, country, address_billing, address_delivery, kyc_status) VALUES - (1, 'john_doe', 'john.doe@email.com', '+1234567890', DATETIME('now', '-1 month'), 'individual', 'active', 'US', '123 Main St, Anytown US 12345', '123 Main St, Anytown US 12345', 'approved'), - (2, 'jane_smith', 'jane.smith@email.com', '+9876543210', DATETIME('now', '-2 months'), 'individual', 'active', 'CA', '456 Oak Rd, Toronto ON M1M2M2', '456 Oak Rd, Toronto ON M1M2M2', 'approved'), + (1, 'john_doe', 'john.doe@email.com', '+1234567890', DATE('now', 'start of month', '-1 month'), 'individual', 'active', 'US', '123 Main St, Anytown US 12345', '123 Main St, Anytown US 12345', 'approved'), + (2, 'jane_smith', 
'jane.smith@email.com', '+9876543210', DATE('now', 'start of month', '-2 months'), 'individual', 'active', 'CA', '456 Oak Rd, Toronto ON M1M2M2', '456 Oak Rd, Toronto ON M1M2M2', 'approved'), (3, 'bizuser', 'contact@business.co', '+1234509876', '2021-06-01 09:15:00', 'business', 'active', 'FR', '12 Rue Baptiste, Paris 75001', NULL, 'approved'), (4, 'david_miller', 'dave@personal.email', '+4477788899', '2023-03-20 18:45:00', 'individual', 'inactive', 'GB', '25 London Road, Manchester M12 4XY', '25 London Road, Manchester M12 4XY', 'pending'), (5, 'emily_wilson', 'emily.w@gmail.com', '+8091017161', '2021-11-03 22:10:00', 'individual', 'suspended', 'AU', '72 Collins St, Melbourne VIC 3000', '19 Smith St, Brunswick VIC 3056', 'rejected'), @@ -398,10 +398,10 @@ INSERT INTO notifications (id, user_id, message, type, status, created_at, devic (10, 8, 'Playtime! New games and toys have arrived', 'promotion', 'archived', '2023-06-01 18:00:00', 'email', NULL, 'https://kidzplayhouse.com/new-arrivals'), (11, 9, 'Here''s $10 to start your glow up!', 'promotion', 'unread', '2023-06-01 10:15:00', 'email', NULL, 'https://beautytrending.com/new-customer'), (12, 10, 'Your order #ord_mjs337 is being processed', 'transaction', 'read', '2023-06-04 19:31:30', 'web_app', 'web_d8180kaf', 'https://gamerush.co/orders/32e2b29c'), - (13, 1, 'New promotion: Get 10% off your next order!', 'promotion', 'unread', DATETIME('now', '-7 days'), 'email', NULL, 'https://techmart.com/promo/TECH10'), - (14, 1, 'Your order #456def has been delivered', 'transaction', 'unread', DATETIME('now', '-14 days'), 'mobile_app', 'mobile_8fh2k1', 'app://orders/456def'), - (15, 2, 'Reminder: Your FitLife membership expires in 7 days', 'general', 'unread', DATETIME('now', '-21 days'), 'email', NULL, 'https://fitlifegear.com/renew'), - (16, 2, 'Weekend Flash Sale: 25% off all activewear!', 'promotion', 'unread', DATETIME('now', '-7 days', '+2 day'), 'mobile_app', 'mobile_yjp08q', 'app://shop/activewear'); + (13, 1, 'New promotion: Get 10% off your next order!', 'promotion', 'unread', DATE('now','-' || ((CAST(STRFTIME('%w','now') AS INTEGER)+6) % 7 + 7*1) || ' days'), 'email', NULL, 'https://techmart.com/promo/TECH10'), + (14, 1, 'Your order #456def has been delivered', 'transaction', 'unread', DATE('now','-' || ((CAST(STRFTIME('%w','now') AS INTEGER)+6) % 7 + 7*2) || ' days'), 'mobile_app', 'mobile_8fh2k1', 'app://orders/456def'), + (15, 2, 'Reminder: Your FitLife membership expires in 7 days', 'general', 'unread', DATE('now','-' || ((CAST(STRFTIME('%w','now') AS INTEGER)+6) % 7 + 7*3) || ' days'), 'email', NULL, 'https://fitlifegear.com/renew'), + (16, 2, 'Weekend Flash Sale: 25% off all activewear!', 'promotion', 'unread', DATE('now','-' || ((CAST(STRFTIME('%w','now') AS INTEGER)+6) % 7 + 7*1 - 2) || ' days'), 'mobile_app', 'mobile_yjp08q', 'app://shop/activewear'); INSERT INTO user_sessions (user_id, session_start_ts, session_end_ts, device_type, device_id) VALUES (1, '2023-06-01 09:45:22', '2023-06-01 10:20:35', 'mobile_app', 'mobile_8fh2k1'), @@ -531,12 +531,12 @@ INSERT INTO sales (_id, car_id, salesperson_id, customer_id, sale_price, sale_da (14, 2, 3, 1, 23200.00, DATE('now', '-21 days')), (15, 8, 6, 12, 43500.00, DATE('now', '-3 days')), (16, 10, 4, 2, 29500.00, DATE('now', '-5 days')), - (17, 3, 2, 3, 46000.00, DATE('now', '-7 days', '+1 day')), - (18, 3, 2, 7, 47500.00, DATE('now', '-7 days')), - (19, 3, 2, 10, 46500.00, DATE('now', '-7 days', '-1 day')), - (20, 4, 1, 3, 48000.00, DATE('now', '-56 days', '+1 day')), - (21, 4, 1, 7, 
45000.00, DATE('now', '-56 days')), - (22, 4, 1, 10, 49000.00, DATE('now', '-56 days', '-1 day')); + (17, 3, 2, 3, 46000.00, DATE('now', '-' || ((CAST(STRFTIME('%w','now') AS INTEGER) + 6) % 7 + 7*1 - 1) || ' days')), + (18, 3, 2, 7, 47500.00, DATE('now', '-' || ((CAST(STRFTIME('%w','now') AS INTEGER) + 6) % 7 + 7*1) || ' days')), + (19, 3, 2, 10, 46500.00, DATE('now', '-' || ((CAST(STRFTIME('%w','now') AS INTEGER) + 6) % 7 + 7*1 + 1) || ' days')), + (20, 4, 1, 3, 48000.00, DATE('now', '-' || ((CAST(STRFTIME('%w','now') AS INTEGER) + 6) % 7 + 7*8 - 1) || ' days')), + (21, 4, 1, 7, 45000.00, DATE('now', '-' || ((CAST(STRFTIME('%w','now') AS INTEGER) + 6) % 7 + 7*8 ) || ' days')), + (22, 4, 1, 10, 49000.00, DATE('now', '-' || ((CAST(STRFTIME('%w','now') AS INTEGER) + 6) % 7 + 7*8 + 1) || ' days')); INSERT INTO inventory_snapshots (_id, snapshot_date, car_id, is_in_inventory) VALUES (1, '2023-03-15', 1, TRUE), @@ -581,12 +581,12 @@ INSERT INTO payments_received (_id, sale_id, payment_date, payment_amount, payme (15, 14, DATE('now', '-1 days'), 17200.00, 'financing'), (16, 15, DATE('now', '-1 days'), 37500.00, 'credit_card'), (17, 16, DATE('now', '-5 days'), 26500.00, 'debit_card'), - (18, 17, DATE('now', '-7 days', '+1 day'), 115000.00, 'financing'), - (19, 18, DATE('now', '-7 days'), 115000.00, 'credit_card'), - (20, 19, DATE('now', '-7 days', '-1 day'), 115000.00, 'debit_card'), - (21, 20, DATE('now', '-56 days', '+1 day'), 115000.00, 'cash'), - (22, 21, DATE('now', '-56 days'), 115000.00, 'check'), - (23, 22, DATE('now', '-56 days', '-1 day'), 115000.00, 'credit_card'); + (18, 17, DATE('now','-' || ((CAST(STRFTIME('%w','now') AS INTEGER) + 6) % 7 + 7*1 - 1) || ' days'), 115000.00, 'financing'), + (19, 18, DATE('now','-' || ((CAST(STRFTIME('%w','now') AS INTEGER) + 6) % 7 + 7*1) || ' days'), 115000.00, 'credit_card'), + (20, 19, DATE('now','-' || ((CAST(STRFTIME('%w','now') AS INTEGER) + 6) % 7 + 7*1 + 1) || ' days'), 115000.00, 'debit_card'), + (21, 20, DATE('now','-' || ((CAST(STRFTIME('%w','now') AS INTEGER) + 6) % 7 + 7*8 - 1) || ' days'), 115000.00, 'cash'), + (22, 21, DATE('now','-' || ((CAST(STRFTIME('%w','now') AS INTEGER) + 6) % 7 + 7*8) || ' days'), 115000.00, 'check'), + (23, 22, DATE('now','-' || ((CAST(STRFTIME('%w','now') AS INTEGER) + 6) % 7 + 7*8 + 1) || ' days'), 115000.00, 'credit_card'); INSERT INTO payments_made (_id, vendor_name, payment_date, payment_amount, payment_method, invoice_number, invoice_date, due_date) VALUES (1, 'Car Manufacturer Inc', '2023-03-01', 150000.00, 'bank_transfer', 'INV-001', '2023-02-25', '2023-03-25'), diff --git a/tests/gen_data/sf_task.sql b/tests/gen_data/sf_task.sql new file mode 100644 index 000000000..7a62b0ee0 --- /dev/null +++ b/tests/gen_data/sf_task.sql @@ -0,0 +1,579 @@ +------------------------------------ +-- 1. 
Create the stored procedure -- +------------------------------------ + +CREATE OR REPLACE PROCEDURE Defog.BROKER.defog_daily_update() +RETURNS STRING +LANGUAGE SQL +AS +$$ +BEGIN + + DELETE FROM DEFOG.EWALLET.USERS; + INSERT INTO DEFOG.EWALLET.USERS(uid, username, email, phone_number, created_at, user_type, status, country, address_billing, address_delivery, kyc_status) +VALUES + (1, 'john_doe', 'john.doe@email.com', '+1234567890', DATE_TRUNC('month', CURRENT_TIMESTAMP) - INTERVAL '1 month', 'individual', 'active', 'US', '123 Main St, Anytown US 12345', '123 Main St, Anytown US 12345', 'approved'), + (2, 'jane_smith', 'jane.smith@email.com', '+9876543210', DATE_TRUNC('month', CURRENT_TIMESTAMP) - INTERVAL '2 months', 'individual', 'active', 'CA', '456 Oak Rd, Toronto ON M1M2M2', '456 Oak Rd, Toronto ON M1M2M2', 'approved'), + (3, 'bizuser', 'contact@business.co', '+1234509876', '2021-06-01 09:15:00', 'business', 'active', 'FR', '12 Rue Baptiste, Paris 75001', NULL, 'approved'), + (4, 'david_miller', 'dave@personal.email', '+4477788899', '2023-03-20 18:45:00', 'individual', 'inactive', 'GB', '25 London Road, Manchester M12 4XY', '25 London Road, Manchester M12 4XY', 'pending'), + (5, 'emily_wilson', 'emily.w@gmail.com', '+8091017161', '2021-11-03 22:10:00', 'individual', 'suspended', 'AU', '72 Collins St, Melbourne VIC 3000', '19 Smith St, Brunswick VIC 3056', 'rejected'), + (6, 'techcorp', 'orders@techcorp.com', '+14165558888', '2018-05-20 11:35:00', 'business', 'active', 'US', '33 Technology Dr, Silicon Valley CA 94301', NULL, 'approved'), + (7, 'shopsmart', 'customerserv@shopsmart.biz', '+6585771234', '2020-09-15 06:25:00', 'business', 'inactive', 'SG', '888 Orchard Rd, #05-000, Singapore 238801', NULL, 'approved'), + (8, 'michael_brown', 'mike.brown@outlook.com', '+3912378624', '2019-07-22 16:40:00', 'individual', 'active', 'DE', 'Heidestr 17, Berlin 10557', 'Heidestr 17, Berlin 10557', 'approved'), + (9, 'alex_taylor', 'ataylo@university.edu', NULL, '2022-08-30 09:15:00', 'individual', 'active', 'NZ', '12 Mardon Rd, Wellington 6012', '5 Boulcott St, Wellington 6011', 'approved'), + (10, 'huang2143', 'huang2143@example.com', '+8612345678901', '2023-12-10 08:00:00', 'individual', 'active', 'CN', '123 Nanjing Road, Shanghai 200000', '123 Nanjing Road, Shanghai 200000', 'approved'), + (11, 'lisa_jones', 'lisa.jones@email.com', '+6123456789', '2023-09-05 15:20:00', 'individual', 'active', 'AU', '789 George St, Sydney NSW 2000', '789 George St, Sydney NSW 2000', 'approved'); + + DELETE FROM DEFOG.EWALLET.MERCHANTS; + INSERT INTO DEFOG.EWALLET.MERCHANTS(mid, name, description, website_url, logo_url, created_at, country, state, city, postal_code, address, status, category, sub_category, mcc, contact_name, contact_email, contact_phone) +VALUES + (1, 'TechMart', 'Leading electronics retailer', 'https://www.techmart.com', 'https://www.techmart.com/logo.png', '2015-01-15 00:00:00', 'US', 'California', 'Los Angeles', '90011', '645 Wilshire Blvd, Los Angeles CA 90011', 'active', 'retail (hardware)', 'Electronics', 5732, 'John Jacobs', 'jjacobs@techmart.com', '+15551234567'), + (2, 'FitLifeGear', 'Fitness equipment and activewear', 'https://fitlifegear.com', 'https://fitlifegear.com/brand.jpg', '2018-07-01 00:00:00', 'CA', 'Ontario', 'Toronto', 'M5V2J2', '421 Richmond St W, Toronto ON M5V2J2', 'active', 'retail (hardware)', 'Sporting Goods', 5655, 'Jane McDonald', 'jmcdonald@fitlifegear.com', '+14165559876'), + (3, 'UrbanDining', 'Local restaurants and cafes', 'https://www.urbandining.co', 
'https://www.urbandining.co/logo.png', '2020-03-10 00:00:00', 'FR', NULL, 'Paris', '75011', '35 Rue du Faubourg Saint-Antoine, 75011 Paris', 'active', 'Food & Dining', 'Restaurants', 5812, 'Pierre Gagnon', 'pgagnon@urbandining.co', '+33612345678'), + (4, 'LuxStays', 'Boutique vacation rentals', 'https://luxstays.com', 'https://luxstays.com/branding.jpg', '2016-11-01 00:00:00', 'IT', NULL, 'Rome', '00187', 'Via della Conciliazione 15, Roma 00187', 'inactive', 'Travel & Hospitality', 'Accommodation', 7011, 'Marco Rossi', 'mrossi@luxstays.com', '+39061234567'), + (5, 'HandyCraft', 'Handmade arts and crafts supplies', 'https://handycraft.store', 'https://handycraft.store/hc-logo.png', '2022-06-20 00:00:00', 'ES', 'Catalonia', 'Barcelona', '08003', 'Passeig de Gracia 35, Barcelona 08003', 'active', 'Retail', 'Crafts & Hobbies', 5949, 'Ana Garcia', 'agarcia@handycraft.store', '+34612345678'), + (6, 'CodeSuite', 'SaaS productivity tools for developers', 'https://codesuite.io', 'https://codesuite.io/logo.svg', '2019-02-01 00:00:00', 'DE', NULL, 'Berlin', '10119', 'Dessauer Str 28, 10119 Berlin', 'active', 'Business Services', 'Software', 5734, 'Michael Schmidt', 'mschmidt@codesuite.io', '+49301234567'), + (7, 'ZenHomeGoods', 'Housewares and home decor items', 'https://www.zenhomegoods.com', 'https://www.zenhomegoods.com/branding.jpg', '2014-09-15 00:00:00', 'AU', 'Victoria', 'Melbourne', '3004', '159 Franklin St, Melbourne VIC 3004', 'active', 'Retail', 'Home & Garden', 5719, 'Emily Watson', 'ewatson@zenhomegoods.com', '+61312345678'), + (8, 'KidzPlayhouse', 'Children''s toys and games', 'https://kidzplayhouse.com', 'https://kidzplayhouse.com/logo.png', '2017-04-01 00:00:00', 'GB', NULL, 'London', 'WC2N 5DU', '119 Charing Cross Rd, London WC2N 5DU', 'suspended', 'Retail', 'Toys & Games', 5945, 'David Thompson', 'dthompson@kidzplayhouse.com', '+442071234567'), + (9, 'BeautyTrending', 'Cosmetics and beauty supplies', 'https://beautytrending.com', 'https://beautytrending.com/bt-logo.svg', '2021-10-15 00:00:00', 'NZ', NULL, 'Auckland', '1010', '129 Queen St, Auckland 1010', 'active', 'Retail', 'Health & Beauty', 5977, 'Sophie Wilson', 'swilson@beautytrending.com', '+6493012345'), + (10, 'GameRush', 'Video games and gaming accessories', 'https://gamerush.co', 'https://gamerush.co/gr-logo.png', '2023-02-01 00:00:00', 'US', 'New York', 'New York', '10001', '303 Park Ave S, New York NY 10001', 'active', 'Retail', 'Electronics', 5735, 'Michael Davis', 'mdavis@gamerush.co', '+16463012345'), + (11, 'FashionTrend', 'Trendy clothing and accessories', 'https://www.fashiontrend.com', 'https://www.fashiontrend.com/logo.png', '2019-08-10 00:00:00', 'UK', NULL, 'Manchester', 'M2 4WU', '87 Deansgate, Manchester M2 4WU', 'active', 'Retail', 'Apparel', 5651, 'Emma Thompson', 'ethompson@fashiontrend.com', '+441612345678'), + (12, 'GreenGourmet', 'Organic foods and natural products', 'https://www.greengourmet.com', 'https://www.greengourmet.com/logo.jpg', '2020-12-05 00:00:00', 'CA', 'British Columbia', 'Vancouver', 'V6B 6B1', '850 W Hastings St, Vancouver BC V6B 6B1', 'active', 'Food & Dining', 'Groceries', 5411, 'Daniel Lee', 'dlee@greengourmet.com', '+16041234567'), + (13, 'PetParadise', 'Pet supplies and accessories', 'https://petparadise.com', 'https://petparadise.com/logo.png', '2018-03-20 00:00:00', 'AU', 'New South Wales', 'Sydney', '2000', '275 Pitt St, Sydney NSW 2000', 'active', 'Retail', 'Pets', 5995, 'Olivia Johnson', 'ojohnson@petparadise.com', '+61298765432'), + (14, 'HomeTechSolutions', 'Smart home 
devices and gadgets', 'https://hometechsolutions.net', 'https://hometechsolutions.net/logo.png', '2022-04-15 00:00:00', 'US', 'California', 'San Francisco', '94105', '350 Mission St, San Francisco CA 94105', 'active', 'Retail', 'Home Appliances', 5734, 'Ethan Brown', 'ebrown@hometechsolutions.net', '+14159876543'), + (15, 'BookWorms', 'Books and reading accessories', 'https://bookworms.co.uk', 'https://bookworms.co.uk/logo.png', '2017-06-30 00:00:00', 'UK', NULL, 'London', 'WC2H 9JA', '66-67 Tottenham Court Rd, London WC2H 9JA', 'active', 'Retail', 'Books', 5942, 'Sophia Turner', 'sturner@bookworms.co.uk', '+442078912345'); + + DELETE FROM DEFOG.EWALLET.COUPONS; + INSERT INTO DEFOG.EWALLET.COUPONS (cid, merchant_id, code, description, start_date, end_date, discount_type, discount_value, min_purchase_amount, max_discount_amount, redemption_limit, status, created_at, updated_at) +VALUES + (1, 1, 'TECH20', '20% off tech and electronics', '2023-05-01', '2023-05-31', 'percentage', 20.00, 100.00, NULL, 500, 'active', '2023-04-01 09:00:00', '2023-04-15 11:30:00'), + (2, 2, 'NEWYEAR30', '30% off workout gear', '2023-01-01', '2023-01-15', 'percentage', 30.00, NULL, NULL, 1000, 'expired', '2022-12-01 12:00:00', '2023-01-16 18:45:00'), + (3, 3, 'DINEDISCOUNT', 'Get $10 off $50 order', '2023-06-01', '2023-06-30', 'fixed_amount', 10.00, 50.00, 10.00, NULL, 'active', '2023-05-15 15:30:00', NULL), + (4, 4, 'HOME15', '15% off weekly rental', '2023-07-01', '2023-08-31', 'percentage', 15.00, 1000.00, 300.00, 200, 'active', '2023-05-01 09:15:00', NULL), + (5, 5, 'HOME10', '$10 off $75+ purchase', '2023-04-01', '2023-04-30', 'fixed_amount', 10.00, 75.00, 10.00, 300, 'inactive', '2023-03-01 14:00:00', '2023-05-05 10:30:00'), + (6, 6, 'CODENEW25', '25% off new subscriptions', '2023-03-01', '2023-03-31', 'percentage', 25.00, NULL, NULL, NULL, 'expired', '2023-02-15 11:00:00', '2023-04-01 09:30:00'), + (7, 7, 'ZENHOME', 'Get 20% off home items', '2023-09-01', '2023-09-30', 'percentage', 20.00, 50.00, NULL, 1500, 'active', '2023-08-15 16:45:00', NULL), + (8, 8, 'GAMEKIDS', '$15 off $100+ purchase', '2022-12-01', '2022-12-31', 'fixed_amount', 15.00, 100.00, 15.00, 800, 'expired', '2022-11-01 10:30:00', '2023-01-02 13:15:00'), + (9, 9, 'GLOWUP', 'Buy 2 get 1 free on cosmetics', '2023-10-15', '2023-10-31', 'fixed_amount', 50.00, 150.00, 50.00, 300, 'active', '2023-10-01 08:00:00', NULL), + (10, 10, 'GAMERALERT', 'Get 25% off accessories', '2023-03-01', '2023-03-15', 'percentage', 25.00, NULL, 50.00, 750, 'expired', '2023-02-15 14:30:00', '2023-03-16 12:00:00'); + + + DELETE FROM DEFOG.EWALLET.WALLET_TRANSACTIONS_DAILY; + INSERT INTO DEFOG.EWALLET.WALLET_TRANSACTIONS_DAILY (txid, sender_id, sender_type, receiver_id, receiver_type, amount, status, type, description, coupon_id, created_at, completed_at, transaction_ref, gateway_name, gateway_ref, device_id, ip_address, user_agent) +VALUES + (1, 1, 0, 1, 0, 99.99, 'success', 'debit', 'Online purchase', NULL, '2023-06-01 10:15:30', '2023-06-01 10:15:45', 'ad154bf7-8185-4230-a8d8-3ef59b4e0012', 'Stripe', 'tx_123abc456def', 'mobile_8fh2k1', '192.168.0.1', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_3_1 like Mac OS X) ...'), + (2, 1, 0, 1, 1, 20.00, 'success', 'credit', 'Coupon discount', 1, '2023-06-01 10:15:30', '2023-06-01 10:15:45', 'ad154bf7-8185-4230-a8d8-3ef59b4e0012', 'Stripe', 'tx_123abc456def', 'mobile_8fh2k1', '192.168.0.1', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_3_1 like Mac OS X) ...'), + (3, 2, 0, 1, 1, 16.00, 'success', 'credit', 'Coupon discount', 1, '2023-07-01 
10:18:30', '2023-06-01 10:18:45', 'kd454bf7-428d-eig2-a8d8-3ef59b4e0012', 'Stripe', 'tx_123abc789gas', 'mobile_yjp08q', '198.51.100.233', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_4 like Mac OS X) ...'), + (4, 3, 1, 9, 0, 125.50, 'success', 'debit', 'Product purchase', NULL, '2023-06-01 13:22:18', '2023-06-01 13:22:45', 'e6f510e9-ff7d-4914-81c2-f8e56bae4012', 'PayPal', 'ppx_192ks8hl', 'web_k29qjd', '216.58.195.68', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ...'), + (5, 9, 0, 3, 1, 42.75, 'pending', 'debit', 'Order #438721', 3, '2023-06-01 18:45:02', '2023-06-01 18:45:13', 'b2ca190e-a42f-4f5e-8318-f82bcc6ae64e', 'Stripe', 'tx_987zyx654wvu', 'mobile_q3mz8n', '68.85.32.201', 'Mozilla/5.0 (Linux; Android 13) ...'), + (6, 9, 0, 3, 1, 10.00, 'success', 'credit', 'Coupon discount', 3, '2023-06-01 18:45:02', '2023-06-01 18:45:13', 'b2ca190e-a42f-4f5e-8318-f82bcc6ae64e', 'Stripe', 'tx_987zyx654wvu', 'mobile_q3mz8n', '68.85.32.201', 'Mozilla/5.0 (Linux; Android 13) ...'), + (7, 2, 0, 7, 1, 89.99, 'pending', 'debit', 'Home furnishings', NULL, '2023-06-02 09:30:25', '2023-06-02 09:30:40', 'c51e10d1-db34-4d9f-b55f-43a05a5481c8', 'Checkout.com', 'ord_kzhg123', 'mobile_yjp08q', '198.51.100.233', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_4 like Mac OS X) ...'), + (8, 2, 0, 7, 1, 17.99, 'success', 'credit', 'Coupon discount', 7, '2023-06-02 09:30:25', '2023-06-02 09:30:40', 'c51e10d1-db34-4d9f-b55f-43a05a5481c8', 'Checkout.com', 'ord_kzhg123', 'mobile_yjp08q', '198.51.100.233', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_4 like Mac OS X) ...'), + (9, 6, 1, 1, 0, 29.95, 'success', 'debit', 'Software subscription', NULL, '2023-06-02 14:15:00', '2023-06-02 14:15:05', '25cd48e5-08c3-4d1c-b7a4-26485ea646eb', 'Braintree', 'sub_mnb456', 'web_zz91p44l', '4.14.15.90', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ...'), + (10, 4, 0, 4, 1, 2500.00, 'pending', 'debit', 'Villa rental deposit', NULL, '2023-06-02 20:45:36', NULL, 'a7659c81-0cd0-4635-af6c-cf68d2c15ab2', 'PayPal', NULL, 'mobile_34jdkl', '143.92.64.138', 'Mozilla/5.0 (Linux; Android 11; Pixel 5) ...'), + (11, 5, 0, 5, 1, 55.99, 'success', 'debit', 'Craft supplies order', NULL, '2023-06-03 11:12:20', '2023-06-03 11:12:35', 'ec74cb3b-8272-4175-a5d0-f03c2e781593', 'Adyen', 'ord_tkjs87', 'web_8902wknz', '192.64.112.188', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ...'), + (12, 9, 0, 9, 1, 75.00, 'success', 'debit', 'Beauty products', 9, '2023-06-04 08:00:00', '2023-06-04 08:00:25', '840a9854-1b07-422b-853c-636b289222a9', 'Checkout.com', 'ord_kio645', 'mobile_g3mjfz', '203.96.81.36', 'Mozilla/5.0 (Linux; Android 12; SM-S906N Build/QP1A.190711.020) ...'), + (13, 9, 0, 9, 1, 50.00, 'success', 'credit', 'Coupon discount', 9, '2023-06-04 08:00:00', '2023-06-04 08:00:25', '840a9854-1b07-422b-853c-636b289222a9', 'Checkout.com', 'ord_kio645', 'mobile_g3mjfz', '203.96.81.36', 'Mozilla/5.0 (Linux; Android 12; SM-S906N Build/QP1A.190711.020) ...'), + (14, 8, 0, 10, 1, 119.99, 'failed', 'debit', 'New game purchase', NULL, '2023-06-04 19:30:45', NULL, '32e2b29c-5c7f-4906-98c5-e8abdcbfd69a', 'Braintree', 'ord_mjs337', 'web_d8180kaf', '8.26.53.165', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ...'), + (15, 8, 0, 10, 1, 29.99, 'success', 'credit', 'Coupon discount', 10, '2023-06-04 19:30:45', '2023-06-04 19:31:10', '32e2b29c-5c7f-4906-98c5-e8abdcbfd69a', 'Braintree', 'ord_mjs337', 'web_d8180kaf', '8.26.53.165', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ...'), + (16, 10, 1, 3, 0, 87.50, 'failed', 'debit', 'Restaurant 
order', NULL, '2023-06-05 12:05:21', NULL, '37cf052d-0475-4ecc-bda7-73ee904bf65c', 'Checkout.com', NULL, 'mobile_x28qlj', '92.110.51.150', 'Mozilla/5.0 (Linux; Android 13; SM-S901B) ...'), + (17, 1, 0, 1, 0, 175.00, 'success', 'debit', 'Refund on order #1234', NULL, '2023-06-06 14:20:00', '2023-06-06 14:20:05', 'a331232e-a3f6-4e7f-b49f-3588bc5ff985', 'Stripe', 'rfnd_xkt521', 'web_33lq1dh', '38.75.197.8', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) ...'), + (18, 7, 1, 2, 0, 599.99, 'success', 'debit', 'Yearly subscription', NULL, '2023-06-06 16:55:10', '2023-06-06 16:55:15', 'ed6f46ab-9617-4d11-9aa9-60d24bdf9bc0', 'PayPal', 'sub_pjj908', 'web_zld22f', '199.59.148.201', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ...'), + (19, 2, 0, 2, 1, 22.99, 'refunded', 'debit', 'Product return', NULL, '2023-06-07 10:10:30', '2023-06-07 10:11:05', '6c97a87d-610f-4705-ae97-55071127d9ad', 'Adyen', 'tx_zcx258', 'mobile_1av8p0', '70.121.39.25', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_4 like Mac OS X) ...'), + (20, 2, 0, 2, 1, 22.99, 'success', 'credit', 'Refund on return', NULL, '2023-06-07 10:10:30', '2023-06-07 10:11:05', '6c97a87d-610f-4705-ae97-55071127d9ad', 'Adyen', 'tx_zcx258', 'mobile_1av8p0', '70.121.39.25', 'Mozilla/5.0 (iPhone; CPU iPhone OS 16_4 like Mac OS X) ...'), + (21, 1, 0, 2, 1, 49.99, 'success', 'debit', 'Product purchase', NULL, CURRENT_TIMESTAMP - INTERVAL '5 months', CURRENT_TIMESTAMP - INTERVAL '5 months', 'tx_ref_11_1', 'Stripe', 'stripe_ref_11_1', 'device_11_1', '192.168.1.11', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'), + (22, 4, 0, 3, 1, 99.99, 'success', 'debit', 'Service purchase', NULL, CURRENT_TIMESTAMP - INTERVAL '4 months', CURRENT_TIMESTAMP - INTERVAL '4 months', 'tx_ref_12_1', 'PayPal', 'paypal_ref_12_1', 'device_12_1', '192.168.1.12', 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.2 Mobile/15E148 Safari/604.1'), + (23, 4, 0, 1, 1, 149.99, 'success', 'debit', 'Subscription purchase', NULL, CURRENT_TIMESTAMP - INTERVAL '3 months', CURRENT_TIMESTAMP - INTERVAL '3 months', 'tx_ref_13_1', 'Stripe', 'stripe_ref_13_1', 'device_13_1', '192.168.1.13', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'), + (24, 2, 0, 5, 1, 199.99, 'pending', 'debit', 'Product purchase', NULL, CURRENT_TIMESTAMP - INTERVAL '2 months', CURRENT_TIMESTAMP - INTERVAL '2 months', 'tx_ref_14_1', 'PayPal', 'paypal_ref_14_1', 'device_14_1', '192.168.1.14', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0'), + (25, 2, 0, 1, 1, 249.99, 'success', 'debit', 'Service purchase', NULL, CURRENT_TIMESTAMP - INTERVAL '1 month', CURRENT_TIMESTAMP - INTERVAL '1 month', 'tx_ref_15_1', 'Stripe', 'stripe_ref_15_1', 'device_15_1', '192.168.1.15', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36'), + (26, 7, 1, 2, 0, 299.99, 'success', 'debit', 'Renew subscription', NULL, CURRENT_TIMESTAMP - INTERVAL '3 weeks', CURRENT_TIMESTAMP - INTERVAL '3 weeks', 'ed6f46ab-9617-4d11-9aa9-55071127d9ad', 'PayPal', 'sub_pjk832', 'web_zld22f', '199.59.148.201', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ...'); + + + DELETE FROM DEFOG.EWALLET.WALLET_USER_BALANCE_DAILY; + INSERT INTO DEFOG.EWALLET.WALLET_USER_BALANCE_DAILY (user_id, balance, updated_at) +VALUES + (1, 525.80, '2023-06-07 23:59:59'), + (2, 429.76, '2023-06-07 
23:59:59'), + (3, -725.55, '2023-06-07 23:59:59'), + (4, -2500.00, '2023-06-07 23:59:59'), + (5, -55.99, '2023-06-07 23:59:59'), + (6, 0.00, '2023-06-07 23:59:59'), + (7, 0.00, '2023-06-07 23:59:59'), + (8, -599.98, '2023-06-07 23:59:59'), + (9, -183.25, '2023-06-07 23:59:59'), + (10, 0.00, '2023-06-07 23:59:59'), + (1, 2739.10, CURRENT_TIMESTAMP - INTERVAL '8 days'), + (1, 2738.12, CURRENT_TIMESTAMP - INTERVAL '6 days'), + (1, 2733.92, CURRENT_TIMESTAMP - INTERVAL '3 days'), + (2, 155.24, CURRENT_TIMESTAMP - INTERVAL '7 days'), + (3, 2775.25, CURRENT_TIMESTAMP - INTERVAL '6 days'), + (4, 2500.00, CURRENT_TIMESTAMP - INTERVAL '5 days'), + (5, 155.99, CURRENT_TIMESTAMP - INTERVAL '4 days'), + (6, 29.95, CURRENT_TIMESTAMP - INTERVAL '3 days'), + (7, 172.98, CURRENT_TIMESTAMP - INTERVAL '2 days'), + (8, 0.00, CURRENT_TIMESTAMP - INTERVAL '7 days'), + (9, 125.00, CURRENT_TIMESTAMP - INTERVAL '3 days'), + (10, 219.98, CURRENT_TIMESTAMP - INTERVAL '1 days'); + + DELETE FROM DEFOG.EWALLET.WALLET_MERCHANT_BALANCE_DAILY; + INSERT INTO DEFOG.EWALLET.WALLET_MERCHANT_BALANCE_DAILY (merchant_id, balance, updated_at) +VALUES + (1, 3897.99, '2023-06-07 23:59:59'), + (2, 155.24, '2023-06-07 23:59:59'), + (3, 2775.25, '2023-06-07 23:59:59'), + (4, 2500.00, '2023-06-07 23:59:59'), + (5, 155.99, '2023-06-07 23:59:59'), + (6, 29.95, '2023-06-07 23:59:59'), + (7, 172.98, '2023-06-07 23:59:59'), + (8, 0.00, '2023-06-07 23:59:59'), + (9, 125.00, '2023-06-07 23:59:59'), + (10, 219.98, '2023-06-07 23:59:59'), + (1, 82.10, CURRENT_DATE - INTERVAL '8 days'), + (2, 82.12, CURRENT_DATE - INTERVAL '8 days'), + (1, 82.92, CURRENT_DATE - INTERVAL '7 days'), + (2, 55.24, CURRENT_DATE - INTERVAL '7 days'), + (3, 75.25, CURRENT_DATE - INTERVAL '7 days'), + (1, 50.00, CURRENT_DATE), + (2, 55.99, CURRENT_DATE), + (3, 29.95, CURRENT_DATE), + (4, 89.99, CURRENT_DATE), + (5, 599.99, CURRENT_DATE); + + DELETE FROM DEFOG.EWALLET.NOTIFICATIONS; + INSERT INTO DEFOG.EWALLET.NOTIFICATIONS (id, user_id, message, type, status, created_at, device_type, device_id, action_url) +VALUES +(1, 1, 'Your order #123abc has been shipped!', 'transaction', 'unread', '2023-06-01 10:16:00', 'mobile_app', 'mobile_8fh2k1', 'app://orders/123abc'), +(2, 1, 'Get 20% off your next purchase! Limited time offer.', 'promotion', 'unread', '2023-06-02 09:00:00', 'email', NULL, 'https://techmart.com/promo/TECH20'), +(3, 2, 'A package is being returned to you. Refund processing...', 'transaction', 'read', '2023-06-07 10:12:00', 'mobile_app', 'mobile_1av8p0', 'app://orders?status=returned'), +(4, 2, 'Your FitLife membership is up for renewal on 7/1', 'general', 'unread', '2023-06-05 15:30:00', 'email', NULL, 'https://fitlifegear.com/renew'), +(5, 3, 'An order from UrbanDining was unsuccessful', 'transaction', 'read', '2023-06-05 12:06:00', 'sms', NULL, 'https://urbandining.co/orders/37cf052d'), +(6, 4, 'Your rental request is pending approval', 'transaction', 'unread', '2023-06-02 20:46:00', 'mobile_app', 'mobile_34jdkl', 'app://bookings/a7659c81'), +(7, 5, 'Claim your 25% discount on craft supplies!', 'promotion', 'archived', '2023-06-01 08:00:00', 'email', NULL, 'https://handycraft.store/CRAFTY10'), +(8, 6, 'Your CodeSuite subscription will renew on 7/1', 'general', 'unread', '2023-06-01 12:00:00', 'email', NULL, 'https://codesuite.io/subscriptions'), +(9, 7, 'Thanks for shopping at ZenHomeGoods! How did we do?', 'general', 'read', '2023-06-02 09:31:00', 'mobile_app', 'mobile_yjp08q', 'https://zenhomesurvey.com/order/c51e10d1'), +(10, 8, 'Playtime! 
New games and toys have arrived', 'promotion', 'archived', '2023-06-01 18:00:00', 'email', NULL, 'https://kidzplayhouse.com/new-arrivals'), +(11, 9, 'Here''s $10 to start your glow up!', 'promotion', 'unread', '2023-06-01 10:15:00', 'email', NULL, 'https://beautytrending.com/new-customer'), +(12, 10, 'Your order #ord_mjs337 is being processed', 'transaction', 'read', '2023-06-04 19:31:30', 'web_app', 'web_d8180kaf', 'https://gamerush.co/orders/32e2b29c'), +(13, 1, 'New promotion: Get 10% off your next order!', 'promotion', 'unread', DATE_TRUNC('week', CURRENT_TIMESTAMP) - INTERVAL '1 week', 'email', NULL, 'https://techmart.com/promo/TECH10'), +(14, 1, 'Your order #456def has been delivered', 'transaction', 'unread', DATE_TRUNC('week', CURRENT_TIMESTAMP) - INTERVAL '2 weeks', 'mobile_app', 'mobile_8fh2k1', 'app://orders/456def'), +(15, 2, 'Reminder: Your FitLife membership expires in 7 days', 'general', 'unread', DATE_TRUNC('week', CURRENT_TIMESTAMP) - INTERVAL '3 weeks', 'email', NULL, 'https://fitlifegear.com/renew'), +(16, 2, 'Weekend Flash Sale: 25% off all activewear!', 'promotion', 'unread', DATE_TRUNC('week', CURRENT_TIMESTAMP) - INTERVAL '1 week' + INTERVAL '2 days', 'mobile_app', 'mobile_yjp08q', 'app://shop/activewear'); + + DELETE FROM DEFOG.EWALLET.USER_SESSIONS; + INSERT INTO DEFOG.EWALLET.USER_SESSIONS (user_id, session_start_ts, session_end_ts, device_type, device_id) +VALUES +(1, '2023-06-01 09:45:22', '2023-06-01 10:20:35', 'mobile_app', 'mobile_8fh2k1'), +(1, '2023-06-02 13:30:00', '2023-06-02 14:15:15', 'web_app', 'web_33lq1dh'), +(1, '2023-06-06 14:19:00', '2023-06-06 14:22:10', 'web_app', 'web_33lq1dh'), +(1, '2023-06-07 23:49:12', '2023-06-08 00:00:00', 'web_app', 'web_33lq1dh'), +(2, '2023-06-02 08:55:08', '2023-06-02 09:45:42', 'mobile_app', 'mobile_yjp08q'), +(2, '2023-06-07 10:09:15', '2023-06-07 10:12:25', 'mobile_app', 'mobile_1av8p0'), +(3, '2023-06-01 13:15:33', '2023-06-01 13:28:01', 'web_app', 'web_k29qjd'), +(3, '2023-06-05 12:00:00', '2023-06-05 12:10:22', 'mobile_app', 'mobile_x28qlj'), +(4, '2023-06-02 20:30:12', '2023-06-02 21:15:48', 'mobile_app', 'mobile_34jdkl'), +(5, '2023-06-03 10:45:30', '2023-06-03 11:20:28', 'web_app', 'web_8902wknz'), +(6, '2023-06-02 14:00:00', '2023-06-02 15:10:05', 'web_app', 'web_zz91p44l'), +(7, '2023-06-06 16:45:22', '2023-06-06 17:10:40', 'web_app', 'web_zld22f'), +(8, '2023-06-04 19:25:15', '2023-06-04 19:40:20', 'web_app', 'web_d8180kaf'), +(8, '2023-06-01 17:30:00', '2023-06-01 18:15:35', 'mobile_app', 'mobile_q3mz8n'), +(9, '2023-06-04 07:45:30', '2023-06-04 08:15:27', 'mobile_app', 'mobile_g3mjfz'), +(10, '2023-06-02 14:10:15', '2023-06-02 14:40:58', 'web_app', 'web_zz91p44l'), +(5, CURRENT_TIMESTAMP - INTERVAL '32 days', CURRENT_TIMESTAMP - INTERVAL '32 days' + INTERVAL '15 min', 'web_app', 'web_8902wknz'), +(6, CURRENT_TIMESTAMP - INTERVAL '8 days', CURRENT_TIMESTAMP - INTERVAL '8 days' + INTERVAL '15 min', 'web_app', 'web_zz91p44l'), +(7, CURRENT_TIMESTAMP - INTERVAL '5 days', CURRENT_TIMESTAMP - INTERVAL '5 days' + INTERVAL '15 min', 'web_app', 'web_zz91p44l'), +(8, CURRENT_TIMESTAMP - INTERVAL '3 days', CURRENT_TIMESTAMP - INTERVAL '3 days' + INTERVAL '15 min', 'web_app', 'web_d8180kaf'), +(9, CURRENT_TIMESTAMP - INTERVAL '1 days', CURRENT_TIMESTAMP - INTERVAL '1 days' + INTERVAL '15 min', 'mobile_app', 'mobile_g3mjfz'), +(10, CURRENT_TIMESTAMP - INTERVAL '2 days', CURRENT_TIMESTAMP - INTERVAL '2 days' + INTERVAL '15 min', 'web_app', 'web_zz91p44l'), +(5, CURRENT_TIMESTAMP - INTERVAL '2 days', CURRENT_TIMESTAMP 
- INTERVAL '2 days' + INTERVAL '15 min', 'web_app', 'web_8902wknz') +; + + DELETE FROM DEFOG.EWALLET.USER_SETTING_SNAPSHOT; + INSERT INTO DEFOG.EWALLET.USER_SETTING_SNAPSHOT (user_id, snapshot_date, tx_limit_daily, tx_limit_monthly, membership_status, password_hash, api_key, verified_devices, verified_ips, mfa_enabled, marketing_opt_in, created_at) +VALUES +(1, '2023-06-07', 1000.00, 5000.00, 2, 'bcryptHash($2yz9!&ka1)', '9d61c49b-8977-4914-a36b-80d1445e38fa', 'mobile_8fh2k1', '192.168.0.1', true, false, '2023-06-07 00:00:00'), +(2, '2023-06-07', 500.00, 2500.00, 1, 'bcryptHash(qpwo9874zyGk!)', NULL, 'mobile_yjp08q, mobile_1av8p0', '198.51.100.233, 70.121.39.25', false, true, '2023-06-07 00:00:00'), +(3, '2023-06-07', 2000.00, 10000.00, 3, 'bcryptHash(Fr3nchPa1n!@98zy)', 'e785f611-fdd8-4c2d-a870-e104358712e5', 'web_k29qjd, mobile_x28qlj', '216.58.195.68, 92.110.51.150', true, false, '2023-06-07 00:00:00'), +(4, '2023-06-07', 5000.00, 20000.00, 4, 'bcryptHash(Vacay2023*&!Rm)', NULL, 'mobile_34jdkl', '143.92.64.138', false, true, '2023-06-07 00:00:00'), +(5, '2023-06-07', 100.00, 500.00, 0, 'bcryptHash(cRaf7yCr8zy)', NULL, 'web_8902wknz', '192.64.112.188', false, false, '2023-06-07 00:00:00'), +(6, '2023-06-07', 50.00, 500.00, 1, 'bcryptHash(C0d3Rul3z!99)', '6c03c175-9ac9-4854-b064-a3fff2c62e31', 'web_zz91p44l', '4.14.15.90', true, true, '2023-06-07 00:00:00'), +(7, '2023-06-07', 250.00, 1000.00, 2, 'bcryptHash(zEnH0me&Pw7)', NULL, NULL, NULL, false, true, '2023-06-07 00:00:00'), +(8, '2023-06-07', 200.00, 1000.00, 0, 'bcryptHash(K1dzPlay!&Rt8)', NULL, 'web_d8180kaf, mobile_q3mz8n', '8.26.53.165, 68.85.32.201', false, false, '2023-06-07 00:00:00'), +(9, '2023-06-07', 150.00, 1000.00, 2, 'bcryptHash(Gl0wUp7!9zy)', NULL, 'mobile_g3mjfz', '203.96.81.36', true, true, '2023-06-07 00:00:00'), +(10, '2023-06-07', 300.00, 2000.00, 1, 'bcryptHash(GamzRu1ez*&99!)', NULL, 'web_d8180kaf', '8.26.53.165', false, true, '2023-06-07 00:00:00'), +(1, '2023-06-01', 502.00, 1000.00, 2, 'bcryptHash($2yz9!&ka1)', '9d61c49b-8977-4914-a36b-80d1445e38fa', 'mobile_8fh2k1', '192.168.0.1', false, true, '2023-06-01 06:00:00'), +(2, '2023-06-01', 500.00, 2500.00, 1, 'bcryptHash(qpwo9874zyGk!)', NULL, 'mobile_yjp08q', '198.51.100.233, 70.121.39.25', true, false, '2023-06-01 09:00:00'); +------------------------------------------------------------------------------- + + DELETE FROM DEFOG.DEALERSHIP.CARS; + INSERT INTO DEFOG.DEALERSHIP.CARS (id, make, model, year, color, vin_number, engine_type, transmission, cost) +VALUES + (1, 'Toyota', 'Camry', 2022, 'Silver', '4T1BF1FK3CU510984', 'V6', 'Automatic', 28500.00), + (2, 'Honda', 'Civic', 2021, 'platinum/grey', '2HGFC2F53MH522780', 'Inline 4', 'CVT', 22000.00), + (3, 'Ford', 'Mustang', 2023, 'blue', '1FA6P8TH4M5100001', 'V8', 'Manual', 45000.00), + (4, 'Tesla', 'Model 3', 2022, 'fuschia', '5YJ3E1EB7MF123456', 'Electric', 'Automatic', 41000.00), + (5, 'Chevrolet', 'Equinox', 2021, 'midnight blue', '2GNAXUEV1M6290124', 'Inline 4', 'Automatic', 26500.00), + (6, 'Nissan', 'Altima', 2022, 'Jet black', '1N4BL4BV4NN123456', 'V6', 'CVT', 25000.00), + (7, 'BMW', 'X5', 2023, 'Titan Silver', '5UXCR6C56M9A12345', 'V8', 'Automatic', 62000.00), + (8, 'Audi', 'A4', 2022, 'Blue', 'WAUBNAF47MA098765', 'Inline 4', 'Automatic', 39000.00), + (9, 'Lexus', 'RX350', 2021, 'Fiery red', '2T2BZMCA7MC143210', 'V6', 'Automatic', 45500.00), + (10, 'Subaru', 'Outback', 2022, 'Jade', '4S4BSANC2N3246801', 'Boxer 4', 'CVT', 28000.00), + (11, 'Mazda', 'CX-5', 2022, 'Royal Purple', 'JM3KE4DY4N0123456', 'Inline 
4', 'Automatic', 29000.00), + (12, 'Hyundai', 'Tucson', 2023, 'black', 'KM8J3CAL3NU123456', 'Inline 4', 'Automatic', 32000.00), + (13, 'Kia', 'Sorento', 2021, 'ebony black', '5XYPH4A50MG987654', 'V6', 'Automatic', 32000.00), + (14, 'Jeep', 'Wrangler', 2022, 'Harbor Gray', '1C4HJXDG3NW123456', 'V6', 'Automatic', 38000.00), + (15, 'GMC', 'Sierra 1500', 2023, 'Snow White', '1GTU9CED3NZ123456', 'V8', 'Automatic', 45000.00), + (16, 'Ram', '1500', 2022, 'baby blue', '1C6SRFFT3NN123456', 'V8', 'Automatic', 42000.00), + (17, 'Mercedes-Benz', 'E-Class', 2021, 'Silver', 'W1KZF8DB1MA123456', 'Inline 6', 'Automatic', 62000.00), + (18, 'Volkswagen', 'Tiguan', 2022, 'Red', '3VV2B7AX1NM123456', 'Inline 4', 'Automatic', 32000.00), + (19, 'Volvo', 'XC90', 2023, 'black', 'YV4A22PK3N1234567', 'Inline 4', 'Automatic', 65000.00), + (20, 'Porsche', '911', 2022, 'white', 'WP0AA2A93NS123456', 'Flat 6', 'Automatic', 120000.00), + (21, 'Cadillac', 'Escalade', 2023, 'Black', '1GYS4HKJ3MR123456', 'V8', 'Automatic', 85000.00); + + DELETE FROM DEFOG.DEALERSHIP.SALESPERSONS; + INSERT INTO DEFOG.DEALERSHIP.SALESPERSONS (id, first_name, last_name, email, phone, hire_date, termination_date) +VALUES + (1, 'John', 'Doe', 'john.doe@autonation.com', '(555)-123-4567', CURRENT_DATE - INTERVAL '2 years', NULL), + (2, 'Jane', 'Smith', 'jane.smith@autonation.com', '(415)-987-6543', CURRENT_DATE - INTERVAL '3 years', NULL), + (3, 'Michael', 'Johnson', 'michael.johnson@autonation.com', '(555)-456-7890', CURRENT_DATE - INTERVAL '1 year', NULL), + (4, 'Emily', 'Brown', 'emily.brown@sonicauto.com', '(444)-111-2222', CURRENT_DATE - INTERVAL '1 year', CURRENT_DATE - INTERVAL '1 month'), + (5, 'David', 'Wilson', 'david.wilson@sonicauto.com', '(444)-333-4444', CURRENT_DATE - INTERVAL '2 years', NULL), + (6, 'Sarah', 'Taylor', 'sarah.taylor@sonicauto.com', '(123)-555-6666', '2018-09-01', '2022-09-01'), + (7, 'Daniel', 'Anderson', 'daniel.anderson@sonicauto.com', '(555)-777-8888', '2021-07-12', NULL), + (8, 'Olivia', 'Thomas', 'olivia.thomas@pensake.com', '(333)-415-0000', '2023-01-25', '2023-07-25'), + (9, 'James', 'Jackson', 'james.jackson@pensake.com', '(555)-212-3333', '2019-04-30', NULL), + (10, 'Sophia', 'White', 'sophia.white@pensake.com', '(555)-444-5555', '2022-08-18', NULL), + (11, 'Robert', 'Johnson', 'robert.johnson@pensake.com', '(001)-415-5678', CURRENT_DATE - INTERVAL '15 days', NULL), + (12, 'Jennifer', 'Davis', 'jennifer.davis@directauto.com', '(555)-345-6789', CURRENT_DATE - INTERVAL '20 days', NULL), + (13, 'Jessica', 'Rodriguez', 'jessica.rodriguez@directauto.com', '(555)-789-0123', '2022-06-01', NULL); + + -- customers + DELETE FROM DEFOG.DEALERSHIP.CUSTOMERS; + INSERT INTO DEFOG.DEALERSHIP.CUSTOMERS (id, first_name, last_name, email, phone, address, city, state, zip_code, crtd_ts) +VALUES + (1, 'William', 'Davis', 'william.davis@example.com', '555-888-9999', '123 Main St', 'New York', 'NY', '10001', CURRENT_TIMESTAMP - INTERVAL '5 years'), + (2, 'Ava', 'Miller', 'ava.miller@example.com', '555-777-6666', '456 Oak Ave', 'Los Angeles', 'CA', '90001', CURRENT_TIMESTAMP - INTERVAL '4 years'), + (3, 'Benjamin', 'Wilson', 'benjamin.wilson@example.com', '555-666-5555', '789 Elm St', 'Chicago', 'IL', '60007', CURRENT_TIMESTAMP - INTERVAL '3 years'), + (4, 'Mia', 'Moore', 'mia.moore@example.com', '555-555-4444', '321 Pine Rd', 'Houston', 'TX', '77001', CURRENT_TIMESTAMP - INTERVAL '2 years'), + (5, 'Henry', 'Taylor', 'henry.taylor@example.com', '555-444-3333', '654 Cedar Ln', 'Phoenix', 'AZ', '85001', CURRENT_TIMESTAMP - INTERVAL 
'1 year'), + (6, 'Charlotte', 'Anderson', 'charlotte.anderson@example.com', '555-333-2222', '987 Birch Dr', 'Philadelphia', 'PA', '19019', CURRENT_TIMESTAMP - INTERVAL '5 years'), + (7, 'Alexander', 'Thomas', 'alexander.thomas@example.com', '555-222-1111', '741 Walnut St', 'San Antonio', 'TX', '78006', CURRENT_TIMESTAMP - INTERVAL '4 years'), + (8, 'Amelia', 'Jackson', 'amelia.jackson@gmail.com', '555-111-0000', '852 Maple Ave', 'San Diego', 'CA', '92101', CURRENT_TIMESTAMP - INTERVAL '3 years'), + (9, 'Daniel', 'White', 'daniel.white@youtube.com', '555-000-9999', '963 Oak St', 'Dallas', 'TX', '75001', CURRENT_TIMESTAMP - INTERVAL '2 years'), + (10, 'Abigail', 'Harris', 'abigail.harris@company.io', '555-999-8888', '159 Pine Ave', 'San Jose', 'CA', '95101', CURRENT_TIMESTAMP - INTERVAL '1 year'), + (11, 'Christopher', 'Brown', 'christopher.brown@ai.com', '555-456-7890', '753 Maple Rd', 'Miami', 'FL', '33101', CURRENT_TIMESTAMP - INTERVAL '5 months'), + (12, 'Sophia', 'Lee', 'sophia.lee@microsoft.com', '555-567-8901', '951 Oak Ln', 'Seattle', 'WA', '98101', CURRENT_TIMESTAMP - INTERVAL '6 months'), + (13, 'Michael', 'Chen', 'michael.chen@company.com', '(555)-456-7890', '123 Oak St', 'San Francisco', 'CA', '94101', CURRENT_TIMESTAMP - INTERVAL '3 months'); + + DELETE FROM DEFOG.DEALERSHIP.SALES; + INSERT INTO DEFOG.DEALERSHIP.SALES (id, car_id, salesperson_id, customer_id, sale_price, sale_date) +VALUES + (1, 1, 2, 3, 30500.00, '2023-03-15'), + (2, 3, 1, 5, 47000.00, '2023-03-20'), + (3, 6, 4, 2, 26500.00, '2023-03-22'), + (4, 8, 7, 9, 38000.00, '2023-03-25'), + (5, 2, 4, 7, 23500.00, '2023-03-28'), + (6, 10, 6, 1, 30000.00, '2023-04-01'), + (7, 5, 3, 6, 26800.00, '2023-04-05'), + (8, 7, 2, 10, 63000.00, '2023-04-10'), + (9, 4, 6, 8, 42500.00, '2023-04-12'), + (10, 9, 2, 4, 44500.00, '2023-04-15'), + (11, 1, 7, 11, 28900.00, CURRENT_DATE - INTERVAL '32 days'), + (12, 3, 3, 12, 46500.00, CURRENT_DATE - INTERVAL '10 days'), + (13, 6, 1, 11, 26000.00, CURRENT_DATE - INTERVAL '15 days'), + (14, 2, 3, 1, 23200.00, CURRENT_DATE - INTERVAL '21 days'), + (15, 8, 6, 12, 43500.00, CURRENT_DATE - INTERVAL '3 days'), + (16, 10, 4, 2, 29500.00, CURRENT_DATE - INTERVAL '5 days'), + (17, 3, 2, 3, 46000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' + INTERVAL '1 day'), + (18, 3, 2, 7, 47500.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week'), + (19, 3, 2, 10, 46500.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' - INTERVAL '1 day'), + (20, 4, 1, 3, 48000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week' + INTERVAL '1 day'), + (21, 4, 1, 7, 45000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week'), + (22, 4, 1, 10, 49000.00, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week' - INTERVAL '1 day'); + + + DELETE FROM DEFOG.DEALERSHIP.INVENTORY_SNAPSHOTS; + INSERT INTO DEFOG.DEALERSHIP.INVENTORY_SNAPSHOTS (id, snapshot_date, car_id, is_in_inventory) +VALUES + (1, '2023-03-15', 1, TRUE), + (2, '2023-03-15', 2, TRUE), + (3, '2023-03-15', 3, TRUE), + (4, '2023-03-15', 4, TRUE), + (5, '2023-03-15', 5, TRUE), + (6, '2023-03-15', 6, TRUE), + (7, '2023-03-15', 7, TRUE), + (8, '2023-03-15', 8, TRUE), + (9, '2023-03-15', 9, TRUE), + (10, '2023-03-15', 10, TRUE), + (11, '2023-03-20', 1, FALSE), + (12, '2023-03-20', 3, FALSE), + (13, '2023-03-22', 6, FALSE), + (14, '2023-03-25', 8, FALSE), + (15, '2023-03-28', 2, FALSE), + (16, '2023-04-01', 10, FALSE), + (17, '2023-04-05', 5, FALSE), + (18, '2023-04-10', 7, FALSE), + (19, '2023-04-12', 4, FALSE), + (20, '2023-04-15', 9, FALSE), + 
(21, '2023-03-28', 1, TRUE), + (22, '2023-03-28', 3, TRUE), + (23, '2023-03-28', 4, FALSE); + + DELETE FROM DEFOG.DEALERSHIP.PAYMENTS_RECEIVED; + INSERT INTO DEFOG.DEALERSHIP.PAYMENTS_RECEIVED (id, sale_id, payment_date, payment_amount, payment_method) +VALUES + (1, 1, '2023-03-15', 5000.00, 'check'), + (2, 1, '2023-03-20', 22500.00, 'financing'), + (3, 2, '2023-03-20', 44000.00, 'credit_card'), + (4, 3, '2023-03-22', 24500.00, 'debit_card'), + (5, 4, '2023-03-25', 38000.00, 'financing'), + (6, 5, '2023-03-28', 21500.00, 'cash'), + (7, 6, '2023-04-01', 27000.00, 'credit_card'), + (8, 7, '2023-04-05', 26000.00, 'debit_card'), + (9, 8, '2023-04-10', 60000.00, 'financing'), + (10, 9, '2023-04-12', 40000.00, 'check'), + (11, 10, '2023-04-15', 44500.00, 'credit_card'), + (12, 11, CURRENT_DATE - INTERVAL '30 days', 28000.00, 'cash'), + (13, 12, CURRENT_DATE - INTERVAL '3 days', 43500.00, 'credit_card'), + (14, 13, CURRENT_DATE - INTERVAL '6 days', 24000.00, 'debit_card'), + (15, 14, CURRENT_DATE - INTERVAL '1 days', 17200.00, 'financing'), + (16, 15, CURRENT_DATE - INTERVAL '1 days', 37500.00, 'credit_card'), + (17, 16, CURRENT_DATE - INTERVAL '5 days', 26500.00, 'debit_card'), + (18, 17, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' + INTERVAL '1 day', 115000.00, 'financing'), + (19, 18, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week', 115000.00, 'credit_card'), + (20, 19, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '1 week' - INTERVAL '1 day', 115000.00, 'debit_card'), + (21, 20, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week' + INTERVAL '1 day', 115000.00, 'cash'), + (22, 21, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week', 115000.00, 'check'), + (23, 22, DATE_TRUNC('week', CURRENT_DATE) - INTERVAL '8 week' - INTERVAL '1 day', 115000.00, 'credit_card'); + + DELETE FROM DEFOG.DEALERSHIP.PAYMENTS_MADE; + INSERT INTO DEFOG.DEALERSHIP.PAYMENTS_MADE (id, vendor_name, payment_date, payment_amount, payment_method, invoice_number, invoice_date, due_date) +VALUES + (1, 'Car Manufacturer Inc', '2023-03-01', 150000.00, 'bank_transfer', 'INV-001', '2023-02-25', '2023-03-25'), + (2, 'Auto Parts Supplier', '2023-03-10', 25000.00, 'check', 'INV-002', '2023-03-05', '2023-04-04'), + (3, 'Utility Company', '2023-03-15', 1500.00, 'bank_transfer', 'INV-003', '2023-03-01', '2023-03-31'), + (4, 'Marketing Agency', '2023-03-20', 10000.00, 'credit_card', 'INV-004', '2023-03-15', '2023-04-14'), + (5, 'Insurance Provider', '2023-03-25', 5000.00, 'bank_transfer', 'INV-005', '2023-03-20', '2023-04-19'), + (6, 'Cleaning Service', '2023-03-31', 2000.00, 'check', 'INV-006', '2023-03-25', '2023-04-24'), + (7, 'Car Manufacturer Inc', '2023-04-01', 200000.00, 'bank_transfer', 'INV-007', '2023-03-25', '2023-04-24'), + (8, 'Auto Parts Supplier', '2023-04-10', 30000.00, 'check', 'INV-008', '2023-04-05', '2023-05-05'), + (9, 'Utility Company', '2023-04-15', 1500.00, 'bank_transfer', 'INV-009', '2023-04-01', '2023-04-30'), + (10, 'Marketing Agency', '2023-04-20', 15000.00, 'credit_card', 'INV-010', '2023-04-15', '2023-05-15'), + (11, 'Insurance Provider', '2023-04-25', 5000.00, 'bank_transfer', 'INV-011', '2023-04-20', '2023-05-20'), + (12, 'Cleaning Service', '2023-04-30', 2000.00, 'check', 'INV-012', '2023-04-25', '2023-05-25'), + (13, 'Toyota Auto Parts', CURRENT_DATE - INTERVAL '5 days', 12500.00, 'bank_transfer', 'INV-013', CURRENT_DATE - INTERVAL '10 days', CURRENT_DATE + INTERVAL '20 days'), + (14, 'Honda Manufacturing', CURRENT_DATE - INTERVAL '3 days', 18000.00, 'check', 'INV-014', CURRENT_DATE - 
INTERVAL '8 days', CURRENT_DATE + INTERVAL '22 days'), + (15, 'Ford Supplier Co', CURRENT_DATE - INTERVAL '2 days', 22000.00, 'bank_transfer', 'INV-015', CURRENT_DATE - INTERVAL '7 days', CURRENT_DATE + INTERVAL '23 days'), + (16, 'Tesla Parts Inc', CURRENT_DATE - INTERVAL '1 day', 15000.00, 'credit_card', 'INV-016', CURRENT_DATE - INTERVAL '6 days', CURRENT_DATE + INTERVAL '24 days'), + (17, 'Chevrolet Auto', CURRENT_DATE, 20000.00, 'bank_transfer', 'INV-017', CURRENT_DATE - INTERVAL '5 days', CURRENT_DATE + INTERVAL '25 days'); + + DELETE FROM DEFOG.BROKER.SBCUSTOMER; + INSERT INTO DEFOG.BROKER.SBCUSTOMER (sbCustId, sbCustName, sbCustEmail, sbCustPhone, sbCustAddress1, sbCustCity, sbCustState, sbCustCountry, sbCustPostalCode, sbCustJoinDate, sbCustStatus) VALUES +('C001', 'john doe', 'john.doe@email.com', '555-123-4567', '123 Main St', 'Anytown', 'CA', 'USA', '90001', '2020-01-01', 'active'), +('C002', 'Jane Smith', 'jane.smith@email.com', '555-987-6543', '456 Oak Rd', 'Someville', 'NY', 'USA', '10002', '2019-03-15', 'active'), +('C003', 'Bob Johnson', 'bob.johnson@email.com', '555-246-8135', '789 Pine Ave', 'Mytown', 'TX', 'USA', '75000', '2022-06-01', 'inactive'), +('C004', 'Samantha Lee', 'samantha.lee@email.com', '555-135-7902', '246 Elm St', 'Yourtown', 'CA', 'USA', '92101', '2018-09-22', 'suspended'), +('C005', 'Michael Chen', 'michael.chen@email.com', '555-864-2319', '159 Cedar Ln', 'Anothertown', 'FL', 'USA', '33101', '2021-02-28', 'active'), +('C006', 'Emily Davis', 'emily.davis@email.com', '555-753-1904', '753 Maple Dr', 'Mytown', 'TX', 'USA', '75000', '2020-07-15', 'active'), +('C007', 'David Kim', 'david.kim@email.com', '555-370-2648', '864 Oak St', 'Anothertown', 'FL', 'USA', '33101', '2022-11-05', 'active'), +('C008', 'Sarah Nguyen', 'sarah.nguyen@email.com', '555-623-7419', '951 Pine Rd', 'Yourtown', 'CA', 'USA', '92101', '2019-04-01', 'closed'), +('C009', 'William Garcia', 'william.garcia@email.com', '555-148-5326', '258 Elm Ave', 'Anytown', 'CA', 'USA', '90001', '2021-08-22', 'active'), +('C010', 'Jessica Hernandez', 'jessica.hernandez@email.com', '555-963-8520', '147 Cedar Blvd', 'Someville', 'NY', 'USA', '10002', '2020-03-10', 'inactive'), +('C011', 'Alex Rodriguez', 'alex.rodriguez@email.com', '555-246-1357', '753 Oak St', 'Newtown', 'NJ', 'USA', '08801', '2023-01-15', 'active'), +('C012', 'Olivia Johnson', 'olivia.johnson@email.com', '555-987-6543', '321 Elm St', 'Newtown', 'NJ', 'USA', '08801', '2023-01-05', 'active'), +('C013', 'Ethan Davis', 'ethan.davis@email.com', '555-246-8135', '654 Oak Ave', 'Someville', 'NY', 'USA', '10002', '2023-02-12', 'active'), +('C014', 'Ava Wilson', 'ava.wilson@email.com', '555-135-7902', '987 Pine Rd', 'Anytown', 'CA', 'USA', '90001', '2023-03-20', 'active'), +('C015', 'Emma Brown', 'emma.brown@email.com', '555-987-6543', '789 Oak St', 'Newtown', 'NJ', 'USA', '08801', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '5 months', 'active'), +('C016', 'sophia martinez', 'sophia.martinez@email.com', '555-246-8135', '159 Elm Ave', 'Anytown', 'CA', 'USA', '90001', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '4 months', 'active'), +('C017', 'Jacob Taylor', 'jacob.taylor@email.com', '555-135-7902', '753 Pine Rd', 'Someville', 'NY', 'USA', '10002', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '3 months', 'active'), +('C018', 'Michael Anderson', 'michael.anderson@email.com', '555-864-2319', '321 Cedar Ln', 'Yourtown', 'CA', 'USA', '92101', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months', 'active'), +('C019', 'Isabella Thompson', 
'isabella.thompson@email.com', '555-753-1904', '987 Maple Dr', 'Anothertown', 'FL', 'USA', '33101', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month', 'active'), +('C020', 'Maurice Lee', 'maurice.lee@email.com', '555-370-2648', '654 Oak St', 'Mytown', 'TX', 'USA', '75000', DATE_TRUNC('month', CURRENT_DATE), 'active'); + + + DELETE FROM DEFOG.BROKER.SBTICKER; + INSERT INTO DEFOG.BROKER.SBTICKER (sbTickerId, sbTickerSymbol, sbTickerName, sbTickerType, sbTickerExchange, sbTickerCurrency, sbTickerDb2x, sbTickerIsActive) VALUES +('T001', 'AAPL', 'Apple Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T002', 'MSFT', 'Microsoft Corporation', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T003', 'AMZN', 'Amazon.com, Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T004', 'TSLA', 'Tesla, Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T005', 'GOOGL', 'Alphabet Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T006', 'FB', 'Meta Platforms, Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T007', 'BRK.B', 'Berkshire Hathaway Inc.', 'stock', 'NYSE', 'USD', 'NY', true), +('T008', 'JPM', 'JPMorgan Chase & Co.', 'stock', 'NYSE', 'USD', 'NY', true), +('T009', 'V', 'Visa Inc.', 'stock', 'NYSE', 'USD', 'NY', true), +('T010', 'PG', 'Procter & Gamble Company', 'stock', 'NYSE', 'USD', 'NY', true), +('T011', 'SPY', 'SPDR S&P 500 ETF Trust', 'etf', 'NYSE Arca', 'USD', 'NX', true), +('T012', 'QQQ', 'Invesco QQQ Trust', 'etf', 'NASDAQ', 'USD', 'NQ', true), +('T013', 'VTI', 'Vanguard Total Stock Market ETF', 'etf', 'NYSE Arca', 'USD', 'NX', true), +('T014', 'VXUS', 'Vanguard Total International Stock ETF', 'etf', 'NASDAQ', 'USD', 'NQ', true), +('T015', 'VFINX', 'Vanguard 500 Index Fund', 'mutualfund', 'Vanguard', 'USD', 'VG', true), +('T016', 'VTSAX', 'Vanguard Total Stock Market Index Fund', 'mutualfund', 'Vanguard', 'USD', 'VG', true), +('T017', 'VIGAX', 'Vanguard Growth Index Fund', 'mutualfund', 'Vanguard', 'USD', 'VG', true), +('T018', 'GOOG', 'Alphabet Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true), +('T019', 'VTI', 'Vanguard Total Stock Market ETF', 'etf', 'NYSE Arca', 'USD', 'NX', true), +('T020', 'VTSAX', 'Vanguard Total Stock Market Index Fund', 'mutualfund', 'Vanguard', 'USD', 'VG', true), +('T021', 'NFLX', 'Netflix, Inc.', 'stock', 'NASDAQ', 'USD', 'NQ', true); + + DELETE FROM DEFOG.BROKER.SBDAILYPRICE; + INSERT INTO DEFOG.BROKER.SBDAILYPRICE (sbDpTickerId, sbDpDate, sbDpOpen, sbDpHigh, sbDpLow, sbDpClose, sbDpVolume, sbDpEpochMs, sbDpSource) VALUES +('T001', '2023-04-01', 150.00, 152.50, 148.75, 151.25, 75000000, 1680336000000, 'NYSE'), +('T002', '2023-04-01', 280.00, 282.75, 279.50, 281.00, 35000000, 1680336000000, 'NASDAQ'), +('T003', '2023-04-01', 3200.00, 3225.00, 3180.00, 3210.00, 4000000, 1680336000000, 'NASDAQ'), +('T004', '2023-04-01', 180.00, 185.00, 178.50, 184.25, 20000000, 1680336000000, 'NASDAQ'), +('T005', '2023-04-01', 2500.00, 2525.00, 2475.00, 2510.00, 1500000, 1680336000000, 'NASDAQ'), +('T006', '2023-04-01', 200.00, 205.00, 198.00, 202.50, 15000000, 1680336000000, 'NASDAQ'), +('T007', '2023-04-01', 400000.00, 402500.00, 398000.00, 401000.00, 10000, 1680336000000, 'NYSE'), +('T008', '2023-04-01', 130.00, 132.50, 128.75, 131.00, 12000000, 1680336000000, 'NYSE'), +('T009', '2023-04-01', 220.00, 222.50, 218.00, 221.00, 8000000, 1680336000000, 'NYSE'), +('T010', '2023-04-01', 140.00, 142.00, 139.00, 141.50, 6000000, 1680336000000, 'NYSE'), +('T001', '2023-04-02', 151.50, 153.00, 150.00, 152.00, 70000000, 1680422400000, 'NYSE'), +('T002', '2023-04-02', 281.25, 283.50, 280.00, 282.75, 
32000000, 1680422400000, 'NASDAQ'), +('T003', '2023-04-02', 3212.00, 3230.00, 3200.00, 3225.00, 3800000, 1680422400000, 'NASDAQ'), +('T004', '2023-04-02', 184.50, 187.00, 183.00, 186.00, 18000000, 1680422400000, 'NASDAQ'), +('T005', '2023-04-02', 2512.00, 2530.00, 2500.00, 2520.00, 1400000, 1680422400000, 'NASDAQ'), +('T006', '2023-04-02', 203.00, 206.50, 201.00, 205.00, 14000000, 1680422400000, 'NASDAQ'), +('T007', '2023-04-02', 401500.00, 403000.00, 400000.00, 402000.00, 9500, 1680422400000, 'NYSE'), +('T008', '2023-04-02', 131.25, 133.00, 130.00, 132.50, 11000000, 1680422400000, 'NYSE'), +('T009', '2023-04-02', 221.50, 223.00, 220.00, 222.00, 7500000, 1680422400000, 'NYSE'), +('T010', '2023-04-02', 141.75, 143.00, 140.50, 142.25, 5500000, 1680422400000, 'NYSE'), +('T001', '2023-04-03', 152.25, 154.00, 151.00, 153.50, 65000000, 1680508800000, 'NYSE'), +('T002', '2023-04-03', 283.00, 285.00, 281.50, 284.00, 30000000, 1680508800000, 'NASDAQ'), +('T003', '2023-04-03', 3227.00, 3240.00, 3220.00, 3235.00, 3600000, 1680508800000, 'NASDAQ'), +('T004', '2023-04-03', 186.25, 188.50, 185.00, 187.75, 16000000, 1680508800000, 'NASDAQ'), +('T005', '2023-04-03', 2522.00, 2540.00, 2515.00, 2535.00, 1300000, 1680508800000, 'NASDAQ'), +('T006', '2023-04-03', 205.50, 208.00, 203.50, 207.00, 13000000, 1680508800000, 'NASDAQ'), +('T007', '2023-04-03', 402500.00, 404000.00, 401000.00, 403500.00, 9000, 1680508800000, 'NYSE'), +('T008', '2023-04-03', 132.75, 134.50, 131.50, 133.75, 10000000, 1680508800000, 'NYSE'), +('T009', '2023-04-03', 222.25, 224.00, 221.00, 223.50, 7000000, 1680508800000, 'NYSE'), +('T010', '2023-04-03', 142.50, 144.00, 141.50, 143.25, 5000000, 1680508800000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '8 days', 204.00, 204.50, 202.75, 203.25, 8000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '8 days') * 1000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '6 days', 205.00, 207.50, 203.75, 206.25, 8000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '6 days') * 1000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '5 days', 206.50, 208.00, 205.00, 207.00, 7500000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '5 days') * 1000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '4 days', 207.25, 209.00, 206.50, 208.50, 7000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '4 days') * 1000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '3 days', 208.75, 210.50, 207.75, 209.75, 6500000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '3 days') * 1000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '2 days', 210.00, 211.75, 209.25, 211.00, 6000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '2 days') * 1000, 'NYSE'), +('T019', CURRENT_DATE - INTERVAL '1 day', 211.25, 213.00, 210.50, 212.25, 5500000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '1 day') * 1000, 'NYSE'), +('T019', CURRENT_DATE, 212.50, 214.25, 211.75, 213.50, 5000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000, 'NYSE'), +('T020', CURRENT_DATE - INTERVAL '6 days', 82.00, 83.00, 81.50, 82.50, 1000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '6 days') * 1000, 'Vanguard'), +('T020', CURRENT_DATE - INTERVAL '5 days', 82.60, 83.60, 82.10, 83.10, 950000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '5 days') * 1000, 'Vanguard'), +('T020', CURRENT_DATE - INTERVAL '4 days', 83.20, 84.20, 82.70, 83.70, 900000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '4 days') * 1000, 'Vanguard'), +('T020', CURRENT_DATE - INTERVAL '3 days', 83.80, 84.80, 83.30, 84.30, 850000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '3 days') * 
1000, 'Vanguard'), +('T020', CURRENT_DATE - INTERVAL '2 days', 84.40, 85.40, 83.90, 84.90, 800000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '2 days') * 1000, 'Vanguard'), +('T020', CURRENT_DATE - INTERVAL '1 day', 85.00, 86.00, 84.50, 85.50, 750000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '1 day') * 1000, 'Vanguard'), +('T020', CURRENT_DATE, 85.60, 86.60, 85.10, 86.10, 700000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000, 'Vanguard'), +('T021', CURRENT_DATE - INTERVAL '6 days', 300.00, 305.00, 297.50, 302.50, 10000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '6 days') * 1000, 'NASDAQ'), +('T021', CURRENT_DATE - INTERVAL '5 days', 303.00, 308.00, 300.50, 305.50, 9500000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '5 days') * 1000, 'NASDAQ'), +('T021', CURRENT_DATE - INTERVAL '4 days', 306.00, 311.00, 303.50, 308.50, 9000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '4 days') * 1000, 'NASDAQ'), +('T021', CURRENT_DATE - INTERVAL '3 days', 309.00, 314.00, 306.50, 311.50, 8500000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '3 days') * 1000, 'NASDAQ'), +('T021', CURRENT_DATE - INTERVAL '2 days', 312.00, 317.00, 309.50, 314.50, 8000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '2 days') * 1000, 'NASDAQ'), +('T021', CURRENT_DATE - INTERVAL '1 day', 315.00, 320.00, 312.50, 317.50, 7500000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP - INTERVAL '1 day') * 1000, 'NASDAQ'), +('T021', CURRENT_DATE, 318.00, 323.00, 315.50, 320.50, 7000000, EXTRACT(EPOCH FROM CURRENT_TIMESTAMP) * 1000, 'NASDAQ'); + + DELETE FROM DEFOG.BROKER.SBTRANSACTION; + INSERT INTO DEFOG.BROKER.SBTRANSACTION (sbTxId, sbTxCustId, sbTxTickerId, sbTxDateTime, sbTxType, sbTxShares, sbTxPrice, sbTxAmount, sbTxCcy, sbTxTax, sbTxCommission, sbTxKpx, sbTxSettlementDateStr, sbTxStatus) VALUES +('TX001', 'C001', 'T001', '2023-04-01 09:30:00'::timestamp, 'buy', 100, 150.00, 15000.00, 'USD', 75.00, 10.00, 'KP001', '20230401 09:30:00', 'success'), +('TX002', 'C002', 'T002', '2023-04-01 10:15:00'::timestamp, 'sell', 50, 280.00, 14000.00, 'USD', 70.00, 10.00, 'KP002', '20230401 10:15:00', 'success'), +('TX003', 'C003', 'T003', '2023-04-01 11:00:00'::timestamp, 'buy', 10, 3200.00, 32000.00, 'USD', 160.00, 20.00, 'KP003', '20230401 11:00:00', 'success'), +('TX004', 'C003', 'T004', '2023-04-01 11:45:00'::timestamp, 'sell', 25, 180.00, 4500.00, 'USD', 22.50, 5.00, 'KP004', '20230401 11:45:00', 'success'), +('TX005', 'C005', 'T005', '2023-04-01 12:30:00'::timestamp, 'buy', 5, 2500.00, 12500.00, 'USD', 62.50, 15.00, 'KP005', '20230401 12:30:00', 'success'), +('TX006', 'C002', 'T006', '2023-04-01 13:15:00'::timestamp, 'sell', 75, 200.00, 15000.00, 'USD', 75.00, 10.00, 'KP006', '20230401 13:15:00', 'success'), +('TX007', 'C003', 'T007', '2023-04-01 14:00:00'::timestamp, 'buy', 1, 400000.00, 400000.00, 'USD', 2000.00, 100.00, 'KP007', '20230401 14:00:00', 'success'), +('TX008', 'C003', 'T008', '2023-04-01 14:45:00'::timestamp, 'sell', 100, 130.00, 13000.00, 'USD', 65.00, 10.00, 'KP008', '20230401 14:45:00', 'success'), +('TX009', 'C009', 'T009', '2023-04-01 15:30:00'::timestamp, 'buy', 50, 220.00, 11000.00, 'USD', 55.00, 10.00, 'KP009', '20230401 15:30:00', 'success'), +('TX010', 'C002', 'T010', '2023-04-01 16:15:00'::timestamp, 'sell', 80, 140.00, 11200.00, 'USD', 56.00, 10.00, 'KP010', '20230401 16:15:00', 'success'), +('TX011', 'C001', 'T001', '2023-04-02 09:30:00'::timestamp, 'sell', 50, 151.50, 7575.00, 'USD', 37.88, 5.00, 'KP011', '20230402 09:30:00', 'success'), +('TX012', 'C002', 'T002', '2023-04-02 
10:15:00'::timestamp, 'buy', 30, 281.25, 8437.50, 'USD', 42.19, 7.50, 'KP012', '20230402 10:15:00', 'fail'), +('TX013', 'C003', 'T003', '2023-04-02 11:00:00'::timestamp, 'sell', 5, 3212.00, 16060.00, 'USD', 80.30, 15.00, 'KP013', '20230402 11:00:00', 'success'), +('TX014', 'C004', 'T004', '2023-04-02 11:45:00'::timestamp, 'buy', 15, 184.50, 2767.50, 'USD', 13.84, 5.00, 'KP014', '20230402 11:45:00', 'success'), +('TX015', 'C005', 'T005', '2023-04-02 12:30:00'::timestamp, 'sell', 2, 2512.00, 5024.00, 'USD', 25.12, 10.00, 'KP015', '20230402 12:30:00', 'success'), +('TX016', 'C006', 'T006', '2023-04-02 13:15:00'::timestamp, 'buy', 50, 203.00, 10150.00, 'USD', 50.75, 10.00, 'KP016', '20230402 13:15:00', 'success'), +('TX017', 'C007', 'T007', '2023-04-02 14:00:00'::timestamp, 'sell', 1, 401500.00, 401500.00, 'USD', 2007.50, 100.00, 'KP017', '20230402 14:00:00', 'success'), +('TX018', 'C008', 'T008', '2023-04-02 14:45:00'::timestamp, 'buy', 75, 131.25, 9843.75, 'USD', 49.22, 7.50, 'KP018', '20230402 14:45:00', 'success'), +('TX019', 'C009', 'T009', '2023-04-02 15:30:00'::timestamp, 'sell', 25, 221.50, 5537.50, 'USD', 27.69, 5.00, 'KP019', '20230402 15:30:00', 'success'), +('TX020', 'C010', 'T010', '2023-04-02 16:15:00'::timestamp, 'buy', 60, 141.75, 8505.00, 'USD', 42.53, 7.50, 'KP020', '20230402 16:15:00', 'success'), +('TX021', 'C001', 'T001', '2023-04-03 09:30:00'::timestamp, 'buy', 75, 152.25, 11418.75, 'USD', 57.09, 10.00, 'KP021', '20230403 09:30:00', 'fail'), +('TX022', 'C002', 'T002', '2023-04-03 10:15:00'::timestamp, 'sell', 40, 283.00, 11320.00, 'USD', 56.60, 10.00, 'KP022', '20230403 10:15:00', 'success'), +('TX023', 'C003', 'T003', '2023-04-03 11:00:00'::timestamp, 'buy', 8, 3227.00, 25816.00, 'USD', 129.08, 20.00, 'KP023', '20230403 11:00:00', 'success'), +('TX024', 'C004', 'T004', '2023-04-03 11:45:00'::timestamp, 'sell', 20, 186.25, 3725.00, 'USD', 18.63, 5.00, 'KP024', '20230403 11:45:00', 'success'), +('TX025', 'C005', 'T005', '2023-04-03 12:30:00'::timestamp, 'buy', 3, 2522.00, 7566.00, 'USD', 37.83, 15.00, 'KP025', '20230403 12:30:00', 'success'), +('TX026', 'C006', 'T006', '2023-04-03 13:15:00'::timestamp, 'sell', 60, 205.50, 12330.00, 'USD', 61.65, 10.00, 'KP026', '20230403 13:15:00', 'success'), +('TX027', 'C007', 'T007', '2023-04-03 14:00:00'::timestamp, 'buy', 1, 402500.00, 402500.00, 'USD', 2012.50, 100.00, 'KP027', '20230403 14:00:00', 'success'), +('TX028', 'C008', 'T008', '2023-04-03 14:45:00'::timestamp, 'sell', 90, 132.75, 11947.50, 'USD', 59.74, 7.50, 'KP028', '20230403 14:45:00', 'success'), +('TX029', 'C009', 'T009', '2023-04-03 15:30:00'::timestamp, 'buy', 40, 222.25, 8890.00, 'USD', 44.45, 10.00, 'KP029', '20230403 15:30:00', 'success'), +('TX030', 'C010', 'T010', '2023-04-03 16:15:00'::timestamp, 'sell', 70, 142.50, 9975.00, 'USD', 49.88, 10.00, 'KP030', '20230403 16:15:00', 'success'), +('TX031', 'C001', 'T001', CURRENT_DATE - INTERVAL '9 days', 'buy', 100, 150.00, 15000.00, 'USD', 75.00, 10.00, 'KP031', NULL, 'fail'), +('TX032', 'C002', 'T002', CURRENT_DATE - INTERVAL '8 days', 'sell', 80, 280.00, 14000.00, 'USD', 70.00, 10.00, 'KP032', TO_CHAR(CURRENT_DATE - INTERVAL '8 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX033', 'C003', 'T001', CURRENT_DATE - INTERVAL '7 days', 'buy', 120, 200.00, 24000.00, 'USD', 120.00, 15.00, 'KP033', TO_CHAR(CURRENT_DATE - INTERVAL '7 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX034', 'C004', 'T004', CURRENT_DATE - INTERVAL '6 days', 'sell', 90, 320.00, 28800.00, 'USD', 144.00, 12.00, 'KP034', TO_CHAR(CURRENT_DATE - INTERVAL '6 
days', '%Y%m%d %H:%i:%s'), 'success'), +('TX035', 'C005', 'T001', CURRENT_DATE - INTERVAL '5 days', 'buy', 150, 180.00, 27000.00, 'USD', 135.00, 20.00, 'KP035', NULL, 'fail'), +('TX036', 'C006', 'T006', CURRENT_DATE - INTERVAL '4 days', 'sell', 70, 300.00, 21000.00, 'USD', 105.00, 15.00, 'KP036', TO_CHAR(CURRENT_DATE - INTERVAL '4 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX037', 'C007', 'T007', CURRENT_DATE - INTERVAL '3 days', 'buy', 110, 220.00, 24200.00, 'USD', 121.00, 10.00, 'KP037', TO_CHAR(CURRENT_DATE - INTERVAL '3 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX038', 'C008', 'T008', CURRENT_DATE - INTERVAL '2 days', 'sell', 100, 350.00, 35000.00, 'USD', 175.00, 25.00, 'KP038', TO_CHAR(CURRENT_DATE - INTERVAL '2 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX039', 'C009', 'T007', CURRENT_DATE - INTERVAL '1 day', 'buy', 80, 230.00, 18400.00, 'USD', 92.00, 18.00, 'KP039', NULL, 'pending'), +('TX040', 'C001', 'T011', CURRENT_DATE - INTERVAL '10 days', 'buy', 50, 400.00, 20000.00, 'USD', 100.00, 20.00, 'KP040', TO_CHAR(CURRENT_DATE - INTERVAL '10 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX041', 'C002', 'T012', CURRENT_DATE - INTERVAL '9 days', 'sell', 30, 320.00, 9600.00, 'USD', 48.00, 15.00, 'KP041', TO_CHAR(CURRENT_DATE - INTERVAL '9 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX042', 'C003', 'T013', CURRENT_DATE - INTERVAL '8 days', 'buy', 80, 180.00, 14400.00, 'USD', 72.00, 10.00, 'KP042', TO_CHAR(CURRENT_DATE - INTERVAL '8 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX043', 'C004', 'T014', CURRENT_DATE - INTERVAL '7 days', 'sell', 60, 220.00, 13200.00, 'USD', 66.00, 12.00, 'KP043', NULL, 'pending'), +('TX044', 'C012', 'T001', '2023-01-15 10:00:00'::timestamp, 'buy', 80, 155.00, 12400.00, 'USD', 62.00, 10.00, 'KP044', '20230115 10:00:00', 'success'), +('TX045', 'C012', 'T001', '2023-01-16 10:30:00'::timestamp, 'buy', 80, 155.00, 12400.00, 'USD', 62.00, 10.00, 'KP045', '20230116 10:30:00', 'success'), +('TX046', 'C013', 'T002', '2023-02-20 11:30:00'::timestamp, 'sell', 60, 285.00, 17100.00, 'USD', 85.50, 15.00, 'KP046', '20230220 11:30:00', 'success'), +('TX047', 'C014', 'T003', '2023-03-25 14:45:00'::timestamp, 'buy', 5, 3250.00, 16250.00, 'USD', 81.25, 20.00, 'KP047', '20230325 14:45:00', 'success'), +('TX048', 'C012', 'T004', '2023-01-30 13:15:00'::timestamp, 'sell', 40, 190.00, 7600.00, 'USD', 38.00, 10.00, 'KP048', '20230130 13:15:00', 'success'), +('TX049', 'C013', 'T005', '2023-02-28 16:00:00'::timestamp, 'buy', 2, 2550.00, 5100.00, 'USD', 25.50, 15.00, 'KP049', '20230228 16:00:00', 'success'), +('TX050', 'C014', 'T006', '2023-03-30 09:45:00'::timestamp, 'sell', 30, 210.00, 6300.00, 'USD', 31.50, 10.00, 'KP050', '20230331 09:45:00', 'success'), +('TX051', 'C015', 'T001', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '5 months' + INTERVAL '1 day', 'buy', 50, 150.00, 7500.00, 'USD', 37.50, 10.00, 'KP051', TO_CHAR(DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '5 months' + INTERVAL '1 day', '%Y%m%d %H:%i:%s'), 'success'), +('TX052', 'C016', 'T002', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '4 months' + INTERVAL '2 days', 'sell', 40, 280.00, 11200.00, 'USD', 56.00, 10.00, 'KP052', TO_CHAR(DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '4 months' + INTERVAL '2 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX053', 'C017', 'T003', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '3 months' + INTERVAL '3 days', 'buy', 15, 3200.00, 48000.00, 'USD', 240.00, 20.00, 'KP053', TO_CHAR(DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '3 months' + INTERVAL '3 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX054', 
'C018', 'T004', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months' + INTERVAL '4 days', 'sell', 30, 180.00, 5400.00, 'USD', 27.00, 5.00, 'KP054', TO_CHAR(DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '2 months' + INTERVAL '4 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX055', 'C019', 'T005', DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' + INTERVAL '5 days', 'buy', 10, 2500.00, 25000.00, 'USD', 125.00, 15.00, 'KP055', TO_CHAR(DATE_TRUNC('month', CURRENT_DATE) - INTERVAL '1 month' + INTERVAL '5 days', '%Y%m%d %H:%i:%s'), 'success'), +('TX056', 'C002', 'T006', DATE_TRUNC('month', CURRENT_DATE) + INTERVAL '1 day', 'sell', 20, 200.00, 4000.00, 'USD', 20.00, 10.00, 'KP056', TO_CHAR(DATE_TRUNC('month', CURRENT_DATE) + INTERVAL '1 day', '%Y%m%d %H:%i:%s'), 'success'); + +END +$$; + +------------------------------------------------------------------------ +-- 2. Create a task that calls the procedure every day at midnight ET -- +------------------------------------------------------------------------ +CREATE OR REPLACE TASK DEFOG.BROKER.UPDATE_DATA_TASK_MIDNIGHT + WAREHOUSE = "DEMO_WH" + SCHEDULE = 'USING CRON 0 5 * * * UTC' +AS + CALL DEFOG.BROKER.DEFOG_DAILY_UPDATE(); + + +---------------------------------------------------- +-- 3. Enable the task -- +-- By default, after creation the task is paused -- +---------------------------------------------------- + +ALTER TASK DEFOG.BROKER.UPDATE_DATA_TASK_MIDNIGHT RESUME; + +---------------------------------------------------- +-- Grant execute privilege to role +---------------------------------------------------- +GRANT EXECUTE TASK ON ACCOUNT TO ROLE SYSADMIN; + +---------------------------------------------------- +-- Test procedure manually +-- Test task manually +---------------------------------------------------- +CALL DEFOG.BROKER.DEFOG_DAILY_UPDATE(); +EXECUTE TASK DEFOG.BROKER.UPDATE_DATA_TASK_MIDNIGHT; + + +---------------------------------------------------- +-- Show state of the task +---------------------------------------------------- +SHOW TASKS LIKE 'UPDATE_DATA_TASK_MIDNIGHT' IN SCHEMA DEFOG.BROKER; + + +---------------------------------------------------- +-- Detailed history of task executions: +---------------------------------------------------- +-- SELECT * +SELECT QUERY_TEXT, STATE, ERROR_CODE, ERROR_MESSAGE, SCHEDULED_TIME +FROM TABLE(INFORMATION_SCHEMA.TASK_HISTORY( + TASK_NAME => 'UPDATE_DATA_TASK_MIDNIGHT', + RESULT_LIMIT => 10 +)) +ORDER BY SCHEDULED_TIME DESC; diff --git a/tests/test_metadata/snowflake_defog_graphs.json b/tests/test_metadata/snowflake_defog_graphs.json new file mode 100644 index 000000000..3a1007b13 --- /dev/null +++ b/tests/test_metadata/snowflake_defog_graphs.json @@ -0,0 +1,3396 @@ +[ + { + "name": "Broker", + "version": "V2", + "collections": [ + { + "name": "customers", + "type": "simple table", + "table path": "BROKER.sbCustomer", + "unique properties": ["_id"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "sbCustId", + "data type": "string", + "description": "The unique identifying string for the customer", + "sample values": ["C001", "C005", "C014"], + "synonyms": ["customer key"] + }, + { + "name": "name", + "type": "table column", + "column name": "sbCustName", + "data type": "string", + "description": "The name of the customer", + "sample values": ["Ava Wilson", "Bob Johnson", "David Kim", "Emily Davis"], + "synonyms": ["customer name", "first and last name"] + }, + { + "name": "email", + "type": "table column", + "column name": "sbCustEmail", + 
"data type": "string", + "description": "The email of the customer", + "sample values": ["alex.rodriguez@email.com", "ava.wilson@email.com", "sarah.nguyen@email.com", "samantha.lee@email.com"], + "synonyms": ["email address", "customer email"] + }, + { + "name": "phone", + "type": "table column", + "column name": "sbCustPhone", + "data type": "string", + "description": "The phone number of the customer", + "sample values": ["555-123-4567", "555-623-7419", "555-135-7902"], + "synonyms": ["phone number", "customer phone"] + }, + { + "name": "address1", + "type": "table column", + "column name": "sbCustAddress1", + "data type": "string", + "description": "The address of the customer", + "sample values": ["123 Main St", "951 Pine Rd", "246 Elm St", "258 Elm Ave"], + "synonyms": ["first address line", "street address"] + }, + { + "name": "address2", + "type": "table column", + "column name": "sbCustAddress2", + "data type": "string", + "description": "The second address of the customer, if one exists", + "synonyms": ["second address line"] + }, + { + "name": "city", + "type": "table column", + "column name": "sbCustCity", + "data type": "string", + "description": "The city the customer lives in", + "sample values": ["Anytown", "Yourtown", "Someville", "Mytown"], + "synonyms": ["customer address city"] + }, + { + "name": "state", + "type": "table column", + "column name": "sbCustState", + "data type": "string", + "description": "The state the customer lives in, by its capitalized two-letter abbreviation", + "sample values": ["CA", "NY", "TX", "FL", "NJ"], + "synonyms": ["customer address state"] + }, + { + "name": "country", + "type": "table column", + "column name": "sbCustCountry", + "data type": "string", + "description": "The country the customer lives in", + "sample values": ["USA"], + "synonyms": ["customer address country"] + }, + { + "name": "postal_code", + "type": "table column", + "column name": "sbCustPostalCode", + "data type": "string", + "description": "The postal/zip code of the customer's address", + "sample values": ["90001", "10002", "08801"], + "synonyms": ["zip code", "customer address postal code"] + }, + { + "name": "join_date", + "type": "table column", + "column name": "sbCustJoinDate", + "data type": "datetime", + "description": "The date the customer joined the brokerage", + "synonyms": ["signup date", "customer join date", "account creation date"] + }, + { + "name": "status", + "type": "table column", + "column name": "sbCustStatus", + "data type": "string", + "description": "The state of the customer's account, which is either 'active', 'inactive', or 'suspended'", + "sample values": ["active", "inactive", "suspended"], + "synonyms": ["customer account status", "activity state"] + } + ], + "description": "The customers who have accounts with the brokerage", + "synonyms": ["users", "clients", "members", "subscribers"] + }, + { + "name": "tickers", + "type": "simple table", + "table path": "BROKER.sbTicker", + "unique properties": ["_id"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "sbTickerId", + "data type": "string", + "description": "The unique identifying string for each ticker", + "sample values": ["T001", "T003", "T007", "T018"], + "synonyms": ["ticker key"] + }, + { + "name": "symbol", + "type": "table column", + "column name": "sbTickerSymbol", + "data type": "string", + "description": "The symbol used to abbreviate the ticker name in a recognizable manner, e.g. 
AAPL for Apple, or GOOG for Google", + "sample values": ["AAPL", "AMZN", "BRK.B", "FB", "GOOG", "NFLX", "MSFT"], + "synonyms": ["ticker symbol", "ticker abbreviation"] + }, + { + "name": "name", + "type": "table column", + "column name": "sbTickerName", + "data type": "string", + "description": "The full name of the ticker, e.g. the company name", + "sample values": ["Berkshire Hathaway Inc.", "Alphabet Inc.", "Apple Inc.", "Microsoft Corporation"], + "synonyms": ["company name", "business name"] + }, + { + "name": "ticker_type", + "type": "table column", + "column name": "sbTickerType", + "data type": "string", + "description": "The category of ticker, which is either 'stock', 'etf', or 'mutual fund'", + "sample values": ["stock", "etf", "mutual fund"], + "synonyms": ["category", "classification"] + }, + { + "name": "exchange", + "type": "table column", + "column name": "sbTickerExchange", + "data type": "string", + "description": "The exchange that the ticker is listed on, which is either 'NASDAQ', 'NYSE', 'NYSE Arca', or 'Vanguard'", + "sample values": ["NASDAQ", "NYSE", "NYSE Arca", "Vanguard"], + "synonyms": [] + }, + { + "name": "currency", + "type": "table column", + "column name": "sbTickerCurrency", + "data type": "string", + "description": "The currency that the ticker is traded in, which is always 'USD'", + "sample values": ["USD"] + }, + { + "name": "db2x", + "type": "table column", + "column name": "sbTickerDb2x", + "data type": "string", + "description": "The two-letter exchange code corresponding to the ticker's exchange, which is either 'NQ' for NASDAQ, 'NY' for NYSE, 'NX' for 'NYSE Arca', or 'VG' for Vanguard", + "sample values": ["NQ", "NY", "NX", "VG"], + "synonyms": ["exchange code"] + }, + { + "name": "is_active", + "type": "table column", + "column name": "sbTickerIsActive", + "data type": "bool", + "description": "Whether the ticker is still active", + "synonyms": ["ticker in use"] + } + ], + "description": "All of the tickers that the brokerage trades", + "synonyms": ["stocks", "etfs", "mutual funds"] + }, + { + "name": "daily_prices", + "type": "simple table", + "table path": "BROKER.sbDailyPrice", + "unique properties": [["ticker_id", "date"]], + "properties": [ + { + "name": "ticker_id", + "type": "table column", + "column name": "sbDpTickerId", + "data type": "string", + "description": "The ticker id from the ticker whose price is being recorded", + "sample values": ["T002", "T003", "T005", "T007", "T011"], + "synonyms": ["ticker key"] + }, + { + "name": "date", + "type": "table column", + "column name": "sbDpDate", + "data type": "datetime", + "description": "The date of the price record", + "synonyms": ["record datetime", "price update date"] + }, + { + "name": "open", + "type": "table column", + "column name": "sbDpOpen", + "data type": "numeric", + "description": "The opening price of the ticker on the day of the record", + "sample values": [150, 3200, 2500, 280], + "synonyms": ["opening price"] + }, + { + "name": "high", + "type": "table column", + "column name": "sbDpHigh", + "data type": "numeric", + "description": "The highest price of the ticker during the day of the record", + "sample values": [152.5, 282.75, 3225, 185, 2525], + "synonyms": ["high price", "high-point", "maximum price during day"] + }, + { + "name": "low", + "type": "table column", + "column name": "sbDpLow", + "data type": "numeric", + "description": "The lowest price of the ticker during the day of the record", + "sample values": [148.75, 279.5, 3180, 178.5, 2475], + "synonyms": ["low 
price", "low-point", "minimum price during day"] + }, + { + "name": "close", + "type": "table column", + "column name": "sbDpClose", + "data type": "numeric", + "description": "The close price of the ticker on the day of the record", + "sample values": [151.25, 281, 3210, 184.25, 2510], + "synonyms": ["closing price"] + }, + { + "name": "volume", + "type": "table column", + "column name": "sbDpVolume", + "data type": "numeric", + "description": "The total number of shares of the ticker traded during the day", + "sample values": [75000000, 4000000, 1500000, 35000000], + "synonyms": ["daily trading volume"] + }, + { + "name": "epoch_ms", + "type": "table column", + "column name": "sbDpEpochMs", + "data type": "numeric", + "description": "The number of milliseconds since the epoch (January 1, 1970) that the record was created", + "sample values": [1680336000000, 1680336000000, 1680336000000], + "synonyms": ["created timestamp in epoch milliseconds"] + }, + { + "name": "source", + "type": "table column", + "column name": "sbDpSource", + "data type": "string", + "description": "The exchange where the price record originated from, which is either 'NASDAQ', 'NYSE' or 'Vanguard'", + "sample values": ["NYSE", "NASDAQ", "Vanguard"], + "synonyms": ["price record origin"] + } + ], + "description": "The daily updates on tickers including their price information within a single day", + "synonyms": ["historical stock prices", "daily ticker price data"] + }, + { + "name": "transactions", + "type": "simple table", + "table path": "BROKER.sbTransaction", + "unique properties": ["transaction_id", "kpx"], + "properties": [ + { + "name": "transaction_id", + "type": "table column", + "column name": "sbTxId", + "data type": "string", + "description": "The unique identifying string for each transaction", + "sample values": ["TX035", "TX001", "TX008", "TX033"], + "synonyms": ["transaction key"] + }, + { + "name": "customer_id", + "type": "table column", + "column name": "sbTxCustId", + "data type": "string", + "description": "The id of the customer who made the transaction", + "sample values": ["C003", "C005", "C007", "C001", "C008"], + "synonyms": ["customer key"] + }, + { + "name": "ticker_id", + "type": "table column", + "column name": "sbTxTickerId", + "data type": "string", + "description": "The id of the ticker being bought/sold in the transaction", + "sample values": ["T001", "T007", "T008"], + "synonyms": ["ticker key"] + }, + { + "name": "date_time", + "type": "table column", + "column name": "sbTxDateTime", + "data type": "datetime", + "description": "The timestamp that the transaction was made at", + "synonyms": ["transaction timestamp", "buy/sell datetime"] + }, + { + "name": "transaction_type", + "type": "table column", + "column name": "sbTxType", + "data type": "string", + "description": "The type of transaction, which is either 'buy' or 'sell'", + "sample values": ["buy", "sell"], + "synonyms": ["category", "classification"] + }, + { + "name": "shares", + "type": "table column", + "column name": "sbTxShares", + "data type": "numeric", + "description": "The number of shares being bought/sold in the transaction", + "sample values": [150, 120, 110], + "synonyms": ["volume traded", "transaction size"] + }, + { + "name": "price", + "type": "table column", + "column name": "sbTxPrice", + "data type": "numeric", + "description": "The price that the ticker was bought/sold at in the transaction", + "sample values": [180, 200, 220, 150, 130], + "synonyms": ["buy price", "sell price", "cost", "per-share 
value"] + }, + { + "name": "amount", + "type": "table column", + "column name": "sbTxAmount", + "data type": "numeric", + "description": "The total monetary value of the transaction (cost for buy, revenue for sell), which is the price multiplied by the number of shares", + "sample values": [27000, 24000, 24200, 15000, 13000], + "synonyms": ["total cost", "total revenue", "transaction value"] + }, + { + "name": "currency", + "type": "table column", + "column name": "sbTxCcy", + "data type": "string", + "description": "The currency that the transaction was made in, which is always 'USD'", + "sample values": ["USD"] + }, + { + "name": "tax", + "type": "table column", + "column name": "sbTxTax", + "data type": "numeric", + "description": "The tax applied by the brokerage onto the transaction", + "sample values": [135, 120, 121, 75, 65], + "synonyms": ["levy", "duty", "tariff"] + }, + { + "name": "commission", + "type": "table column", + "column name": "sbTxCommission", + "data type": "numeric", + "description": "The commission taken by the brokerage for the transaction", + "sample values": [20, 15, 10], + "synonyms": ["brokerage fee", "transaction fee"] + }, + { + "name": "kpx", + "type": "table column", + "column name": "sbTxKpx", + "data type": "string", + "description": "The internal code used for the transaction, which corresponds to the transaction id", + "sample values": ["KP056", "KP053", "KP011"], + "synonyms": [] + }, + { + "name": "settlement_date_str", + "type": "table column", + "column name": "sbTxSettlementDateStr", + "data type": "string", + "description": "The date the transaction was settled, which is a string in the form YYYYMMDD HH:MM:SS, or null if not settled yet", + "sample values": ["20230401 09:30:00", "20230401 10:15:00", "20230401 11:00:00"], + "synonyms": ["settlement timestamp", "datetime of transaction finalization"] + }, + { + "name": "status", + "type": "table column", + "column name": "sbTxStatus", + "data type": "string", + "description": "The status of the transaction, which is either 'success', 'fail', or 'pending'", + "sample values": ["success", "fail", "pending"], + "synonyms": ["transaction state"] + } + ], + "description": "All transactions made through the brokerage", + "synonyms": ["exchanges", "trades", "buy/sell orders"] + } + ], + "relationships": [ + { + "name": "transactions_made", + "type": "simple join", + "parent collection": "customers", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "_id": [ + "customer_id" + ] + }, + "description": "All of the transactions made by the customer, including both buys and sells", + "synonyms": ["exchanges made", "trades made", "stock purchases and sales"] + }, + { + "name": "customer", + "type": "reverse", + "original parent": "customers", + "original property": "transactions_made", + "singular": true, + "always matches": true, + "description": "The customer who made the transaction", + "synonyms": ["user", "client", "stock buyer/seller"] + }, + { + "name": "transactions_of", + "type": "simple join", + "parent collection": "tickers", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "_id": [ + "ticker_id" + ] + }, + "description": "The transactions made of a ticker, including both buys and sells of the ticker", + "synonyms": ["purchases", "sales", "exchanges", "trades of ticker"] + }, + { + "name": "ticker", + "type": "reverse", + "original parent": "tickers", + "original property": "transactions_of", + 
"singular": true, + "always matches": true, + "description": "The ticker that the transaction was made of", + "synonyms": ["stock", "etf", "fund", "company"] + }, + { + "name": "ticker", + "type": "simple join", + "parent collection": "daily_prices", + "child collection": "tickers", + "singular": true, + "always matches": true, + "keys": { + "ticker_id": [ + "_id" + ] + }, + "description": "The ticker that the daily price record corresponds to", + "synonyms": ["stock", "etf", "fund", "company"] + }, + { + "name": "daily_prices", + "type": "reverse", + "original parent": "daily_prices", + "original property": "ticker", + "singular": false, + "always matches": false, + "description": "The daily price records for the ticker", + "synonyms": ["historical prices", "price updates"] + } + ], + "additional definitions": [], + "verified pydough analysis": [], + "extra semantic info": {} + }, + { + "name": "Dealership", + "version": "V2", + "collections": [ + { + "name": "cars", + "type": "simple table", + "table path": "DEALERSHIP.cars", + "unique properties": ["_id", "vin_number"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "id", + "data type": "numeric", + "description": "Unique identifier for each car", + "sample values": [1, 3, 5, 6, 18], + "synonyms": ["car key", "car id"] + }, + { + "name": "make", + "type": "table column", + "column name": "make", + "data type": "string", + "description": "The make of the car, e.g. Ford, Toyota, etc.", + "sample values": ["Toyota", "Honda", "Ford", "Tesla", "Nissan"], + "synonyms": ["car brand", "car manufacturer"] + }, + { + "name": "model", + "type": "table column", + "column name": "model", + "data type": "string", + "description": "The model name of the car, e..g Civic, Model 3, A4, RX350, Equinox, etc.", + "sample values": ["Camry", "Accord", "Mustang", "Civic", "Altima", "Tiguan"], + "synonyms": ["car model", "car type"] + }, + { + "name": "year", + "type": "table column", + "column name": "year", + "data type": "numeric", + "description": "Year the car was manufactured", + "sample values": [2021, 2022, 2023], + "synonyms": ["year of production", "manufacturing year"] + }, + { + "name": "color", + "type": "table column", + "column name": "color", + "data type": "string", + "description": "Color of the car", + "sample values": ["Silver", "platinum/grey", "blue", "Titan Silver", "black", "ebony black", "baby blue", "Black", "Jade", "Fiery red"], + "synonyms": ["paint color"] + }, + { + "name": "vin_number", + "type": "table column", + "column name": "vin_number", + "data type": "string", + "description": "Vehicle identification number, a unique code used to identify individual motor vehicles", + "sample values": ["1C4HJXDG3NW123456", "1C6SRFFT3NN123456", "3VV2B7AX1NM123456"], + "synonyms": ["VIN", "vehicle ID"] + }, + { + "name": "engine_type", + "type": "table column", + "column name": "engine_type", + "data type": "string", + "description": "Type of engine (e.g., V6, V8, Electric)", + "sample values": [ + "Boxer 4", + "Electric", + "Flat 6", + "Inline 4", + "Inline 6", + "V6", + "V8" + ], + "synonyms": ["motor", "engine model"] + }, + { + "name": "transmission", + "type": "table column", + "column name": "transmission", + "data type": "string", + "description": "Type of transmission, which is always either 'Automatic', 'Manual', or 'CVT'", + "sample values": ["Automatic", "Manual", "CVT"], + "synonyms": ["gearbox", "transmission type"] + }, + { + "name": "cost", + "type": "table column", + "column name": "cost", + 
"data type": "numeric", + "description": "Cost of the car", + "sample values": [38000.0, 45000.0, 22000.0, 32000.0], + "synonyms": ["value"] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the car was added to the system", + "synonyms": ["created timestamp", "system entry date"] + } + ], + "description": "All cars recorded in the dealership system", + "synonyms": ["vehicles", "automobiles"] + }, + { + "name": "salespeople", + "type": "simple table", + "table path": "DEALERSHIP.salespersons", + "unique properties": ["_id", "email"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "id", + "data type": "numeric", + "description": "Unique identifier for each salesperson in the dealership", + "sample values": [1, 5, 10, 13], + "synonyms": ["salesperson key", "salesperson id", "salesperson number"] + }, + { + "name": "first_name", + "type": "table column", + "column name": "first_name", + "data type": "string", + "description": "The first name of the salesperson", + "sample values": ["John", "Jane", "David", "Sarah"] + }, + { + "name": "last_name", + "type": "table column", + "column name": "last_name", + "data type": "string", + "description": "The last name of the salesperson", + "sample values": ["Doe", "Smith", "Taylor", "Thomas"], + "synonyms": ["surname"] + }, + { + "name": "email", + "type": "table column", + "column name": "email", + "data type": "string", + "description": "Work email of the salesperson", + "sample values": ["john.doe@autonation.com", "michael.johnson@autonation.com", "jennifer.davis@directauto.com"], + "synonyms": ["email address", "work email"] + }, + { + "name": "phone", + "type": "table column", + "column name": "phone", + "data type": "string", + "description": "Contact phone number of the salesperson, where tte first 3 digits are the area code. 
Format: (123)-456-7890", + "sample values": ["(555)-123-4567", "(444)-333-4444", "(001)-415-5678", "(555)-789-0123"], + "synonyms": ["phone number", "contact information"] + }, + { + "name": "hire_date", + "type": "table column", + "column name": "hire_date", + "data type": "datetime", + "description": "The date the salesperson was hired", + "synonyms": ["employment date", "start date"] + }, + { + "name": "termination_date", + "type": "table column", + "column name": "termination_date", + "data type": "datetime", + "description": "The date the salesperson left the dealership, if applicable (null if still working there)", + "synonyms": ["firing date", "exit date", "quitting date", "end date"] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the salesperson record was created", + "synonyms": ["created timestamp", "system entry date"] + } + ], + "description": "The salespeople working at the dealership, current and former", + "synonyms": ["dealership employees", "sales staff", "sales associates"] + }, + { + "name": "customers", + "type": "simple table", + "table path": "DEALERSHIP.customers", + "unique properties": ["_id", "email"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "id", + "data type": "numeric", + "description": "Unique identifier for each customer", + "sample values": [1, 5, 9, 12], + "synonyms": ["customer key", "customer id"] + }, + { + "name": "first_name", + "type": "table column", + "column name": "first_name", + "data type": "string", + "description": "The first name of the customer", + "sample values": ["William", "Ava", "Sophia", "Mia"] + }, + { + "name": "last_name", + "type": "table column", + "column name": "last_name", + "data type": "string", + "description": "The last name of the customer", + "sample values": ["Davis", "Miller", "Jackson", "Lee"], + "synonyms": ["surname"] + }, + { + "name": "email", + "type": "table column", + "column name": "email", + "data type": "string", + "description": "The email address of the customer", + "sample values": ["william.davis@example.com", "charlotte.anderson@example.com", "sophia.lee@microsoft.com", "christopher.brown@ai.com", "abigail.harris@company.io"], + "synonyms": ["contact email", "email address"] + }, + { + "name": "phone", + "type": "table column", + "column name": "phone", + "data type": "string", + "description": "The phone number of the customer listed with the dealership", + "sample values": ["555-888-9999", "555-666-5555", "555-456-7890", "555-444-3333"], + "synonyms": ["contact information", "phone number"] + }, + { + "name": "address", + "type": "table column", + "column name": "address", + "data type": "string", + "description": "The home address of the customer given to the dealership", + "sample values": ["123 Main St", "456 Oak Ave", "987 Birch Dr", "951 Oak Ln"], + "synonyms": ["street address", "home address", "street name and street number"] + }, + { + "name": "city", + "type": "table column", + "column name": "city", + "data type": "string", + "description": "The city of the customer's home address", + "sample values": ["New York", "Los Angeles", "Chicago", "Dallas"] + }, + { + "name": "state", + "type": "table column", + "column name": "state", + "data type": "string", + "description": "The state of the customer's home address, by its capitalized two-letter abbreviation", + "sample values": ["NY", "CA", "TX", "IL"] + }, + { + "name": "zip_code", + "type": "table column", + 
"column name": "zip_code", + "data type": "string", + "description": "The zip-code of the customer's home address", + "sample values": ["10001", "60007", "95101", "94101"], + "synonyms": [] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the customer record was created", + "synonyms": ["created timestamp", "system entry date"] + } + ], + "description": "All the customers who are registered with the dealership", + "synonyms": ["clients", "users", "buyers", "consumers"] + }, + { + "name": "payments_made", + "type": "simple table", + "table path": "DEALERSHIP.payments_made", + "unique properties": ["_id", "invoice_number"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "id", + "data type": "numeric", + "description": "Unique identifier for each payment made", + "sample values": [1, 11, 16, 17], + "synonyms": ["payment key", "payment id"] + }, + { + "name": "vendor_name", + "type": "table column", + "column name": "vendor_name", + "data type": "string", + "description": "Name of the vendor to whom the payment was made", + "sample values": ["Chevrolet Auto", "Marketing Agency", "Car Manufacturer Inc"], + "synonyms": ["payment recipient", "vendor"] + }, + { + "name": "payment_date", + "type": "table column", + "column name": "payment_date", + "data type": "datetime", + "description": "Date that the payment was made" + }, + { + "name": "payment_amount", + "type": "table column", + "column name": "payment_amount", + "data type": "numeric", + "description": "The amount that was paid in the payment in USD", + "sample values": [150000.0, 1500.0, 22000.0], + "synonyms": ["payment sum", "dollar value"] + }, + { + "name": "payment_method", + "type": "table column", + "column name": "payment_method", + "data type": "string", + "description": "Method of payment, which is either 'bank_transfer', 'check', or 'credit_card'", + "sample values": ["bank_transfer", "credit_card", "check"], + "synonyms": ["transfer medium", "payment type"] + }, + { + "name": "invoice_number", + "type": "table column", + "column name": "invoice_number", + "data type": "string", + "description": "Invoice number associated with the payment", + "sample values": ["INV-001", "INV-017", "INV-008"], + "synonyms": ["invoice id", "invoice key", "invoice serial"] + }, + { + "name": "invoice_date", + "type": "table column", + "column name": "invoice_date", + "data type": "datetime", + "description": "Date of the invoice", + "synonyms": ["date of payment"] + }, + { + "name": "due_date", + "type": "table column", + "column name": "due_date", + "data type": "datetime", + "description": "Due date of the invoice", + "synonyms": ["deadline", "expected payment date"] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the payment made record was created", + "synonyms": ["system entry date", "created timestamp"] + } + ], + "description": "Records of payments made by the dealership to vendors", + "synonyms": ["vendor payments", "vendor transactions", "vendor expenditures"] + }, + { + "name": "payments_received", + "type": "simple table", + "table path": "DEALERSHIP.payments_received", + "unique properties": ["_id"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "id", + "data type": "numeric", + "description": "Unique identifier for each payment received record", + 
"sample values": [1, 2, 10, 19, 23], + "synonyms": ["payment key", "payment id"] + }, + { + "name": "sale_id", + "type": "table column", + "column name": "sale_id", + "data type": "numeric", + "description": "The id of the sale that the payment is associated with", + "sample values": [1, 5, 9, 20, 21], + "synonyms": ["sale key", "sale id"] + }, + { + "name": "payment_date", + "type": "table column", + "column name": "payment_date", + "data type": "datetime", + "description": "Date when the payment was received. Can take place after the sale date, or in installments.", + "synonyms": ["transaction date", "installment date"] + }, + { + "name": "payment_amount", + "type": "table column", + "column name": "payment_amount", + "data type": "numeric", + "description": "Amount of the payment received. Can be less than the sale price if the payment is made in installments.", + "sample values": [115000.0, 5000.0, 44000.0, 26000.0], + "synonyms": ["cash received", "payment sum"] + }, + { + "name": "payment_method", + "type": "table column", + "column name": "payment_method", + "data type": "string", + "description": "Method used to make the payment, which can be 'check', 'financing', 'credit_card', 'debit_card', or 'cash'", + "sample values": ["check", "financing", "credit_card", "debit_card", "cash"], + "synonyms": ["transfer method", "payment type"] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the payment received record was created", + "synonyms": ["system entry date", "created timestamp"] + } + ], + "description": "Records of payments received by the dealership from customers as part of a sale", + "synonyms": ["car purchase payments", "customer payments", "customer transactions", "car purchase installments"] + }, + { + "name": "sales", + "type": "simple table", + "table path": "DEALERSHIP.sales", + "unique properties": ["_id"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "id", + "data type": "numeric", + "description": "Unique identifier for each car sale", + "sample values": [5, 11, 12, 18, 22], + "synonyms": ["sale key", "sale id"] + }, + { + "name": "car_id", + "type": "table column", + "column name": "car_id", + "data type": "numeric", + "description": "The id of the car being purchased in the sale", + "sample values": [1, 3, 5, 6, 18], + "synonyms": ["car key", "car id"] + }, + { + "name": "salesperson_id", + "type": "table column", + "column name": "salesperson_id", + "data type": "numeric", + "description": "The id of the salesperson who brokered the sale", + "sample values": [1, 5, 10, 13], + "synonyms": ["salesperson key", "salesperson id"] + }, + { + "name": "customer_id", + "type": "table column", + "column name": "customer_id", + "data type": "numeric", + "description": "The id of the customer who purchased the car in the sale", + "sample values": [1, 5, 9, 12], + "synonyms": ["customer key", "customer id"] + }, + { + "name": "sale_price", + "type": "table column", + "column name": "sale_price", + "data type": "numeric", + "description": "Price at which the car was sold", + "sample values": [43500.0, 30500.0, 38000.0, 49000.0, 26500.0], + "synonyms": ["purchase cost", "sale amount", "dollar value"] + }, + { + "name": "sale_date", + "type": "table column", + "column name": "sale_date", + "data type": "datetime", + "description": "Date when the car was sold", + "synonyms": ["purchase date", "transaction date"] + }, + { + "name": "crtd_ts", + "type": 
"table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the sale record was created", + "synonyms": ["system entry date", "created timestamp"] + } + ], + "description": "Records of cars being purchased from the dealership from a customer via one of the salespeople", + "synonyms": ["car purchases", "sale records", "vehicle sale transactions"] + }, + { + "name": "inventory_snapshots", + "type": "simple table", + "table path": "DEALERSHIP.inventory_snapshots", + "unique properties": ["_id"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "id", + "data type": "numeric", + "description": "Unique identifier for each inventory snapshot entry", + "sample values": [1, 4, 13, 19, 23], + "synonyms": ["inventory snapshot key", "snapshot id"] + }, + { + "name": "snapshot_date", + "type": "table column", + "column name": "snapshot_date", + "data type": "datetime", + "description": "Date of the inventory snapshot", + "synonyms": ["date of snapshot", "inventory date"] + }, + { + "name": "car_id", + "type": "table column", + "column name": "car_id", + "data type": "numeric", + "description": "The key of the car that the inventory snapshot is for", + "sample values": [2, 10, 1, 4, 9], + "synonyms": ["car key", "car id"] + }, + { + "name": "is_in_inventory", + "type": "table column", + "column name": "is_in_inventory", + "data type": "bool", + "description": "Whether that specific car was in the dealership's inventory at the time of the snapshot", + "synonyms": ["car in inventory", "car availability"] + }, + { + "name": "crtd_ts", + "type": "table column", + "column name": "crtd_ts", + "data type": "datetime", + "description": "Timestamp when the inventory snapshot record was created", + "synonyms": ["system entry date", "created timestamp"] + } + ], + "description": "Records of whether specific individual cars were in the dealership's inventory as of specific dates", + "synonyms": ["inventory records", "stock records", "car availability manifest"] + } + ], + "relationships": [ + { + "name": "sale_records", + "type": "simple join", + "parent collection": "cars", + "child collection": "sales", + "singular": false, + "always matches": false, + "keys": { + "_id": [ + "car_id" + ] + }, + "description": "All sales made where the car was sold", + "synonyms": ["purchases of vehicle", "car sales", "vehicle transactions"] + }, + { + "name": "car", + "type": "reverse", + "original parent": "cars", + "original property": "sale_records", + "singular": true, + "always matches": true, + "description": "The car being purchased in the sale", + "synonyms": ["vehicle", "automobile"] + }, + { + "name": "inventory_snapshots", + "type": "simple join", + "parent collection": "cars", + "child collection": "inventory_snapshots", + "singular": false, + "always matches": false, + "keys": { + "_id": [ + "car_id" + ] + }, + "description": "The inventory snapshots for the car, which show whether it was in the dealership's inventory at specific dates", + "synonyms": ["availability records", "inventory history"] + }, + { + "name": "car", + "type": "reverse", + "original parent": "cars", + "original property": "inventory_snapshots", + "singular": true, + "always matches": true, + "description": "The car that the inventory snapshot belongs to", + "synonyms": ["vehicle", "automobile"] + }, + { + "name": "sales_made", + "type": "simple join", + "parent collection": "salespeople", + "child collection": "sales", + "singular": false, + "always matches": false, + 
"keys": { + "_id": [ + "salesperson_id" + ] + }, + "description": "All the sales brokered by the salesperson", + "synonyms": ["sales records", "transactions made", "deals closed"] + }, + { + "name": "salesperson", + "type": "reverse", + "original parent": "salespeople", + "original property": "sales_made", + "singular": true, + "always matches": true, + "description": "The salesperson who brokered the car purchase deal", + "synonyms": ["broker", "sales associate", "sales staff", "dealer"] + }, + { + "name": "car_purchases", + "type": "simple join", + "parent collection": "customers", + "child collection": "sales", + "singular": false, + "always matches": false, + "keys": { + "_id": [ + "customer_id" + ] + }, + "description": "All car purchases made by the customer", + "synonyms": ["vehicle purchase records", "car transactions", "automobile purchases"] + }, + { + "name": "customer", + "type": "reverse", + "original parent": "customers", + "original property": "car_purchases", + "singular": true, + "always matches": true, + "description": "The customer who purchased the car", + "synonyms": ["client", "buyer", "purchaser", "consumer", "user"] + }, + { + "name": "payment", + "type": "simple join", + "parent collection": "sales", + "child collection": "payments_received", + "singular": false, + "always matches": false, + "keys": { + "_id": [ + "sale_id" + ] + }, + "description": "The payment installments made for the sale, which can be one or multiple installments for a single sale", + "synonyms": ["sale purchase installments", "payment records"] + }, + { + "name": "sale_record", + "type": "reverse", + "original parent": "sales", + "original property": "payment", + "singular": true, + "always matches": true, + "description": "The sale that the payment is a part of", + "synonyms": ["purchase record", "vehicle sale", "car purchase"] + } + ], + "additional definitions": [], + "verified pydough analysis": [], + "extra semantic info": {} + }, + { + "name": "DermTreatment", + "version": "V2", + "collections": [ + { + "name": "doctors", + "type": "simple table", + "table path": "DERMTREATMENT.doctors", + "unique properties": ["doc_id"], + "properties": [ + { + "name": "doc_id", + "type": "table column", + "column name": "doc_id", + "data type": "numeric", + "description": "Unique identifier for each doctor in the system", + "sample values": [1, 2, 4, 8, 10], + "synonyms": ["doctor key", "doctor id"] + }, + { + "name": "first_name", + "type": "table column", + "column name": "first_name", + "data type": "string", + "description": "The first name of the doctor", + "sample values": ["John", "Michael", "Daniel", "Olivia"] + }, + { + "name": "last_name", + "type": "table column", + "column name": "last_name", + "data type": "string", + "description": "The last name of the doctor", + "sample values": ["Smith", "Brown", "Wilson", "Garcia"], + "synonyms": ["surname"] + }, + { + "name": "specialty", + "type": "table column", + "column name": "specialty", + "data type": "string", + "description": "The specialty of the doctor, which can be one of the following: dermatology, immunology, general, or oncology", + "sample values": ["dermatology", "immunology", "general", "oncology"], + "synonyms": ["focus", "area", "specialization", "expertise", "type of doctor"] + }, + { + "name": "year_reg", + "type": "table column", + "column name": "year_reg", + "data type": "numeric", + "description": "Year the doctor was registered and obtained license", + "sample values": [1998, 2006, 2012, 2015, 2018], + "synonyms": 
["registration year", "year received license"] + }, + { + "name": "med_school_name", + "type": "table column", + "column name": "med_school_name", + "data type": "string", + "description": "The name of the medical school where the doctor graduated", + "sample values": ["University of Pennsylvania", "Johns Hopkins University", "Columbia University"], + "synonyms": ["alma matter", "medical school", "graduated from"] + }, + { + "name": "city", + "type": "table column", + "column name": "loc_city", + "data type": "string", + "description": "The city where the doctor is located", + "sample values": ["San Francisco", "Durham", "Ann Arbour", "New York"], + "synonyms": ["city of practice", "city of work", "location city", "city of residence"] + }, + { + "name": "state", + "type": "table column", + "column name": "loc_state", + "data type": "string", + "description": "The state where the doctor is located, by its capitalized two-letter abbreviation", + "sample values": ["CA", "NC", "NY", "MI"], + "synonyms": ["state of practice", "state of work", "location state", "state of residence"] + }, + { + "name": "zip_code", + "type": "table column", + "column name": "loc_zip", + "data type": "string", + "description": "The zip code where the doctor is located", + "sample values": ["21201", "02115", "94304", "10027"], + "synonyms": ["zip code of practice", "zip code of work", "location zip code", "zip code of residence"] + }, + { + "name": "board_certification_number", + "type": "table column", + "column name": "bd_cert_num", + "data type": "string", + "description": "The board certification number of the doctor", + "sample values": ["YZA890", "ABC123", "GHI012", "JKL345"], + "synonyms": ["board certification number", "certification number"] + } + ], + "description": "All the doctors registered in the system", + "synonyms": ["practitioners", "physicians", "medical professionals"] + }, + { + "name": "patients", + "type": "simple table", + "table path": "DERMTREATMENT.patients", + "unique properties": ["patient_id", "email", "phone"], + "properties": [ + { + "name": "patient_id", + "type": "table column", + "column name": "patient_id", + "data type": "numeric", + "description": "The unique identifier for each patient in the system", + "sample values": [1, 3, 5, 7, 9], + "synonyms": ["patient key", "patient id"] + }, + { + "name": "first_name", + "type": "table column", + "column name": "first_name", + "data type": "string", + "description": "The first name of the patient", + "sample values": ["Alice", "Bob", "Carol", "David", "Eve"] + }, + { + "name": "last_name", + "type": "table column", + "column name": "last_name", + "data type": "string", + "description": "The last name of the patient", + "sample values": ["Smith", "Richter", "Martinez", "Taylor"], + "synonyms": ["surname"] + }, + { + "name": "date_of_birth", + "type": "table column", + "column name": "date_of_birth", + "data type": "datetime", + "description": "The date the patient was born", + "synonyms": ["birthday"] + }, + { + "name": "date_of_registration", + "type": "table column", + "column name": "date_of_registration", + "data type": "datetime", + "description": "The date the patient was registered in the system", + "synonyms": ["registration date", "system entry date"] + }, + { + "name": "gender", + "type": "table column", + "column name": "gender", + "data type": "string", + "description": "The gender of the patient, which can either 'Male', 'Female', or 'Others'", + "sample values": ["Male", "Female", "Others"], + "synonyms": ["sex"] + }, + { 
+ "name": "email", + "type": "table column", + "column name": "email", + "data type": "string", + "description": "The email of the patient used for contact information", + "sample values": ["alice@email.com", "eve@email.com", "frank@email.com", "john@qwik.com'"], + "synonyms": ["email address", "contact email"] + }, + { + "name": "phone", + "type": "table column", + "column name": "phone", + "data type": "string", + "description": "The phone number of the patient used for contact information", + "sample values": ["555-123-4567", "555-987-6543", "555-123-4567"], + "synonyms": ["contact phone", "phone number"] + }, + { + "name": "street_address", + "type": "table column", + "column name": "addr_city", + "data type": "string", + "description": "The street address of the patient's home, including street name and number", + "sample values": ["987 Birch Dr", "753 Walnut Ave", "951 Spruce Blvd"], + "synonyms": ["street name and number", "home address"] + }, + { + "name": "city", + "type": "table column", + "column name": "addr_city", + "data type": "string", + "description": "The city of the patient's address", + "sample values": ["Anytown", "Somecity", "Somewhere", "Somewhere"], + "synonyms": ["home city", "town", "city of residence"] + }, + { + "name": "state", + "type": "table column", + "column name": "addr_state", + "data type": "string", + "description": "The state of the patient's address, by its capitalized two-letter abbreviation", + "sample values": ["CA", "TX", "FL", "OH"], + "synonyms": ["home state", "state of residence"] + }, + { + "name": "zip_code", + "type": "table column", + "column name": "addr_zip", + "data type": "string", + "description": "The zip code of the patient's address", + "sample values": ["12345", "54321", "86420"], + "synonyms": ["home zip code", "postal code", "zip code of residence"] + }, + { + "name": "insurance_type", + "type": "table column", + "column name": "ins_type", + "data type": "string", + "description": "The type of insurance the patient has, which can be one of the following: private, medicare, medicaid, or uninsured", + "sample values": ["private", "medicare", "medicaid", "uninsured"], + "synonyms": [] + }, + { + "name": "insurance_policy_number", + "type": "table column", + "column name": "ins_policy_num", + "data type": "string", + "description": "The policy number of the patient's insurance (null if uninsured)", + "sample values": ["ABC123456", "XYZ789012", "JKL567890", "PQR135790"], + "synonyms": ["policy number"] + }, + { + "name": "height", + "type": "table column", + "column name": "height_cm", + "data type": "numeric", + "description": "The height of the patient in centimeters", + "sample values": [165, 180, 160, 178], + "synonyms": ["centimeter height"] + }, + { + "name": "weight", + "type": "table column", + "column name": "weight_kg", + "data type": "numeric", + "description": "The weight of the patient in kilograms", + "sample values": [60, 78, 90, 60], + "synonyms": ["kilogram weight"] + } + ], + "description": "The personal & registration information about every patient registered in the system", + "synonyms": ["patient records", "patient information", "patient data", "clients", "trial subjects", "participants"] + }, + { + "name": "drugs", + "type": "simple table", + "table path": "DERMTREATMENT.drugs", + "unique properties": ["drug_id", "national_drug_code"], + "properties": [ + { + "name": "drug_id", + "type": "table column", + "column name": "drug_id", + "data type": "numeric", + "description": "The unique identifier for each drug 
in the system", + "sample values": [1, 2, 4, 8], + "synonyms": ["drug key", "drug id"] + }, + { + "name": "drug_name", + "type": "table column", + "column name": "drug_name", + "data type": "string", + "description": "The brand name of the drug", + "sample values": ["Drugalin", "Topizol", "Biologic-X", "Topicalin"], + "synonyms": ["brand name", "pharmaceutical name", "medication name"] + }, + { + "name": "manufacturer", + "type": "table column", + "column name": "manufacturer", + "data type": "string", + "description": "The name of the company who produces the drug", + "sample values": ["Pharma Inc", "Acme Pharma", "Derma Rx", "BioMed Ltd"], + "synonyms": ["producer", "drug manufacturer", "pharmaceutical company", "drug company"] + }, + { + "name": "drug_type", + "type": "table column", + "column name": "drug_type", + "data type": "string", + "description": "The category of drug, which can be one of the following: 'biologic', 'small molecule', or 'topical'", + "sample values": ["biologic", "small molecule", "topical"], + "synonyms": ["category", "classification", "type of drug"] + }, + { + "name": "mechanism_of_activation", + "type": "table column", + "column name": "moa", + "data type": "string", + "description": "The mechanism of activation of the drug", + "sample values": ["TNF-alpha inhibitor", "PDE4 inhibitor", "IL-12/23 inhibitor"], + "synonyms": ["moa"] + }, + { + "name": "fda_approval_date", + "type": "table column", + "column name": "fda_appr_dt", + "data type": "datetime", + "description": "The date the drug was approved by the FDA (null if not approved because it is still under trial)", + "synonyms": ["approval date"] + }, + { + "name": "administration_route", + "type": "table column", + "column name": "admin_route", + "data type": "string", + "description": "The means used to administer the drug, which can be one of the following: 'oral', 'injection' or 'topical'", + "sample values": ["oral", "injection", "topical"], + "synonyms": ["method of procedure", "administration method", "delivery means"] + }, + { + "name": "recommended_dosage_amount", + "type": "table column", + "column name": "dos_amt", + "data type": "numeric", + "description": "The recommended amount of the drug to be taken per dose, where the unit is specified in the dosage_units property", + "sample values": [40, 30, 15, 5], + "synonyms": ["dose size", "quantity per dose", "amount taken per administration"] + }, + { + "name": "dosage_units", + "type": "table column", + "column name": "dos_unit", + "data type": "string", + "description": "The unit used by recommended_dosage_amount, which is either `mg` (for milligrams) or `g` (for grams)", + "sample values": ["mg", "g"], + "synonyms": ["units of measurement", "dosage unit", "dose quantity scale factor"] + }, + { + "name": "dose_frequency_hours", + "type": "table column", + "column name": "dos_freq_hrs", + "data type": "numeric", + "description": "The recommended number of hours between dosages", + "sample values": [336, 24, 12, 672], + "synonyms": ["gap between doses", "administration hour intervals", "time between doses"] + }, + { + "name": "national_drug_code", + "type": "table column", + "column name": "ndc", + "data type": "string", + "description": "The national drug code of the drug, which is a unique identifier assigned to each drug by the FDA", + "sample values": ["12345-678-90", "13579-246-80", "95146-753-19"], + "synonyms": [] + } + ], + "description": "The drugs listed in the system, which are used for treatment of patients", + "synonyms": 
["medications", "pharmaceuticals"] + }, + { + "name": "diagnoses", + "type": "simple table", + "table path": "DERMTREATMENT.diagnoses", + "unique properties": ["_id", "code", "name"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "DIAG_ID", + "data type": "numeric", + "description": "The unique identifier for each diagnosis in the system", + "sample values": [1, 3, 5, 7, 9], + "synonyms": ["diagnosis key", "diagnosis id"] + }, + { + "name": "code", + "type": "table column", + "column name": "diag_code", + "data type": "string", + "description": "The code of the diagnosis, which is a unique identifier assigned to each diagnosis by the ICD-10 system", + "sample values": ["L40.0", "L40.59", "F40.8", "L40.50"], + "synonyms": ["diagnosis code", "ICD-10 code"] + }, + { + "name": "name", + "type": "table column", + "column name": "diag_name", + "data type": "string", + "description": "Name of the diagnosis, in medical terms", + "sample values": ["Psoriasis vulgaris", "Generalized pustular psoriasis", "Psoriasis, unspecified", "Other psoriatic arthropathy", "Psoriatic arthritis mutilans"], + "synonyms": ["title"] + }, + { + "name": "description", + "type": "table column", + "column name": "diag_desc", + "data type": "string", + "description": "An elaboration of the diagnosis, in more layman's terms and/or with some symptoms included", + "sample values": ["Plaque psoriasis, the most common form", "Psoriasis not further specified", "Includes flexural, erythrodermic, and other rare types", "Small, teardrop-shaped lesions", "Widespread pustules on top of red skin"], + "synonyms": ["details", "symptoms", "explanation"] + } + ], + "description": "Records of each diagnosis in the system, which are identified when treating patients", + "synonyms": ["medical conditions", "diagnostic codes"] + }, + { + "name": "treatments", + "type": "simple table", + "table path": "DERMTREATMENT.treatments", + "unique properties": ["treatment_id"], + "properties": [ + { + "name": "treatment_id", + "type": "table column", + "column name": "treatment_id", + "data type": "numeric", + "description": "Unique identifier for each treatment record in the system", + "sample values": [1, 4, 7, 16, 26], + "synonyms": ["treatment id", "treatment key"] + }, + { + "name": "patient_id", + "type": "table column", + "column name": "patient_id", + "data type": "numeric", + "description": "The id of the patient who is receiving the treatment", + "sample values": [1, 3, 5, 7, 9], + "synonyms": ["patient key", "patient id"] + }, + { + "name": "doc_id", + "type": "table column", + "column name": "doc_id", + "data type": "numeric", + "description": "The id of the doctor who is administering the treatment", + "sample values": [1, 2, 4, 8], + "synonyms": ["doctor key", "doctor id"] + }, + { + "name": "drug_id", + "type": "table column", + "column name": "drug_id", + "data type": "numeric", + "description": "The id of the drug being used in the treatment", + "sample values": [1, 2, 3, 5], + "synonyms": ["drug key", "drug id"] + }, + { + "name": "diagnosis_id", + "type": "table column", + "column name": "diag_id", + "data type": "numeric", + "description": "The id of the diagnosis the patient is being treated for", + "sample values": [2, 4, 5, 6, 8], + "synonyms": ["diagnosis key", "diagnosis id"] + }, + { + "name": "start_date", + "type": "table column", + "column name": "start_dt", + "data type": "datetime", + "description": "The date the treatment began", + "synonyms": ["treatment start date", "treatment 
initiation date"] + }, + { + "name": "end_date", + "type": "table column", + "column name": "end_dt", + "data type": "datetime", + "description": "The date the treatment ended (null if ongoing)", + "synonyms": ["treatment end date", "treatment completion date"] + }, + { + "name": "is_placebo", + "type": "table column", + "column name": "is_placebo", + "data type": "bool", + "description": "True if the treatment is a placebo, false otherwise", + "synonyms": ["placebo treatment", "dummy treatment"] + }, + { + "name": "total_drug_amount", + "type": "table column", + "column name": "tot_drug_amt", + "data type": "numeric", + "description": "The total quantity of the drug used in the treatment, where the unit is specified in the drug_unit property", + "sample values": [240, 180, 720, 360], + "synonyms": ["amount of drug used", "total drug quantity", "total dosage"] + }, + { + "name": "drug_unit", + "type": "table column", + "column name": "drug_unit", + "data type": "string", + "description": "The unit used by total_drug_amount, which is either `mg` (for milligrams) or `g` (for grams)", + "sample values": ["mg", "g"], + "synonyms": ["dosage unit", "unit of measurement", "drug quantity scale factor"] + } + ], + "description": "The records of each treatment administered to patients in the system, which can be either a drug or a placebo", + "synonyms": ["treatment records", "medical treatments", "therapies"] + }, + { + "name": "outcomes", + "type": "simple table", + "table path": "DERMTREATMENT.outcomes", + "unique properties": ["outcome_id"], + "properties": [ + { + "name": "outcome_id", + "type": "table column", + "column name": "outcome_id", + "data type": "numeric", + "description": "The unique identifier for each treatment outcome in the system", + "sample values": [1, 5, 16, 20, 21], + "synonyms": ["outcome key", "outcome id"] + }, + { + "name": "treatment_id", + "type": "table column", + "column name": "treatment_id", + "data type": "numeric", + "description": "The id for the treatment that the outcome is related to", + "sample values": [3, 5, 9, 25, 26], + "synonyms": ["treatment key", "treatment id"] + }, + { + "name": "assessment_date", + "type": "table column", + "column name": "assess_dt", + "data type": "datetime", + "description": "The date when the outcome assessment was made", + "synonyms": ["date of assessment", "evaluation date"] + }, + { + "name": "day7_lesion_count", + "type": "table column", + "column name": "day7_lesion_cnt", + "data type": "numeric", + "description": "The number of lesions counted on the patient's skin at day 7 after treatment started", + "sample values": [20, 25, 18, 30, 22], + "synonyms": ["lesions observed on day 7"] + }, + { + "name": "day30_lesion_count", + "type": "table column", + "column name": "day30_lesion_cnt", + "data type": "numeric", + "description": "The number of lesions counted on the patient's skin at day 30 after treatment started (null if not enough time has passed)", + "sample values": [15, 18, 12, 25, 16], + "synonyms": ["lesions observed on day 30"] + }, + { + "name": "day100_lesion_count", + "type": "table column", + "column name": "day100_lesion_cnt", + "data type": "numeric", + "description": "The number of lesions counted on the patient's skin at day 100 after treatment started (null if not enough time has passed)", + "sample values": [5, 8, 3, 12, 6], + "synonyms": ["lesions observed on day 100"] + }, + { + "name": "day7_pasi_score", + "type": "table column", + "column name": "day7_pasi_score", + "data type": "numeric", + 
"description": "The PASI score calculated at day 7 after treatment started, (Psoriasis Area and Severity Index) which is a measure of the severity of psoriasis from 0 to 72", + "sample values": [12.5, 15.0, 10.8, 18.2, 13.1], + "synonyms": ["PASI on day 7"] + }, + { + "name": "day30_pasi_score", + "type": "table column", + "column name": "day30_pasi_score", + "data type": "numeric", + "description": "The PASI score calculated at day 30 after treatment started, (Psoriasis Area and Severity Index) which is a measure of the severity of psoriasis from 0 to 72 (null if not enough time has passed)", + "sample values": [8.2, 10.1, 6.4, 18.2, 13.1], + "synonyms": ["PASI on day 30"] + }, + { + "name": "day100_pasi_score", + "type": "table column", + "column name": "day100_pasi_score", + "data type": "numeric", + "description": "The PASI score calculated at day 100 after treatment started, (Psoriasis Area and Severity Index) which is a measure of the severity of psoriasis from 0 to 72 (null if not enough time has passed)", + "sample values": [2.1, 3.5, 1.2, 5.8, 2.6], + "synonyms": ["PASI on day 100"] + }, + { + "name": "day7_tewl", + "type": "table column", + "column name": "day7_tewl", + "data type": "numeric", + "description": "TEWL score (Transepidermal Water Loss) calculated at day 7 after treatment started, which is a measure of skin hydration", + "sample values": [18.2, 20.1, 16.5, 22.4], + "synonyms": ["TEWL on day 7"] + }, + { + "name": "day30_tewl", + "type": "table column", + "column name": "day30_tewl", + "data type": "numeric", + "description": "TEWL score (Transepidermal Water Loss) calculated at day 30 after treatment started, which is a measure of skin hydration, in n g/m^2/h (null if not enough time has passed)", + "sample values": [15.6, 17.2, 14.0, 19.1], + "synonyms": ["TEWL on day 30"] + }, + { + "name": "day100_tewl", + "type": "table column", + "column name": "day100_tewl", + "data type": "numeric", + "description": "TEWL score (Transepidermal Water Loss) calculated at day 100 after treatment started, which is a measure of skin hydration, in n g/m^2/h (null if not enough time has passed)", + "sample values": [12.1, 13.5, 10.8, 15.2], + "synonyms": ["TEWL on day 100"] + }, + { + "name": "day7_itch_vas", + "type": "table column", + "column name": "day7_itch_vas", + "data type": "numeric", + "description": "The itch visual analog scale (VAS) score at day 7 after treatment started, which is a measure of itch severity from 0 to 100", + "sample values": [60, 70, 55, 80, 65], + "synonyms": ["VAS on day 7"] + }, + { + "name": "day30_itch_vas", + "type": "table column", + "column name": "day30_itch_vas", + "data type": "numeric", + "description": "The itch visual analog scale (VAS) score at day 30 after treatment started, which is a measure of itch severity from 0 to 100 (null if not enough time has passed)", + "sample values": [40, 50, 35, 60, 45], + "synonyms": ["VAS on day 30"] + }, + { + "name": "day100_itch_vas", + "type": "table column", + "column name": "day100_itch_vas", + "data type": "numeric", + "description": "The itch visual analog scale (VAS) score at day 100 after treatment started, which is a measure of itch severity from 0 to 100 (null if not enough time has passed)", + "sample values": [20, 30, 15, 40, 25], + "synonyms": ["VAS on day 100"] + }, + { + "name": "day7_hfg", + "type": "table column", + "column name": "day7_hfg", + "data type": "numeric", + "description": "Hair growth factor (HGF) score at day 7 after treatment started, on a scale from 0 to 5", + "sample 
values": [1.5, 1.0, 2.0, 0.5, 1.2], + "synonyms": ["HFG on day 7"] + }, + { + "name": "day30_hfg", + "type": "table column", + "column name": "day30_hfg", + "data type": "numeric", + "description": "Hair growth factor (HGF) score at day 30 after treatment started, on a scale from 0 to 5 (null if not enough time has passed)", + "sample values": [2.5, 2.0, 3.0, 1.5, 2.2], + "synonyms": ["HFG on day 30"] + }, + { + "name": "day100_hfg", + "type": "table column", + "column name": "day100_hfg", + "data type": "numeric", + "description": "Hair growth factor (HGF) score at day 100 after treatment started, on a scale from 0 to 5 (null if not enough time has passed)", + "sample values": [4.0, 3.5, 4.5, 3.0, 3.8], + "synonyms": ["HFG on day 100"] + } + ], + "description": "The recorded outcomes of treatments, which can include the same treatment at different points in time if not enough time has elapsed to record the complete outcome for a treatment", + "synonyms": ["results", "treatment evaluations", "assessments"] + }, + { + "name": "concomitant_meds", + "type": "simple table", + "table path": "DERMTREATMENT.concomitant_meds", + "unique properties": ["_id", ["treatment_id", "medicine_name"]], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "id", + "data type": "numeric", + "description": "The unique identifier for each concomitant medication record in the system", + "sample values": [1, 5, 12, 14, 15], + "synonyms": ["concomitant drug key", "concomitant drug id"] + }, + { + "name": "treatment_id", + "type": "table column", + "column name": "treatment_id", + "data type": "numeric", + "description": "The id of the treatment that the concomitant medication record is related to", + "sample values": [1, 4, 8, 11, 13, 14], + "synonyms": [] + }, + { + "name": "medicine_name", + "type": "table column", + "column name": "med_name", + "data type": "string", + "description": "The name of the medicine being taken by the patient as a concomitant drug", + "sample values": ["Acetaminophen", "Ibuprofen", "Loratadine", "Multivitamin", "Calcium supplement", "Vitamin D", "Hydrocortisone cream"], + "synonyms": ["concomitant drug name"] + }, + { + "name": "start_date", + "type": "table column", + "column name": "start_dt", + "data type": "string", + "description": "Date the concomitant drug treatment was started", + "synonyms": ["concomitant drug start date", "first date"] + }, + { + "name": "end_date", + "type": "table column", + "column name": "end_dt", + "data type": "string", + "description": "Date the concomitant drug treatment finished (null if ongoing)", + "synonyms": ["last date", "concomitant drug end date"] + }, + { + "name": "dose_amount", + "type": "table column", + "column name": "dose_amt", + "data type": "numeric", + "description": "The amount of the medicine taken per dose, where the unit is specified in the dose_unit property", + "sample values": [500, 200, 10, 1, 50], + "synonyms": ["amount of dose", "quantity per dose", "dose size"] + }, + { + "name": "dose_unit", + "type": "table column", + "column name": "dose_unit", + "data type": "string", + "description": "The unit used by dose_amount", + "sample values": ["mg", "g", "tablet", "IU"], + "synonyms": [] + }, + { + "name": "dose_frequency", + "type": "table column", + "column name": "freq_hrs", + "data type": "numeric", + "description": "The frequency of the dose in hours (null if one-time use)", + "sample values": [6, 24, 8, 12], + "synonyms": ["gap between doses", "administration hour intervals", "time between 
doses"] + } + ], + "description": "Records of medications that are taken by patients in addition to the treatment they are receiving during their treatment", + "synonyms": ["additional drugs", "ancillary medications", "concomitant drugs", "accompanying medications", "concurrent drugs"] + }, + { + "name": "adverse_events", + "type": "simple table", + "table path": "DERMTREATMENT.adverse_events", + "unique properties": ["_id"], + "properties": [ + { + "name": "_id", + "type": "table column", + "column name": "id", + "data type": "numeric", + "description": "The unique identifier for each adverse event record in the system", + "sample values": [1, 3, 5, 7, 8], + "synonyms": ["adverse event key", "adverse event id"] + }, + { + "name": "treatment_id", + "type": "table column", + "column name": "treatment_id", + "data type": "numeric", + "description": "The id for the treatment that the adverse event occurred during", + "sample values": [2, 5, 9, 11, 14], + "synonyms": ["treatment key", "treatment id"] + }, + { + "name": "reported_date", + "type": "table column", + "column name": "reported_dt", + "data type": "datetime", + "description": "The date the adverse event was reported", + "synonyms": ["incident date", "date of reaction"] + }, + { + "name": "description", + "type": "table column", + "column name": "description", + "data type": "string", + "description": "Descriptor of the adverse event, which can be a single word or a phrase", + "sample values": ["Mild injection site reaction", "Headache, nausea", "Severe allergic reaction, hospitalization required", "Diarrhea"], + "synonyms": [] + } + ], + "description": "All records of adverse events that have been reported in the system related to a treatment", + "synonyms": ["incidents during treatment", "adverse reactions", "side effects"] + } + ], + "relationships": [ + { + "name": "doctor", + "type": "simple join", + "parent collection": "treatments", + "child collection": "doctors", + "singular": true, + "always matches": true, + "keys": { + "doc_id": [ + "doc_id" + ] + }, + "description": "The doctor who administered the treatment", + "synonyms": ["medical professional", "treatment administrator", "prescribing doctor"] + }, + { + "name": "prescribed_treatments", + "type": "reverse", + "original parent": "treatments", + "original property": "doctor", + "singular": false, + "always matches": false, + "description": "The treatments prescribed/administered by the doctor", + "synonyms": ["regimens", "treatment plans", "medical prescriptions"] + }, + { + "name": "patient", + "type": "simple join", + "parent collection": "treatments", + "child collection": "patients", + "singular": true, + "always matches": true, + "keys": { + "patient_id": [ + "patient_id" + ] + }, + "description": "The patient who the treatment was administered to", + "synonyms": ["trial subject", "participant", "client", "drug recipient"] + }, + { + "name": "treatments_received", + "type": "reverse", + "original parent": "treatments", + "original property": "patient", + "singular": false, + "always matches": false, + "description": "The drug treatment regimens the patient has been a part of", + "synonyms": ["Drug regimens", "medical treatments", "therapies", "treatment history"] + }, + { + "name": "drug", + "type": "simple join", + "parent collection": "treatments", + "child collection": "drugs", + "singular": true, + "always matches": true, + "keys": { + "drug_id": [ + "drug_id" + ] + }, + "description": "The drug being used as part of the treatment", + "synonyms": ["medication", 
"pharmaceutical", "drug product", "therapy"] + }, + { + "name": "treatments_used_in", + "type": "reverse", + "original parent": "treatments", + "original property": "drug", + "singular": false, + "always matches": false, + "description": "The treatments the drug was used in", + "synonyms": ["therapies", "uses", "regimens", "prescriptions"] + }, + { + "name": "diagnosis", + "type": "simple join", + "parent collection": "treatments", + "child collection": "diagnoses", + "singular": true, + "always matches": true, + "keys": { + "diagnosis_id": [ + "_id" + ] + }, + "description": "The diagnosis associated with the treatment which the drug is treating", + "synonyms": ["medical condition", "associated condition", "cause of treatment"] + }, + { + "name": "treatments_for", + "type": "reverse", + "original parent": "treatments", + "original property": "diagnosis", + "singular": false, + "always matches": false, + "description": "The drug treatment regimens targeted at a patient with this diagnosis", + "synonyms": [] + }, + { + "name": "outcome_records", + "type": "simple join", + "parent collection": "treatments", + "child collection": "outcomes", + "singular": false, + "always matches": false, + "keys": { + "treatment_id": [ + "treatment_id" + ] + }, + "description": "The reported outcomes of the treatment, which can include the same treatment at different points in time if not enough time has elapsed to record the complete outcome for a treatment", + "synonyms": ["quality reports", "evaluations", "assessments"] + }, + { + "name": "treatment", + "type": "reverse", + "original parent": "treatments", + "original property": "outcome_records", + "singular": true, + "always matches": true, + "description": "The treatment that the outcome record is reporting on", + "synonyms": ["regimen", "prescription", "medical trial"] + }, + { + "name": "concomitant_meds", + "type": "simple join", + "parent collection": "treatments", + "child collection": "concomitant_meds", + "singular": false, + "always matches": false, + "keys": { + "treatment_id": [ + "treatment_id" + ] + }, + "description": "The ancillary medications that were taken by a patient in addition to the treatment they are receiving during their treatment", + "synonyms": ["additional drugs", "ancillary medications", "concomitant drugs", "accompanying medications", "concurrent drugs"] + }, + { + "name": "treatment", + "type": "reverse", + "original parent": "treatments", + "original property": "concomitant_meds", + "singular": true, + "always matches": true, + "description": "The treatment that the administration of the concomitant medication was related to", + "synonyms": ["medical trial", "regimen", "main prescription"] + }, + { + "name": "adverse_events", + "type": "simple join", + "parent collection": "treatments", + "child collection": "adverse_events", + "singular": false, + "always matches": false, + "keys": { + "treatment_id": [ + "treatment_id" + ] + }, + "description": "Any incident events recorded in association with the treatment", + "synonyms": ["side effects", "medical incidents"] + }, + { + "name": "treatment", + "type": "reverse", + "original parent": "treatments", + "original property": "adverse_events", + "singular": true, + "always matches": true, + "description": "The treatment that the adverse event occurred during", + "synonyms": ["regimen", "medical trial", "prescription"] + } + ], + "additional definitions": [], + "verified pydough analysis": [], + "extra semantic info": {} + }, + { + "name": "Ewallet", + "version": "V2", + 
"collections": [ + { + "name": "users", + "type": "simple table", + "table path": "EWALLET.users", + "unique properties": ["uid", "username"], + "properties": [ + { + "name": "uid", + "type": "table column", + "column name": "uid", + "data type": "numeric", + "description": "The unique identifier for each user in the system", + "sample values": [1, 3, 5, 10, 11], + "synonyms": ["user id", "user key"] + }, + { + "name": "username", + "type": "table column", + "column name": "username", + "data type": "string", + "description": "The username for the user", + "sample values": ["john_doe", "michael_brown", "bizuser", "huang2143"], + "synonyms": ["name", "account profile"] + }, + { + "name": "email", + "type": "table column", + "column name": "email", + "data type": "string", + "description": "The email for the user", + "sample values": ["john.doe@email.com", "lisa.jones@email.com", "customerserv@shopsmart.biz", "huang2143@example.com", "contact@business.co"], + "synonyms": ["email address", "contact email"] + }, + { + "name": "phone_number", + "type": "table column", + "column name": "phone_number", + "data type": "string", + "description": "The phone number for the user (null if not provided), which always starts with a `+` and the country code and does not have dashes", + "sample values": ["+1234567890", "+9876543210", "+8091017161", "+8612345678901", "+6123456789"], + "synonyms": ["phone", "mobile number", "contact number"] + }, + { + "name": "created_at", + "type": "table column", + "column name": "created_at", + "data type": "datetime", + "description": "The timestamp when the user account was created", + "synonyms": ["account creation date", "registration datetime"] + }, + { + "name": "last_login_at", + "type": "table column", + "column name": "last_login_at", + "data type": "datetime", + "description": "The last time the user logged into their account (null if never logged in)", + "synonyms": ["most recent login datetime", "latest access timestmap"] + }, + { + "name": "user_type", + "type": "table column", + "column name": "user_type", + "data type": "string", + "description": "The type of user, which can be either `individual` or `business`", + "sample values": ["individual", "business"], + "synonyms": ["category", "classification"] + }, + { + "name": "status", + "type": "table column", + "column name": "status", + "data type": "string", + "description": "The status of the user account, which can be either `active`, `inactive`, `deleted` or `suspended`", + "sample values": ["active", "inactive", "suspended", "deleted"], + "synonyms": ["account state", "activity", "user status"] + }, + { + "name": "country", + "type": "table column", + "column name": "country", + "data type": "string", + "description": "The two-letter country code for the country of the user", + "sample values": ["US", "CA", "FR", "GB", "AU"], + "synonyms": ["nation"] + }, + { + "name": "address_billing", + "type": "table column", + "column name": "address_billing", + "data type": "string", + "description": "The billing address for the user, including street address, city, sometimes province/country, and postal code", + "sample values": ["123 Main St, Anytown US 12345", "456 Oak Rd, Toronto ON M1M2M2", "12 Rue Baptiste, Paris 75001", "12 Mardon Rd, Wellington 6012"], + "synonyms": ["billing location", "billing street address"] + }, + { + "name": "address_delivery", + "type": "table column", + "column name": "address_delivery", + "data type": "string", + "description": "The delivery address for the user, including street 
address, city, sometimes province/country, and postal code, or null if not provided", + "sample values": ["123 Main St, Anytown US 12345", "25 London Road, Manchester M12 4XY", "19 Smith St, Brunswick VIC 3056"], + "synonyms": [] + }, + { + "name": "kyc_status", + "type": "table column", + "column name": "kyc_status", + "data type": "string", + "description": "The KYC status, which can be either `approved`, `pending`, or `rejected`", + "sample values": ["approved", "pending", "rejected"], + "synonyms": ["approval status", "verification status"] + }, + { + "name": "kyc_verified_at", + "type": "table column", + "column name": "kyc_verified_at", + "data type": "datetime", + "description": "The date when the kyc status became verified (null if not provided)", + "synonyms": ["date of approval", "date of verification"] + } + ], + "description": "Every user in the system, which can be either an individual or a business", + "synonyms": ["accounts", "user profiles", "user accounts", "clients", "consumers"] + }, + { + "name": "merchants", + "type": "simple table", + "table path": "EWALLET.merchants", + "unique properties": ["mid", "name"], + "properties": [ + { + "name": "mid", + "type": "table column", + "column name": "mid", + "data type": "numeric", + "description": "Unique identifier for each merchant in the system", + "sample values": [1, 5, 10, 12, 15], + "synonyms": ["merchant key", "merchant id"] + }, + { + "name": "name", + "type": "table column", + "column name": "name", + "data type": "string", + "description": "The name of the merchant", + "sample values": ["TechMart", "FitLifeGear", "UrbanDining", "KidzPlayhouse", "GameRush"], + "synonyms": ["merchant name", "business name", "company name"] + }, + { + "name": "description", + "type": "table column", + "column name": "description", + "data type": "string", + "description": "The detailed description of what the merchant does or sells", + "sample values": ["Leading electronics retailer", "SaaS productivity tools for developers", "Cosmetics and beauty supplies", "Books and reading accessories"], + "synonyms": ["business industry description", "merchant product types", "market segment description", "company description"] + }, + { + "name": "website_url", + "type": "table column", + "column name": "website_url", + "data type": "string", + "description": "The URL to the merchant's website", + "sample values": ["https://fitlifegear.com", "https://hometechsolutions.net", "https://bookworms.co.uk", "https://www.zenhomegoods.com'"], + "synonyms": ["website link", "web address"] + }, + { + "name": "logo_url", + "type": "table column", + "column name": "logo_url", + "data type": "string", + "description": "The URL to the merchant's logo image", + "sample values": ["https://www.techmart.com/logo.png", "https://www.fashiontrend.com/logo.png", "https://gamerush.co/gr-logo.png", "https://kidzplayhouse.com/logo.png"], + "synonyms": ["image link", "logo image address"] + }, + { + "name": "created_at", + "type": "table column", + "column name": "created_at", + "data type": "datetime", + "description": "The timestamp when the merchant was registered in the system", + "synonyms": ["merchant registration date", "merchant creation date"] + }, + { + "name": "country", + "type": "table column", + "column name": "country", + "data type": "string", + "description": "The two-letter capitalized country code for the country of the merchant", + "sample values": ["US", "CA", "IT", "FR", "UK"], + "synonyms": ["nation"] + }, + { + "name": "state", + "type": "table 
column", + "column name": "state", + "data type": "string", + "description": "The state for the merchant within the country, or null if not applicable", + "sample values": ["California", "Ontario", "Catalonia", "New South Wales", "British Columbia"], + "synonyms": ["province"] + }, + { + "name": "city", + "type": "table column", + "column name": "city", + "data type": "string", + "description": "The city for the merchant's location within the country", + "sample values": ["Los Angeles", "Berlin", "Rome", "Vancouver", "Sydney"] + }, + { + "name": "postal_code", + "type": "table column", + "column name": "postal_code", + "data type": "string", + "description": "The postal/zip code of the merchant's location within the city", + "sample values": ["90011", "M5V2J2", "1010", "94105"], + "synonyms": ["zip code"] + }, + { + "name": "address", + "type": "table column", + "column name": "address", + "data type": "string", + "description": "The full address of the merchant, including street address, city, sometimes province, and postal code", + "sample values": ["645 Wilshire Blvd, Los Angeles CA 90011", "159 Franklin St, Melbourne VIC 3004", "350 Mission St, San Francisco CA 94105", "35 Rue du Faubourg Saint-Antoine, 75011 Paris", "Passeig de Gracia 35, Barcelona 08003"], + "synonyms": ["street address", "full location"] + }, + { + "name": "status", + "type": "table column", + "column name": "status", + "data type": "string", + "description": "The status of the merchant account, which can be either `active`, `inactive` or `suspended`", + "sample values": ["active", "inactive", "suspended"], + "synonyms": ["account state", "activity", "merchant status"] + }, + { + "name": "category", + "type": "table column", + "column name": "category", + "data type": "string", + "description": "The blanket category/industry/market segment that the merchant is in", + "sample values": ["retail (hardware)", "Food & Dining", "Travel & Hospitality", "Retail", "Business Services"], + "synonyms": ["industry", "high-level market segment"] + }, + { + "name": "sub_category", + "type": "table column", + "column name": "sub_category", + "data type": "string", + "description": "The more specific category/industry/market segment that the merchant is in within the category", + "sample values": ["Electronics", "Sporting GOods", "Restaurants", "Accommodation", "Toys & Games", "Books", "Pets", "Groceries"], + "synonyms": ["sub-industry", "specific market segment"] + }, + { + "name": "merchant_category_code", + "type": "table column", + "column name": "mcc", + "data type": "numeric", + "description": "The category code for the merchant, which is a 4-digit number that identifies the type of business", + "sample values": [5734, 5977, 7011, 5719, 5732], + "synonyms": [] + }, + { + "name": "contact_name", + "type": "table column", + "column name": "contact_name", + "data type": "string", + "description": "The full name of the individual who is the primary contact for the merchant", + "sample values": ["John Jacobs", "Daniel Lee", "Marco Rossi", "Sophia Turner"], + "synonyms": ["contact person", "contact individual"] + }, + { + "name": "contact_email", + "type": "table column", + "column name": "contact_email", + "data type": "string", + "description": "The email of the contact person for the merchant", + "sample values": ["jjacobs@techmart.com", "dlee@greengourmet.com", "ebrown@hometechsolutions.net", "sturner@bookworms.co.uk", "mschmidt@codesuite.io", "agarcia@handycraft.store"], + "synonyms": ["contact email", "contact person email"] + 
}, + { + "name": "contact_phone", + "type": "table column", + "column name": "contact_phone", + "data type": "string", + "description": "The phone number of the contact person, which always starts with a `+` and the country code and does not have dashes", + "sample values": ["+15551234567", "+49301234567", "+441612345678", "+61298765432", "+442078912345"], + "synonyms": ["contact phone number", "contact person phone"] + } + ], + "description": "The merchant accounts registered in the system", + "synonyms": ["companies", "businesses", "vendors", "retailers", "stores"] + }, + { + "name": "coupons", + "type": "simple table", + "table path": "EWALLET.coupons", + "unique properties": ["cid"], + "properties": [ + { + "name": "cid", + "type": "table column", + "column name": "cid", + "data type": "numeric", + "description": "The unique identifier for each coupon in the system", + "sample values": [1, 4, 5, 7, 10], + "synonyms": ["coupon key", "coupon id"] + }, + { + "name": "merchant_id", + "type": "table column", + "column name": "merchant_id", + "data type": "numeric", + "description": "The id of the merchant who issues the coupon", + "sample values": [2, 5, 7, 8, 9], + "synonyms": ["merchant key", "merchant id", "issuing business id"] + }, + { + "name": "code", + "type": "table column", + "column name": "code", + "data type": "string", + "description": "The code used to enter the coupon", + "sample values": ["TECH20", "DINEDISCOUNT", "HOME15", "GLOWUP", "GAMERALERT"], + "synonyms": ["coupon code", "discount code", "promo code"] + }, + { + "name": "description", + "type": "table column", + "column name": "description", + "data type": "string", + "description": "A verbal description of what hte coupon is for", + "sample values": ["20% off tech and electronics", "Buy 2 get 1 free on cosmetics'", "Get 25% off accessories", "$10 off $75+ purchase"], + "synonyms": [] + }, + { + "name": "start_date", + "type": "table column", + "column name": "start_date", + "data type": "datetime", + "description": "The date when the coupon first became valid", + "synonyms": ["available since", "valid from"] + }, + { + "name": "end_date", + "type": "table column", + "column name": "end_date", + "data type": "datetime", + "description": "The date when the coupon is no longer valid", + "synonyms": ["available until", "valid until", "expiration date"] + }, + { + "name": "discount_type", + "type": "table column", + "column name": "discount_type", + "data type": "string", + "description": "The category of the coupon, which can be either `percentage` or `fixed_amount`", + "sample values": ["percentage", "fixed_amount"], + "synonyms": ["classification", "category"] + }, + { + "name": "discount_value", + "type": "table column", + "column name": "discount_value", + "data type": "numeric", + "description": "The value of the discount, which is the percentage off if the discount type is `percentage`, or the number of dollars off if `fixed_amount`", + "sample values": [20.0, 30.0, 10.0, 15.0, 25.0], + "synonyms": ["dollars off", "percentage off", "amount", "size"] + }, + { + "name": "min_purchase_amount", + "type": "table column", + "column name": "min_purchase_amount", + "data type": "numeric", + "description": "The minimum amount of money that must be spent on a purchase in USD to be eligible for the coupon (null if not applicable)", + "sample values": [100.0, 50.0, 1000.0, 150.0], + "synonyms": ["minimum spend", "minimum purchase", "minimum order price"] + }, + { + "name": "max_discount_amount", + "type": "table column", + 
"column name": "max_discount_amount", + "data type": "numeric", + "description": "The maximum amount of money that can be saved using the coupon in USD (null if not applicable)", + "sample values": [50.0, 300.0, 10.0, 15.0], + "synonyms": ["promotion limit", "discount cap", "maximum savings"] + }, + { + "name": "redemption_limit", + "type": "table column", + "column name": "redemption_limit", + "data type": "numeric", + "description": "The maximum number of times the coupon can be redeemed before expiring/becoming invalid (null if not applicable)", + "sample values": [500, 1000, 200, 300, 750], + "synonyms": ["maximum number of uses", "redemption cap", "usage limit"] + }, + { + "name": "status", + "type": "table column", + "column name": "status", + "data type": "string", + "description": "The status of the coupon, which can be either `active`, `inactive`, or `expired`", + "sample values": ["active", "inactive", "expired"], + "synonyms": ["availability", "usability", "activity state"] + }, + { + "name": "created_at", + "type": "table column", + "column name": "created_at", + "data type": "datetime", + "description": "Timestamp when the coupon record was created in the system", + "synonyms": ["creation datetime"] + }, + { + "name": "updated_at", + "type": "table column", + "column name": "updated_at", + "data type": "datetime", + "description": "Timestamp when the coupon record was last updated in the system (null if never updated after creation)", + "synonyms": ["update datetime"] + } + ], + "description": "The records of coupons that have been issued by merchants in the system", + "synonyms": ["discounts", "promotions", "special offers"] + }, + { + "name": "transactions", + "type": "simple table", + "table path": "EWALLET.wallet_transactions_daily", + "unique properties": ["txid", "transaction_ref"], + "properties": [ + { + "name": "txid", + "type": "table column", + "column name": "txid", + "data type": "numeric", + "description": "The unique identifier for each transaction in the system (the original transaction & coupon applied are different rows with different values of this key)", + "sample values": [1, 3, 17, 21, 26], + "synonyms": ["transaction key", "transaction id"] + }, + { + "name": "sender_id", + "type": "table column", + "column name": "sender_id", + "data type": "numeric", + "description": "The id of the key of the individual who sent the transaction, which is either the id of a user or a merchant, depending on sender_type", + "sample values": [1, 3, 8, 4, 10], + "synonyms": [] + }, + { + "name": "sender_type", + "type": "table column", + "column name": "sender_type", + "data type": "numeric", + "description": "Which type of individual sent the transaction: 0 for user (meaning sender_id is a uid from users) or 1 for merchant (meaning sender_id is a mid from merchants)", + "sample values": [0, 1], + "synonyms": ["sent by merchant"] + }, + { + "name": "receiver_id", + "type": "table column", + "column name": "receiver_id", + "data type": "numeric", + "description": "The id of the key of the individual who received the transaction, which is either the id of a user or a merchant, depending on receiver_type", + "sample values": [1, 9, 10, 2, 5], + "synonyms": [] + }, + { + "name": "receiver_type", + "type": "table column", + "column name": "receiver_type", + "data type": "numeric", + "description": "Which type of individual received the transaction: 0 for user (meaning receiver_id is a uid from users) or 1 for merchant (meaning receiver_id is a mid from merchants)", + "sample 
values": [0, 1], + "synonyms": ["received by merchant"] + }, + { + "name": "amount", + "type": "table column", + "column name": "amount", + "data type": "numeric", + "description": "The dollar amount of the transaction, which is the total amount for a record without a coupon applied, or the amount saved/rebated when the coupon is applied", + "sample values": [99.99, 20.0, 16.0, 125.50, 10.0], + "synonyms": [] + }, + { + "name": "status", + "type": "table column", + "column name": "status", + "data type": "string", + "description": "The status of the transaction, which can be either `success`, `failed`, `refunded`, or `pending`", + "sample values": ["success", "failed", "pending", "refunded"], + "synonyms": ["outcome", "result", "transaction status"] + }, + { + "name": "transaction_type", + "type": "table column", + "column name": "type", + "data type": "string", + "description": "The means the transaction was made, which can be either `credit` or `debit", + "sample values": ["credit", "debit"], + "synonyms": ["payment method", "credit or debit"] + }, + { + "name": "description", + "type": "table column", + "column name": "description", + "data type": "string", + "description": "The category of transaction, which either says what was purchased or 'Coupon discount' if the transaction is a coupon discount being applied to a previous transaction in the system", + "sample values": ["Online purchase", "Coupon discount", "Product purchase", "Order #438721", "Villa rental deposit", "Refund on order #1234"], + "synonyms": ["description of purchase", "purchase subject"] + }, + { + "name": "coupon_id", + "type": "table column", + "column name": "coupon_id", + "data type": "numeric", + "description": "The id of the coupon being used in the transaction, or null if the transaction is not a coupon discount", + "sample values": [1, 3, 9, 10], + "synonyms": ["coupon key", "coupon id"] + }, + { + "name": "created_at", + "type": "table column", + "column name": "created_at", + "data type": "datetime", + "description": "The timestamp when the transfer was created in the system", + "synonyms": ["creation datetime"] + }, + { + "name": "completed_at", + "type": "table column", + "column name": "completed_at", + "data type": "datetime", + "description": "The timestamp when the transfer was completed (null if not completed yet)", + "synonyms": ["completion datetime"] + }, + { + "name": "transaction_ref", + "type": "table column", + "column name": "transaction_ref", + "data type": "string", + "description": "Randomly generated uuid4 for users' reference", + "sample values": ["ad154bf7-8185-4230-a8d8-3ef59b4e0012", "kd454bf7-428d-eig2-a8d8-3ef59b4e0012", "a7659c81-0cd0-4635-af6c-cf68d2c15ab2'"], + "synonyms": ["transaction uuid", "reference number"] + }, + { + "name": "gateway_name", + "type": "table column", + "column name": "gateway_name", + "data type": "string", + "description": "The portal through which the transaction was made, which can be either 'Stripe', 'PayPal', 'Checkout.com', 'Braintree' or 'Adyen'", + "sample values": ["Stripe", "PayPal", "Checkout.com", "Braintree", "Adyen"], + "synonyms": ["payment gateway", "payment processor", "payment portal"] + }, + { + "name": "gateway_ref", + "type": "table column", + "column name": "gateway_ref", + "data type": "string", + "description": "A reference number generated by the payment gateway for the transaction, which is used to track the transaction in the payment processor's system", + "sample values": ["tx_123abc456def", "rfnd_xkt521", "sub_pjj908", 
"stripe_ref_11_1", "paypal_ref_12_1"], + "synonyms": ["gateway reference number", "payment processor reference"] + }, + { + "name": "device_id", + "type": "table column", + "column name": "device_id", + "data type": "string", + "description": "The unique identifier for the device used to make the transaction", + "sample values": ["mobile_8fh2k1", "web_8902wknz", "web_zld22f", "device_11_1", "mobile_1av8p0"], + "synonyms": ["device key", "device identifier", "device number"] + }, + { + "name": "ip_address", + "type": "table column", + "column name": "ip_address", + "data type": "string", + "description": "The IP address of the device used to make the transaction", + "sample values": ["199.59.148.201", "199.59.148.201", "70.121.39.25", "8.26.53.165"], + "synonyms": ["IP"] + }, + { + "name": "user_agent", + "type": "table column", + "column name": "user_agent", + "data type": "string", + "description": "The user agent used to make the transaction, which is a string that identifies the browser, operating system, and device type of the device used to make the transaction", + "sample values": ["Mozilla/5.0 (iPhone; CPU iPhone OS 16_3_1 like Mac OS X) ...", "Mozilla/5.0 (Linux; Android 13; SM-S901B) ...", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ..."], + "synonyms": ["device information", "browser information", "device details"] + } + ], + "description": "Every transaction that has occurred in the system, where the source can be either a user or a merchant and the destination can be either a user or a merchant. If a coupon is used, there will be two identical transaction records, one for the original transaction (where the coupon id is null) and one for the coupon discount applied (where the coupon id is provided)", + "synonyms": ["transfers", "payments", "purchases"] + }, + { + "name": "user_balances", + "type": "simple table", + "table path": "EWALLET.wallet_user_balance_daily", + "unique properties": [["user_id", "updated_at"]], + "properties": [ + { + "name": "user_id", + "type": "table column", + "column name": "user_id", + "data type": "numeric", + "description": "The id of the user whose account balance is being recorded", + "sample values": [1, 4, 9, 10, 5], + "synonyms": ["user id", "user key"] + }, + { + "name": "balance", + "type": "table column", + "column name": "balance", + "data type": "numeric", + "description": "The balance of the user's account in USD on the date of the record", + "sample values": [525.80, -725.55, -55.99, -559.98, 0.00, 2733.92], + "synonyms": ["amount in account", "user balance", "account value"] + }, + { + "name": "updated_at", + "type": "table column", + "column name": "updated_at", + "data type": "datetime", + "description": "The timestamp when the user's balance was recorded", + "synonyms": ["update datetime", "balance record date"] + } + ], + "description": "The records of users' account balances in the system, which can include negative balances and multiple records for the same user made at different points in time", + "synonyms": ["user balance histories", "user balance records", "user account balances"] + }, + { + "name": "merchant_balances", + "type": "simple table", + "table path": "EWALLET.wallet_merchant_balance_daily", + "unique properties": [["merchant_id", "updated_at"]], + "properties": [ + { + "name": "merchant_id", + "type": "table column", + "column name": "merchant_id", + "data type": "numeric", + "description": "The id of the merchant whose account balance is being recorded", + "sample values": [2, 4, 5, 9, 1], + 
"synonyms": ["merchant key", "merchant id"] + }, + { + "name": "balance", + "type": "table column", + "column name": "balance", + "data type": "numeric", + "description": "The balance of the user's account in USD on the date of the record", + "sample values": [3897.99, 29.95, 0.00, 89.99], + "synonyms": ["amount in account", "merchant balance", "account value"] + }, + { + "name": "updated_at", + "type": "table column", + "column name": "updated_at", + "data type": "datetime", + "description": "The timestamp when the merchant's balance was recorded", + "synonyms": ["update datetime", "balance record date"] + } + ], + "description": "The records of merchants' account balances in the system, which can include negative balances and multiple records for the same merchant made at different points in time", + "synonyms": ["merchant balance histories", "merchant balance records", "merchant account balances"] + }, + { + "name": "notifications", + "type": "simple table", + "table path": "EWALLET.notifications", + "unique properties": ["notif_id"], + "properties": [ + { + "name": "notif_id", + "type": "table column", + "column name": "id", + "data type": "numeric", + "description": "The unique identifier for each notification in the system", + "sample values": [1, 5, 10, 14, 16], + "synonyms": ["notification key", "notification id"] + }, + { + "name": "user_id", + "type": "table column", + "column name": "user_id", + "data type": "numeric", + "description": "The id for the user who received the notification", + "sample values": [3, 5, 9, 10], + "synonyms": ["user key", "user id"] + }, + { + "name": "message", + "type": "table column", + "column name": "message", + "data type": "string", + "description": "The message contained in the notification sent to the user", + "sample values": ["Your order #123abc has been shipped!", "Your CodeSuite subscription will renew on 7/1", "Here''s $10 to start your glow up!'", "Reminder: Your FitLife membership expires in 7 days", "An order from UrbanDining was unsuccessful", "Weekend Flash Sale: 25% off all activewear!"], + "synonyms": ["contents", "text", "description"] + }, + { + "name": "notification_type", + "type": "table column", + "column name": "type", + "data type": "string", + "description": "The type of notification message, which can be either `transaction`, `promotion`, or `general`", + "sample values": ["transaction", "promotion", "general"], + "synonyms": ["message category", "alert classification"] + }, + { + "name": "status", + "type": "table column", + "column name": "status", + "data type": "string", + "description": "The status of the notification, which can be either `read`, `unread`, or `archived`", + "sample values": ["read", "unread", "archived"], + "synonyms": ["has been read"] + }, + { + "name": "created_at", + "type": "table column", + "column name": "created_at", + "data type": "datetime", + "description": "The timestamp when the notification was created", + "synonyms": ["creation datetime", "notification creation date"] + }, + { + "name": "read_at", + "type": "table column", + "column name": "read_at", + "data type": "datetime", + "description": "The timestamp when the notification was read (null if not read yet)", + "synonyms": ["datetime read"] + }, + { + "name": "device_type", + "type": "table column", + "column name": "device_type", + "data type": "string", + "description": "The type of device/medium the notification was sent through, which can be either `mobile_app`, `email`, `sms`, or `web_app`", + "sample values": ["mobile_app", 
"email", "sms", "web_app"], + "synonyms": [] + }, + { + "name": "device_id", + "type": "table column", + "column name": "device_id", + "data type": "string", + "description": "The id of the device the notification was sent to, or null if the device type does not have device ids", + "sample values": ["mobile_8fh2k1", "web_d8180kaf", "mobile_8fh2k1"], + "synonyms": [] + }, + { + "name": "action_url", + "type": "table column", + "column name": "action_url", + "data type": "string", + "description": "The URL included in hte notification that the user can click on to take action; can be external https or deeplink url within the app", + "sample values": ["app://orders/123abc", "https://zenhomesurvey.com/order/c51e10d1", "https://kidzplayhouse.com/new-arrivals", "https://techmart.com/promo/TECH20"], + "synonyms": ["link", "notification hyperlink"] + } + ], + "description": "All notifications sent to users in the system, such as alerts, reminders, and promotional messages", + "synonyms": ["alerts", "messages"] + }, + { + "name": "user_sessions", + "type": "simple table", + "table path": "EWALLET.user_sessions", + "unique properties": [["user_id", "session_start"], ["user_id", "session_end"]], + "properties": [ + { + "name": "user_id", + "type": "table column", + "column name": "user_id", + "data type": "numeric", + "description": "The id of the user who the session belongs to", + "sample values": [1, 2, 3, 8, 10], + "synonyms": ["user id", "user key"] + }, + { + "name": "session_start", + "type": "table column", + "column name": "session_start_ts", + "data type": "datetime", + "description": "The timestamp when the user session started", + "synonyms": ["beginning timestamp", "session initialization datetime"] + }, + { + "name": "session_end", + "type": "table column", + "column name": "session_end_ts", + "data type": "datetime", + "description": "The timestamp when the user session ended", + "synonyms": ["ending timestamp", "session termination datetime"] + }, + { + "name": "device_type", + "type": "table column", + "column name": "device_type", + "data type": "string", + "description": "The type of device the session was made with, which can be either `web_app` or `mobile_app`", + "sample values": ["web_app", "mobile_app"], + "synonyms": ["session device", "web or mobile"] + }, + { + "name": "device_id", + "type": "table column", + "column name": "device_id", + "data type": "string", + "description": "The id of the device the session was made with", + "sample values": ["web_d8180kaf", "mobile_g3mjfz", "web_zz91p44l", "web_8902wknz"], + "synonyms": [] + } + ], + "description": "The records of user sessions in the system, which can include multiple sessions for the same user made at different points in time, each session with a start/end timestamp", + "synonyms": ["system logs", "user activity logs", "user session records"] + }, + { + "name": "user_setting_snapshots", + "type": "simple table", + "table path": "EWALLET.user_setting_snapshot", + "unique properties": [["user_id", "snapshot_date"]], + "properties": [ + { + "name": "user_id", + "type": "table column", + "column name": "user_id", + "data type": "numeric", + "description": "The id for the user whose settings are being recorded", + "sample values": [1, 2, 3, 5, 9], + "synonyms": ["user key", "user id"] + }, + { + "name": "snapshot_date", + "type": "table column", + "column name": "snapshot_date", + "data type": "datetime", + "description": "The date when the setting snapshot was taken", + "synonyms": ["date of setting update", "recorded 
date"] + }, + { + "name": "daily_transaction_limit", + "type": "table column", + "column name": "tx_limit_daily", + "data type": "numeric", + "description": "The cap in settings of how much the user account allows to be spent in a single day", + "sample values": [1000.0, 500.0, 50.0, 250.0], + "synonyms": ["daily spending cap"] + }, + { + "name": "monthly_transaction_limit", + "type": "table column", + "column name": "tx_limit_monthly", + "data type": "numeric", + "description": "The cap in settings of how much the user account allows to be spent in a single month", + "sample values": [5000.0, 500.0, 2000.0, 1000.0], + "synonyms": ["monthly spending cap"] + }, + { + "name": "membership_status", + "type": "table column", + "column name": "membership_status", + "data type": "numeric", + "description": "The member's status level within the system: 0 for bronze, 1 for silver, 2 for gold, 3 for platinum, 4 for VIP", + "sample values": [0, 1, 2, 3, 4], + "synonyms": ["rank", "membership level", "membership tier"] + }, + { + "name": "password_hash", + "type": "table column", + "column name": "password_hash", + "data type": "string", + "description": "The hashed password of the user, which is used to verify the user's identity when they log in", + "sample values": ["bcryptHash($2yz9!&ka1)", "bcryptHash($2yz9!&ka1)", "bcryptHash(C0d3Rul3z!99)"], + "synonyms": [] + }, + { + "name": "api_key", + "type": "table column", + "column name": "api_key", + "data type": "string", + "description": "The user's API key, which is used to authenticate the user when they make API requests (null if not provided)", + "sample values": ["9d61c49b-8977-4914-a36b-80d1445e38fa", "6c03c175-9ac9-4854-b064-a3fff2c62e31"], + "synonyms": ["api token", "application programming interface key"] + }, + { + "name": "verified_devices", + "type": "table column", + "column name": "verified_devices", + "data type": "string", + "description": "A comma-separated list of the ids of the devices that have been verified for the user (null if not provided)", + "sample values": ["mobile_8fh2k1", "mobile_yjp08q, mobile_1av8p0", "web_k29qjd, mobile_x28qlj", "web_d8180kaf, mobile_q3mz8n"], + "synonyms": ["device list", "device ids"] + }, + { + "name": "verified_ips", + "type": "table column", + "column name": "verified_ips", + "data type": "string", + "description": "A comma-sparated list of the IP addresses that have been verified for the user (null if not provided)", + "sample values": ["8.26.53.165, 68.85.32.201", "203.96.81.36", "192.168.0.1", "198.51.100.233, 70.121.39.25"], + "synonyms": ["IP addresses", "IP list"] + }, + { + "name": "mfa_enabled", + "type": "table column", + "column name": "mfa_enabled", + "data type": "bool", + "description": "Whether the user has enabled multi-factor authentication (MFA) for their account", + "synonyms": ["has mfa"] + }, + { + "name": "marketing_opt_in", + "type": "table column", + "column name": "marketing_opt_in", + "data type": "bool", + "description": "Whether the user has opted in to receive marketing communications", + "synonyms": ["allowed marketing"] + }, + { + "name": "created_at", + "type": "table column", + "column name": "created_at", + "data type": "datetime", + "description": "The timestamp when the record was added to the system", + "synonyms": ["system entry date"] + } + ], + "description": "The records of user settings in the system, which can include multiple updates for the same user made at different points in time", + "synonyms": ["user settings history", "user settings records"] + } 
+ ], + "relationships": [ + { + "name": "transactions_sent", + "type": "simple join", + "parent collection": "users", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "uid": [ + "sender_id" + ] + }, + "description": "All transactions made where the user is the sender", + "synonyms": ["transactions from user"] + }, + { + "name": "sending_user", + "type": "reverse", + "original parent": "users", + "original property": "transactions_sent", + "singular": true, + "always matches": false, + "description": "The user who sent the transaction, if it was sent by a user (not found if it was sent by a merchant)", + "synonyms": ["source user", "originating user"] + }, + { + "name": "transactions_received", + "type": "simple join", + "parent collection": "users", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "uid": [ + "receiver_id" + ] + }, + "description": "The transactions where the user was the recipient", + "synonyms": ["transactions to user"] + }, + { + "name": "receiving_user", + "type": "reverse", + "original parent": "users", + "original property": "transactions_received", + "singular": true, + "always matches": false, + "description": "The user who received the transaction, if it was received by a user (not found if it was received by a merchant)", + "synonyms": ["destination user", "receiving user account"] + }, + { + "name": "balances", + "type": "simple join", + "parent collection": "users", + "child collection": "user_balances", + "singular": false, + "always matches": false, + "keys": { + "uid": [ + "user_id" + ] + }, + "description": "The historical account balance for the user, which can include multiple updates over time", + "synonyms": ["historical account balance", "user balance history"] + }, + { + "name": "user", + "type": "reverse", + "original parent": "users", + "original property": "balances", + "singular": true, + "always matches": true, + "description": "The user who the account balance belongs to", + "synonyms": ["customer"] + }, + { + "name": "notifications", + "type": "simple join", + "parent collection": "users", + "child collection": "notifications", + "singular": false, + "always matches": false, + "keys": { + "uid": [ + "user_id" + ] + }, + "description": "The notifications that have been sent to the user", + "synonyms": ["user notifications", "user alerts"] + }, + { + "name": "user", + "type": "reverse", + "original parent": "users", + "original property": "notifications", + "singular": true, + "always matches": true, + "description": "The user who received the notification", + "synonyms": ["customer", "client", "user account"] + }, + { + "name": "sessions", + "type": "simple join", + "parent collection": "users", + "child collection": "user_sessions", + "singular": false, + "always matches": false, + "keys": { + "uid": [ + "user_id" + ] + }, + "description": "The login sessions by the user", + "synonyms": ["user login sessions", "session history"] + }, + { + "name": "user", + "type": "reverse", + "original parent": "users", + "original property": "sessions", + "singular": true, + "description": "The user who the login session information belongs to", + "synonyms": ["customer", "client", "user account"] + }, + { + "name": "setting_snapshots", + "type": "simple join", + "parent collection": "users", + "child collection": "user_setting_snapshots", + "singular": false, + "always matches": false, + "keys": { + "uid": [ + "user_id" + ] + }, + "description": "The setting 
snapshot history for the user, which can include multiple updates over time", + "synonyms": ["setting history", "user setting snapshots"] + }, + { + "name": "user", + "type": "reverse", + "original parent": "users", + "original property": "setting_snapshots", + "singular": true, + "description": "The user who the setting snapshot belongs to", + "synonyms": ["customer", "client", "user account"] + }, + { + "name": "transactions_sent", + "type": "simple join", + "parent collection": "merchants", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "mid": [ + "sender_id" + ] + }, + "description": "The transactions sent by the merchant", + "synonyms": ["merchant transactions sent", "merchant outgoing transactions"] + }, + { + "name": "sending_merchant", + "type": "reverse", + "original parent": "merchants", + "original property": "transactions_sent", + "singular": true, + "always matches": false, + "description": "The merchant who sent the transaction, if it was sent by a merchant (not found if it was sent by a user)", + "synonyms": ["merchant source", "merchant origin"] + }, + { + "name": "transactions_received", + "type": "simple join", + "parent collection": "merchants", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "mid": [ + "receiver_id" + ] + }, + "description": "The transactions received by the merchant", + "synonyms": ["merchant transactions received", "merchant incoming transactions"] + }, + { + "name": "receiving_merchant", + "type": "reverse", + "original parent": "merchants", + "original property": "transactions_received", + "singular": true, + "always matches": false, + "description": "The merchant who received the transaction, if it was received by a merchant (not found if it was received by a user)", + "synonyms": ["merchant destination", "merchant recipient"] + }, + { + "name": "balances", + "type": "simple join", + "parent collection": "merchants", + "child collection": "merchant_balances", + "singular": false, + "always matches": false, + "keys": { + "mid": [ + "merchant_id" + ] + }, + "description": "The historical account balance for the merchant, which can include multiple updates over time", + "synonyms": ["historical account balance", "merchant balance history"] + }, + { + "name": "merchant", + "type": "reverse", + "original parent": "merchants", + "original property": "balances", + "singular": true, + "always matches": true, + "description": "The merchant who the account balance information belongs to", + "synonyms": ["business", "company", "merchant account"] + }, + { + "name": "coupons", + "type": "simple join", + "parent collection": "merchants", + "child collection": "coupons", + "singular": false, + "always matches": false, + "keys": { + "mid": [ + "merchant_id" + ] + }, + "description": "The coupons issued by the merchant", + "synonyms": ["merchant coupons", "merchant discount codes"] + }, + { + "name": "merchant", + "type": "reverse", + "original parent": "merchants", + "original property": "coupons", + "singular": true, + "always matches": true, + "description": "The merchant who issued the coupon", + "synonyms": ["issuing company", "business"] + }, + { + "name": "transaction_used_in", + "type": "simple join", + "parent collection": "coupons", + "child collection": "transactions", + "singular": false, + "always matches": false, + "keys": { + "cid": [ + "coupon_id" + ] + }, + "description": "The transactions that the coupon was used in", + "synonyms": ["uses", 
"transactions with coupon"] + }, + { + "name": "coupon", + "type": "reverse", + "original parent": "coupons", + "original property": "transaction_used_in", + "singular": true, + "always matches": false, + "description": "The coupon that was used in the transaction, if one exists", + "synonyms": ["coupon used", "discount used"] + } + ], + "additional definitions": [], + "verified pydough analysis": [], + "extra semantic info": {} + } +] \ No newline at end of file diff --git a/tests/test_metadata/snowflake_sample_graphs.json b/tests/test_metadata/snowflake_sample_graphs.json new file mode 100644 index 000000000..785847394 --- /dev/null +++ b/tests/test_metadata/snowflake_sample_graphs.json @@ -0,0 +1,821 @@ +[ + { + "name": "TPCH", + "version": "V2", + "collections": [ + { + "name": "regions", + "type": "simple table", + "table path": "TPCH_SF1.REGION", + "unique properties": ["key"], + "properties": [ + { + "name": "key", + "type": "table column", + "column name": "r_regionkey", + "data type": "numeric", + "description": "Unique identifier id for the region", + "sample values": [0, 1, 2, 3, 4], + "synonyms": ["id"] + }, + { + "name": "name", + "type": "table column", + "column name": "r_name", + "data type": "string", + "description": "Uppercase name of the region", + "sample values": ["AFRICA", "AMERICA", "ASIA", "EUROPE", "MIDDLE EAST"] + }, + { + "name": "comment", + "type": "table column", + "column name": "r_comment", + "data type": "string", + "description": "Comment/remark on the region" + } + ], + "description": "The regions of the world", + "synonyms": ["continents", "segments of the world"] + }, + { + "name": "nations", + "type": "simple table", + "table path": "TPCH_SF1.NATION", + "unique properties": ["key"], + "properties": [ + { + "name": "key", + "type": "table column", + "column name": "n_nationkey", + "data type": "numeric", + "description": "Unique identifier id for the nation", + "sample values": [0, 1, 10, 13, 24], + "synonyms": ["id"] + }, + { + "name": "region_key", + "type": "table column", + "column name": "n_regionkey", + "data type": "numeric", + "description": "Key from the region that the nation belongs to", + "sample values": [0, 1, 2, 3, 4] + }, + { + "name": "name", + "type": "table column", + "column name": "n_name", + "data type": "string", + "description": "Uppercase name of the nation", + "sample values": ["KENYA", "PERU", "JAPAN", "INDIA", "GERMANY"] + }, + { + "name": "comment", + "type": "table column", + "column name": "n_comment", + "data type": "string", + "description": "Comment/remark on the nation" + } + ], + "description": "The nations of the world", + "synonyms": ["countries", "states"] + }, + { + "name": "parts", + "type": "simple table", + "table path": "TPCH_SF1.PART", + "unique properties": ["key"], + "properties": [ + { + "name": "key", + "type": "table column", + "column name": "p_partkey", + "data type": "numeric", + "description": "Unique identifier id for the part", + "sample values": [0, 103719, 114994, 64760, 2440], + "synonyms": ["id"] + }, + { + "name": "name", + "type": "table column", + "column name": "p_name", + "data type": "string", + "description": "Name of the part, consisting of multiple lowercase colors", + "sample values": ["chiffon plum white linen firebrick", "chocolate steel antique green lavender", "sky frosted cream light blush"], + "synonyms": ["colors", "description"] + }, + { + "name": "manufacturer", + "type": "table column", + "column name": "p_mfgr", + "data type": "string", + "description": "Name of the 
manufacturer of the part (not the same as the suppliers). The manufacturer number is the same as the first digit of the part's brand number.", + "sample values": ["Manufacturer#1", "Manufacturer#2", "Manufacturer#3", "Manufacturer#4", "Manufacturer#5"] + }, + { + "name": "brand", + "type": "table column", + "column name": "p_brand", + "data type": "string", + "description": "The production brand that the part belongs to. The brand contains two digits where the first digit is the same as the manufacturer number (e.g. Brand#32 is from Manufacturer #3).", + "sample values": ["Brand#11", "Brand#23", "Brand#34", "Brand#45", "Brand#55"] + }, + { + "name": "part_type", + "type": "table column", + "column name": "p_type", + "data type": "string", + "description": "The type of the part, consisting of three uppercase descriptors where the first is a category (e.g. 'STANDARD' or 'PROMO'), the second is a processing state (e.g. 'ANODIZED' or 'PLATED') and the third is a material (e.g. 'STEEL' or 'BRASS').", + "sample values": ["ECONOMY ANODIZED NICKEL", "PROMO BURNISHED COPPER", "STANDARD BRUSHED STEEL", "SMALL PLATED TIN", "LARGE POLISHED BRASS", "MEDIUM PLATED NICKEL"], + "synonyms": ["category", "descriptor", "processing", "material"] + }, + { + "name": "size", + "type": "table column", + "column name": "p_size", + "data type": "numeric", + "description": "The size of the part", + "sample values": [1, 10, 31, 46, 50], + "synonyms": ["dimension", "measurement", "length", "width", "height", "volume"] + }, + { + "name": "container", + "type": "table column", + "column name": "p_container", + "data type": "string", + "description": "The container that the part is stored in. The container consists of two uppercase descriptors where the first is a size (e.g. 'SM' or 'JUMBO') and the second is a type of container (e.g. 'BOX' or 'JAR').", + "sample values": ["SM CASE", "LG BOX", "MED BAG", "JUMBO JAR", "WRAP PKG", "SM PACK", "LG CAN", "MED DRUM"], + "synonyms": ["vessel", "packaging", "receptacle"] + }, + { + "name": "retail_price", + "type": "table column", + "column name": "p_retailprice", + "data type": "numeric", + "description": "The retail price of the part, which it is intended to be sold for before accounting for the price the supplier charges, in US dollars. 
The price is rounded to the nearest cent, and most of the values are between $900 and $2000.", + "sample values": [901.00, 2098.99, 14499.50, 2080.99, 2050.96, 1476.41], + "synonyms": ["listed selling price", "wholesale value"] + }, + { + "name": "comment", + "type": "table column", + "column name": "p_comment", + "data type": "string", + "description": "Description/commentary on the part" + } + ], + "description": "The various products supplied by various companies in shipments to different customers", + "synonyms": ["products", "components", "items", "goods"] + }, + { + "name": "suppliers", + "type": "simple table", + "table path": "TPCH_SF1.SUPPLIER", + "unique properties": ["key", "name"], + "properties": [ + { + "name": "key", + "type": "table column", + "column name": "s_suppkey", + "data type": "numeric", + "description": "Unique identifier id for the supplier", + "sample values": [2452, 8063, 1, 10000, 5053], + "synonyms": ["id"] + }, + { + "name": "name", + "type": "table column", + "column name": "s_name", + "data type": "string", + "description": "Name of the supplier, which is always Supplier# where the number is the same as the supplier's key, prepended with zeros until it is 9 digits", + "sample values": ["Supplier#000008427", "Supplier#000001917", "Supplier#000000001", "Supplier#000010000", "Supplier#000000893"] + }, + { + "name": "address", + "type": "table column", + "column name": "s_address", + "data type": "string", + "description": "Address of the supplier as a cryptographically encrypted string to anonymize the data.", + "sample values": ["aSYD1SvrdIGV8LxRL QDp5m9dV", "ydl44utgudl6CP46TF7kliIcF5sC8K9,WH,Tj", "J1Vd3lqn1UvN2|4|14-632-452-6847"], + "synonyms": ["location", "street address", "corporate address", "headquarters"] + }, + { + "name": "nation_key", + "type": "table column", + "column name": "s_nationkey", + "data type": "numeric", + "description": "Key from the nation that the supplier belongs to", + "sample values": [0, 1, 10, 13, 24], + "synonyms": ["nation id"] + }, + { + "name": "phone", + "type": "table column", + "column name": "s_phone", + "data type": "string", + "description": "Phone number of the supplier in the format 'CC-XXX-XXX-XXXX' where CC is the country code (each nation has a unique country code).", + "sample values": ["25-995-176-6622", "18-132-649-2520", "30-505-249-4504", "10-132-649-2520", "27-599-541-3605"], + "synonyms": ["contact number", "telephone number"] + }, + { + "name": "account_balance", + "type": "table column", + "column name": "s_acctbal", + "data type": "numeric", + "description": "The account balance of the supplier in US dollars. 
The balance is rounded to the nearest cent and most of the values are between -$1,000 and +$10,000.", + "sample values": [-998.22, 9999.72, 4510.35, 9125.21, -0.92, 58.93], + "synonyms": ["balance", "credit", "wealth", "debt", "surplus", "cash on hand", "money in bank"] + }, + { + "name": "comment", + "type": "table column", + "column name": "s_comment", + "data type": "string", + "description": "Commentary/remark on the supplier" + } + ], + "description": "The various companies that supply different parts to fulfill purchase orders", + "synonyms": ["companies", "businesses", "vendors"] + }, + { + "name": "lines", + "type": "simple table", + "table path": "TPCH_SF1.LINEITEM", + "unique properties": [["order_key", "line_number"]], + "properties": [ + { + "name": "order_key", + "type": "table column", + "column name": "l_orderkey", + "data type": "numeric", + "description": "Key from the order that the line item belongs to", + "sample values": [5294597, 19010, 68581, 2710114, 2462791], + "synonyms": ["order id"] + }, + { + "name": "part_key", + "type": "table column", + "column name": "l_partkey", + "data type": "numeric", + "description": "Key from the part that the lineitem describes a purchase/shipment of", + "sample values": [1, 103719, 114994, 64760, 2440], + "synonyms": ["part id"] + }, + { + "name": "supplier_key", + "type": "table column", + "column name": "l_suppkey", + "data type": "numeric", + "description": "Key from the supplier that the lineitem describes a purchase/shipment from", + "sample values": [2452, 8063, 1, 10000, 5053], + "synonyms": ["supplier id"] + }, + { + "name": "line_number", + "type": "table column", + "column name": "l_linenumber", + "data type": "numeric", + "description": "The line number of the lineitem within the order. Each lineitem within an order has its own line number, and represents a purchase of a part from a supplier within the order.", + "sample values": [1, 2, 3, 4, 5, 6, 7], + "synonyms": ["line id", "shipment index within order"] + }, + { + "name": "quantity", + "type": "table column", + "column name": "l_quantity", + "data type": "numeric", + "description": "The number of units of the part that is being purchased in the lineitem, as a number between 1 and 50", + "sample values": [1, 10, 13, 25, 48, 50], + "synonyms": ["amount", "purchase volume", "units", "count", "number of items", "shipment size"] + }, + { + "name": "extended_price", + "type": "table column", + "column name": "l_extendedprice", + "data type": "numeric", + "description": "The extended price of the line item, which is the retail price of the part multiplied by the quantity purchased (before any discounts/taxes are applied). The price is rounded to the nearest cent and most of the values are between $900 and $100,000.", + "sample values": [901.00, 36036.00, 57657.60, 50450.4, 39097.8], + "synonyms": ["raw price", "gross cost", "total value before discount/tax"] + }, + { + "name": "discount", + "type": "table column", + "column name": "l_discount", + "data type": "numeric", + "description": "The discount applied to the line item, which is a ratio between 0 and 1 representing percentage of the extended price. 
The percentage is always between 0% (0.00) and 10% (0.10)", + "sample values": [0.00, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10], + "synonyms": ["markdown", "price reduction"] + }, + { + "name": "tax", + "type": "table column", + "column name": "l_tax", + "data type": "numeric", + "description": "The sales tax applied to the line item, which is a ratio between 0 and 1 representing percentage of the extended price. The percentage is always between 0% (0.00) and 8% (0.08)", + "sample values": [0.00, 0.01, 0.02, 0.03, 0.04, 0.05, 0.06, 0.07, 0.08], + "synonyms": ["levy", "duty", "tariff"] + }, + { + "name": "status", + "type": "table column", + "column name": "l_linestatus", + "data type": "string", + "description": "The status of the line item, which is always 'O' (for orders that have been placed but not yet filled, e.g. pending shipment) or 'F' (for orders that have been filled, e.g. already shipped)", + "sample values": ["O", "F"], + "synonyms": ["pending shipment", "shipment state", "fulfilled"] + }, + { + "name": "ship_date", + "type": "table column", + "column name": "l_shipdate", + "data type": "datetime", + "description": "The date that the line item was shipped from the supplier/warehouse. The date is always between 1992-01-01 and 1998-12-31" + }, + { + "name": "commit_date", + "type": "table column", + "column name": "l_commitdate", + "data type": "datetime", + "description": "The date that the line item was committed to be shipped from the supplier/warehouse. The date is always between 1992-01-01 and 1998-12-31. The ship date is ideally before or on the actual commit date, but can be after it if the shipment was delayed." + }, + { + "name": "receipt_date", + "type": "table column", + "column name": "l_receiptdate", + "data type": "datetime", + "description": "The date that the line item was received by the customer. The date is always between 1992-01-01 and 1998-12-31. The receipt date is after the ship date due to the time to ship the package." + }, + { + "name": "ship_instruct", + "type": "table column", + "column name": "l_shipinstruct", + "data type": "string", + "description": "The shipping instructions for the line item, which is always 'DELIVER IN PERSON', 'TAKE BACK RETURN', 'COLLECT COD' or 'NONE'", + "sample values": ["DELIVER IN PERSON", "TAKE BACK RETURN", "COLLECT COD", "NONE"], + "synonyms": ["shipping instructions", "delivery instructions"] + }, + { + "name": "ship_mode", + "type": "table column", + "column name": "l_shipmode", + "data type": "string", + "description": "The shipping mode for the line item, which is always 'AIR', 'AIR REG', 'FOB', 'MAIL' or 'SHIP'", + "sample values": ["AIR", "AIR REG", "FOB", "MAIL", "SHIP"], + "synonyms": ["shipping method", "delivery method"] + }, + { + "name": "return_flag", + "type": "table column", + "column name": "l_returnflag", + "data type": "string", + "description": "The return flag for the line item, which is always 'R' (for returned items) or 'N' (for non-returned items)", + "sample values": ["R", "N"], + "synonyms": ["return status"] + }, + { + "name": "comment", + "type": "table column", + "column name": "l_comment", + "data type": "string", + "description": "Commentary/remark on the line item" + } + ], + "description": "The line items for shipments within an order. Each line item within an order has its own line number, and represents a purchase of a part from a supplier within the order. 
The order can contain multiple lineitems from different suppliers for different parts, and the lineitems can each have their own shipping information.", + "synonyms": ["shipments", "packages", "purchases", "deliveries", "order components", "order elements"] + }, + { + "name": "supply_records", + "type": "simple table", + "table path": "TPCH_SF1.PARTSUPP", + "unique properties": [["part_key", "supplier_key"]], + "properties": [ + { + "name": "part_key", + "type": "table column", + "column name": "ps_partkey", + "data type": "numeric", + "description": "Key from the part that the supply record belongs to", + "sample values": [1, 103719, 114994, 64760, 2440], + "synonyms": ["part id"] + }, + { + "name": "supplier_key", + "type": "table column", + "column name": "ps_suppkey", + "data type": "numeric", + "description": "Key from the supplier that the supply record belongs to", + "sample values": [2452, 8063, 1, 10000, 5053], + "synonyms": ["supplier id"] + }, + { + "name": "available_quantity", + "type": "table column", + "column name": "ps_availqty", + "data type": "numeric", + "description": "The number of units of the part that the supplier has available to supply. The quantity is usually between 1 and 1,000", + "sample values": [4620, 3100, 6547, 3337, 76], + "synonyms": ["available stock", "inventory", "supply", "quantity remaining"] + }, + { + "name": "supply_cost", + "type": "table column", + "column name": "ps_supplycost", + "data type": "numeric", + "description": "The cost that it takes for the supplier to produce a single unit of the part. The cost is rounded to the nearest cent and most of the values are between $1 and $1,000.", + "sample values": [144.43, 772.21, 285.90, 50.12, 983.998], + "synonyms": ["production cost", "manufacturing cost", "cost of goods sold"] + }, + { + "name": "comment", + "type": "table column", + "column name": "ps_comment", + "data type": "string", + "description": "Commentary/remark on the supply record" + } + ], + "description": "Every combination of a supplier and a part that the supplier supplies. 
Each record contains information about the supplier of the part, the part itself, and the availability of the part from the supplier.", + "synonyms": ["supplier part information", "partsupp info", "manifest", "inventories", "catalog"] + }, + { + "name": "orders", + "type": "simple table", + "table path": "TPCH_SF1.ORDERS", + "unique properties": ["key"], + "properties": [ + { + "name": "key", + "type": "table column", + "column name": "o_orderkey", + "data type": "numeric", + "description": "Unique identifier id for the order", + "sample values": [317728, 1096707, 5522855, 2624837, 1866566], + "synonyms": ["id"] + }, + { + "name": "customer_key", + "type": "table column", + "column name": "o_custkey", + "data type": "numeric", + "description": "Key from the customer that placed the order", + "sample values": [93721, 65251, 81379, 20663, 42247], + "synonyms": ["customer id"] + }, + { + "name": "order_status", + "type": "table column", + "column name": "o_orderstatus", + "data type": "string", + "description": "The status of the order, which is always 'O' for orders where all line items of the order have status 'O', 'F' for orders where all line items of the order have status 'F', and 'P' otherwise", + "sample values": ["O", "F", "P"], + "synonyms": ["order state", "fulfillment status"] + }, + { + "name": "total_price", + "type": "table column", + "column name": "o_totalprice", + "data type": "numeric", + "description": "The total price of the order after any discounts/taxes are applied, which is the sum of the extended price * (1 - discount) * (1 - tax) for all line items in the order. The price is rounded to the nearest cent and most of the values are between $800 and $600,000.", + "sample values": [857.71, 555285.16, 3618.2, 277554.58, 52737.18], + "synonyms": ["total cost", "total value"] + }, + { + "name": "order_date", + "type": "table column", + "column name": "o_orderdate", + "data type": "datetime", + "description": "The date that the order was placed. The date is always between 1992-01-01 and 1998-12-31", + "synonyms": ["order placed date", "order creation date", "purchase date"] + }, + { + "name": "order_priority", + "type": "table column", + "column name": "o_orderpriority", + "data type": "string", + "description": "The priority of the order, which is always '1-URGENT', '2-HIGH', '3-MEDIUM', '4-NOT SPECIFIED' or '5-LOW'", + "sample values": ["1-URGENT", "2-HIGH", "3-MEDIUM", "4-NOT SPECIFIED", "5-LOW"], + "synonyms": ["urgency", "priority level"] + }, + { + "name": "clerk", + "type": "table column", + "column name": "o_clerk", + "data type": "string", + "description": "The clerk that processed the order, which is always 'Clerk#' where the number is prepended with zeros until it is 9 digits", + "sample values": ["Clerk#000000001", "Clerk#000000090", "Clerk#000000635", "Clerk#000000892", "Clerk#000000073"], + "synonyms": ["salesperson", "representative", "agent", "notary", "officiant", "registrar", "overseer"] + }, + { + "name": "ship_priority", + "type": "table column", + "column name": "o_shippriority", + "data type": "numeric", + "description": "The priority of the order for shipping, which is always 0", + "sample values": [0], + "synonyms": ["shipping priority", "shipment urgency"] + }, + { + "name": "comment", + "type": "table column", + "column name": "o_comment", + "data type": "string", + "description": "Commentary/remark on the order" + } + ], + "description": "The orders that have been placed by customers. 
Each order can contain multiple lineitems from different suppliers for different parts, and the lineitems can each have their own shipping information.", + "synonyms": ["transactions"] + }, + { + "name": "customers", + "type": "simple table", + "table path": "TPCH_SF1.CUSTOMER", + "unique properties": ["key", "name"], + "properties": [ + { + "name": "key", + "type": "table column", + "column name": "c_custkey", + "data type": "numeric", + "description": "Unique identifier id for the customer", + "sample values": [93721, 65251, 81379, 20663, 42247], + "synonyms": ["id"] + }, + { + "name": "name", + "type": "table column", + "column name": "c_name", + "data type": "string", + "description": "Name of the customer, which is always Customer# where the number is the same as the customer's key, prepended with zeros until it is 9 digits", + "sample values": ["Customer#000000001", "Customer#000000090", "Customer#000000635", "Customer#000000892", "Customer#000000073"] + }, + { + "name": "address", + "type": "table column", + "column name": "c_address", + "data type": "string", + "description": "Address of the customer as a cryptographically encrypted string to anonymize the data.", + "sample values": ["ZCWMiIFUwRZWX7Vr7BjZ,,BZbGeGOBe7n", "HcpYT5Ag 2I2QY,nSoP5F1LI"], + "synonyms": ["location", "residence", "home address"] + }, + { + "name": "nation_key", + "type": "table column", + "column name": "c_nationkey", + "data type": "numeric", + "description": "Key from the nation that the customer belongs to", + "sample values": [0, 1, 10, 13, 24], + "synonyms": ["nation id"] + }, + { + "name": "phone", + "type": "table column", + "column name": "c_phone", + "data type": "string", + "description": "Phone number of the customer in the format 'CC-XXX-XXX-XXXX' where CC is the country code (each nation has a unique country code).", + "sample values": ["19-962-391-7546", "24-413-105-9570", "31-703-857-4846", "34-591-761-1886"], + "synonyms": ["contact number", "telephone number"] + }, + { + "name": "account_balance", + "type": "table column", + "column name": "c_acctbal", + "data type": "numeric", + "description": "The account balance of the customer in US dollars. The balance is rounded to the nearest cent and most of the values are between -$1,000 and +$10,000.", + "sample values": [-998.22, 9999.72, 4510.35, 9125.21, -0.92, 58.93], + "synonyms": ["balance", "credit", "wealth", "debt", "surplus", "cash on hand", "money in bank"] + }, + { + "name": "market_segment", + "type": "table column", + "column name": "c_mktsegment", + "data type": "string", + "description": "The market segment that the customer belongs to, which is always 'BUILDING', 'FURNITURE', 'AUTOMOBILE', 'MACHINERY' or 'HOUSEHOLD'", + "sample values": ["BUILDING", "FURNITURE", "AUTOMOBILE", "MACHINERY", "HOUSEHOLD"], + "synonyms": ["customer segment", "customer category", "market", "industry", "sector", "vertical"] + }, + { + "name": "comment", + "type": "table column", + "column name": "c_comment", + "data type": "string", + "description": "Commentary/remark on the customer" + } + ], + "description": "The customers in the system within each nation. 
Each customer can have placed zero, one, or several orders.", + "synonyms": ["citizens", "residents", "inhabitants", "consumers", "users", "buyers", "occupants"] + } + ], + "relationships": [ + { + "type": "simple join", + "name": "nations", + "parent collection": "regions", + "child collection": "nations", + "singular": false, + "always matches": true, + "keys": {"key": ["region_key"]}, + "description": "The nations contained within a region", + "synonyms": ["countries"] + }, + { + "type": "reverse", + "name": "region", + "original parent": "regions", + "original property": "nations", + "singular": true, + "always matches": true, + "description": "The region that a nation is part of", + "synonyms": ["continent", "segment of the world"] + }, + { + "type": "simple join", + "name": "suppliers", + "parent collection": "nations", + "child collection": "suppliers", + "singular": false, + "always matches": true, + "keys": {"key": ["nation_key"]}, + "description": "The suppliers belonging to a nation", + "synonyms": ["companies", "producers", "businesses"] + }, + { + "type": "reverse", + "name": "nation", + "original parent": "nations", + "original property": "suppliers", + "singular": true, + "always matches": true, + "description": "The nation that a supplier belongs to", + "synonyms": ["country", "state"] + }, + { + "type": "simple join", + "name": "customers", + "parent collection": "nations", + "child collection": "customers", + "singular": false, + "always matches": true, + "keys": {"key": ["nation_key"]}, + "description": "The customers belonging to a nation", + "synonyms": ["citizens", "residents", "inhabitants", "consumers", "users", "buyers", "occupants"] + }, + { + "type": "reverse", + "name": "nation", + "original parent": "nations", + "original property": "customers", + "singular": true, + "always matches": true, + "description": "The nation that a customer belongs to", + "synonyms": ["country", "state", "home"] + }, + { + "type": "simple join", + "name": "supply_records", + "parent collection": "parts", + "child collection": "supply_records", + "singular": false, + "always matches": false, + "keys": {"key": ["part_key"]}, + "description": "The records indicating which companies supply the part", + "synonyms": ["producers", "vendors", "suppliers of part"] + }, + { + "type": "reverse", + "name": "part", + "original parent": "parts", + "original property": "supply_records", + "singular": true, + "always matches": true, + "description": "The part that a supply record belongs to", + "synonyms": ["product", "item", "component"] + }, + { + "type": "simple join", + "name": "lines", + "parent collection": "parts", + "child collection": "lines", + "singular": false, + "always matches": false, + "keys": {"key": ["part_key"]}, + "description": "The line items for shipments of the part", + "synonyms": ["shipments", "packages", "purchases", "deliveries", "sales"] + }, + { + "type": "reverse", + "name": "part", + "original parent": "parts", + "original property": "lines", + "singular": true, + "always matches": true, + "description": "The part that a line item contains, e.g. 
what part is being shipped as part of an order", + "synonyms": ["product", "item", "component"] + }, + { + "type": "simple join", + "name": "supply_records", + "parent collection": "suppliers", + "child collection": "supply_records", + "singular": false, + "always matches": true, + "keys": {"key": ["supplier_key"]}, + "description": "The records indicating which parts the supplier supplies", + "synonyms": ["product catalog", "inventory", "components supplied"] + }, + { + "type": "reverse", + "name": "supplier", + "original parent": "suppliers", + "original property": "supply_records", + "singular": true, + "always matches": true, + "description": "The supplier that a supply record belongs to", + "synonyms": ["company", "producer", "business"] + }, + { + "type": "simple join", + "name": "lines", + "parent collection": "suppliers", + "child collection": "lines", + "singular": false, + "always matches": false, + "keys": {"key": ["supplier_key"]}, + "description": "The line items for shipments from the supplier, e.g. all purchases made from the supplier", + "synonyms": ["shipments", "packages", "purchases", "deliveries", "sales"] + }, + { + "type": "reverse", + "name": "supplier", + "original parent": "suppliers", + "original property": "lines", + "singular": true, + "always matches": true, + "description": "The supplier that a line item contains, e.g. what supplier is the part being purchased from as part of an order", + "synonyms": ["company", "producer", "business"] + }, + { + "type": "simple join", + "name": "part_and_supplier", + "parent collection": "lines", + "child collection": "supply_records", + "singular": true, + "always matches": true, + "keys": {"part_key": ["part_key"], "supplier_key": ["supplier_key"]}, + "description": "The corresponding entry in the supply records detailing more information about the supplier of the purchase and the part that was purchased", + "synonyms": ["supply records", "supplier part information", "partsupp info"] + }, + { + "type": "reverse", + "name": "lines", + "original parent": "lines", + "original property": "part_and_supplier", + "singular": false, + "always matches": false, + "description": "The line item instances of a part/supplier combination being purchased by a customer", + "synonyms": ["shipments", "packages", "purchases", "deliveries", "line items"] + }, + { + "type": "simple join", + "name": "order", + "parent collection": "lines", + "child collection": "orders", + "singular": true, + "always matches": true, + "keys": {"order_key": ["key"]}, + "description": "The order that the line item belongs to" + }, + { + "type": "reverse", + "name": "lines", + "original parent": "lines", + "original property": "order", + "singular": false, + "always matches": true, + "description": "The line items that belong to an order, each representing the purchase of a specific part from a specific supplier", + "synonyms": ["items", "order contents", "entries", "line items"] + }, + { + "type": "simple join", + "name": "customer", + "parent collection": "orders", + "child collection": "customers", + "singular": true, + "always matches": true, + "keys": {"customer_key": ["key"]}, + "description": "The customer that placed the order", + "synonyms": ["buyer", "consumer", "user", "client"] + }, + { + "type": "reverse", + "name": "orders", + "original parent": "orders", + "original property": "customer", + "singular": false, + "always matches": false, + "description": "The orders that a customer has placed, each of which contains one or more line items", + "synonyms": 
["transactions", "purchases"] + } + ], + "additional definitions": [], + "verified pydough analysis": [], + "extra semantic info": {} + } +] diff --git a/tests/test_pipeline_mysql.py b/tests/test_pipeline_mysql.py index a6b68edda..1177d55e0 100644 --- a/tests/test_pipeline_mysql.py +++ b/tests/test_pipeline_mysql.py @@ -201,8 +201,8 @@ def defog_mysql_test_data( defog_custom_pipeline_test_data: PyDoughPandasTest, # noqa: F811 ) -> PyDoughPandasTest: """ - Test data for e2e tests for the TPC-H queries. Returns an instance of - PyDoughPandasTest containing information about the test. + Modify reference solution data for some Defog queries. + Return an instance of PyDoughPandasTest containing the modified data. """ if defog_custom_pipeline_test_data.test_name == "week_offset": return PyDoughPandasTest( diff --git a/tests/test_pipeline_sf.py b/tests/test_pipeline_sf.py new file mode 100644 index 000000000..8d8a5bf63 --- /dev/null +++ b/tests/test_pipeline_sf.py @@ -0,0 +1,550 @@ +""" +Integration tests for the PyDough workflow on the TPC-H queries using Snowflake. +""" + +# ruff: noqa +# mypy: ignore-errors +# ruff & mypy should not try to typecheck or verify any of this + +import pandas as pd +import pytest +import datetime +from tests.test_pipeline_defog_custom import get_start_of_week, get_day_of_week +from pydough.metadata import GraphMetadata +from pydough.unqualified import UnqualifiedNode +from pydough.configs import DayOfWeek, PyDoughConfigs +from pydough.database_connectors import DatabaseContext +from tests.test_pydough_functions.tpch_outputs import ( + tpch_q16_output, +) +from tests.test_pydough_functions.tpch_test_functions import ( + impl_tpch_q16, +) + +from tests.test_pydough_functions.simple_pydough_functions import week_offset + +from tests.testing_utilities import ( + graph_fetcher, + harmonize_types, + PyDoughSQLComparisonTest, +) +from .test_pipeline_defog_custom import defog_custom_pipeline_test_data +from .test_pipeline_defog import defog_pipeline_test_data + +from .testing_utilities import PyDoughPandasTest +from pydough import init_pydough_context, to_df, to_sql + + +@pytest.fixture( + params=[ + pytest.param( + PyDoughPandasTest( + impl_tpch_q16, + "TPCH", + tpch_q16_output, + "tpch_q16_params", + ), + id="tpch_q16_params", + ), + ], +) +def snowflake_params_tpch_q16_data(request) -> PyDoughPandasTest: + """ + Test data for e2e tests for the TPC-H query 16. Returns an instance of + PyDoughPandasTest containing information about the test. + """ + return request.param + + +@pytest.mark.snowflake +@pytest.mark.execute +def test_pipeline_e2e_tpch_sf_conn( + tpch_pipeline_test_data: PyDoughPandasTest, + get_sf_sample_graph: graph_fetcher, + sf_conn_db_context: DatabaseContext, +): + """ + Test executing the TPC-H queries from the original code generation, + with Snowflake as the executing database. + Using the `connection` as keyword argument to the DatabaseContext. + """ + tpch_pipeline_test_data.run_e2e_test( + get_sf_sample_graph, + sf_conn_db_context("SNOWFLAKE_SAMPLE_DATA", "TPCH_SF1"), + coerce_types=True, + ) + + +@pytest.mark.snowflake +@pytest.mark.execute +def test_pipeline_e2e_tpch_sf_params( + snowflake_params_tpch_q16_data: PyDoughPandasTest, + get_sf_sample_graph: graph_fetcher, + sf_params_tpch_db_context: DatabaseContext, +): + """ + Test executing the TPC-H queries from the original code generation, + with Snowflake as the executing database. 
+ Using the `user`, `password`, `account`, `database`, `schema`, and `warehouse` + as keyword arguments to the DatabaseContext. + """ + snowflake_params_tpch_q16_data.run_e2e_test( + get_sf_sample_graph, sf_params_tpch_db_context, coerce_types=True + ) + + +def simple_week_sampler(): + x_dt = datetime.datetime(2025, 3, 10, 11, 00, 0) + y_dt = datetime.datetime(2025, 3, 14, 11, 00, 0) + y_dt2 = datetime.datetime(2025, 3, 15, 11, 00, 0) + y_dt3 = datetime.datetime(2025, 3, 16, 11, 00, 0) + y_dt4 = datetime.datetime(2025, 3, 17, 11, 00, 0) + y_dt5 = datetime.datetime(2025, 3, 18, 11, 00, 0) + y_dt6 = datetime.datetime(2025, 3, 19, 11, 00, 0) + y_dt7 = datetime.datetime(2025, 3, 20, 11, 00, 0) + y_dt8 = datetime.datetime(2025, 3, 21, 11, 00, 0) + return TPCH.CALCULATE( + weeks_diff=DATEDIFF("weeks", x_dt, y_dt), + sow1=DATETIME(y_dt, "start of week"), + sow2=DATETIME(y_dt2, "start of week"), + sow3=DATETIME(y_dt3, "start of week"), + sow4=DATETIME(y_dt4, "start of week"), + sow5=DATETIME(y_dt5, "start of week"), + sow6=DATETIME(y_dt6, "start of week"), + sow7=DATETIME(y_dt7, "start of week"), + sow8=DATETIME(y_dt8, "start of week"), + dayname1=DAYNAME(y_dt), + dayname2=DAYNAME(y_dt2), + dayname3=DAYNAME(y_dt3), + dayname4=DAYNAME(y_dt4), + dayname5=DAYNAME(y_dt5), + dayname6=DAYNAME(y_dt6), + dayname7=DAYNAME(y_dt7), + dayname8=DAYNAME(y_dt8), + dayofweek1=DAYOFWEEK(y_dt), + dayofweek2=DAYOFWEEK(y_dt2), + dayofweek3=DAYOFWEEK(y_dt3), + dayofweek4=DAYOFWEEK(y_dt4), + dayofweek5=DAYOFWEEK(y_dt5), + dayofweek6=DAYOFWEEK(y_dt6), + dayofweek7=DAYOFWEEK(y_dt7), + dayofweek8=DAYOFWEEK(y_dt8), + ) + + +@pytest.mark.snowflake +@pytest.mark.execute +def test_pipeline_e2e_tpch_simple_week( + get_sf_sample_graph: graph_fetcher, + sf_conn_db_context: DatabaseContext, + week_handling_config: PyDoughConfigs, +): + """ + Test executing simple_week_sampler using the tpch schemas with different + week configurations, comparing against expected results. 
+ """ + graph: GraphMetadata = get_sf_sample_graph("TPCH") + root: UnqualifiedNode = init_pydough_context(graph)(simple_week_sampler)() + result: pd.DataFrame = to_df( + root, + metadata=graph, + database=sf_conn_db_context("SNOWFLAKE_SAMPLE_DATA", "TPCH_SF1"), + config=week_handling_config, + ) + + # Generate expected DataFrame based on week_handling_config + start_of_week = week_handling_config.start_of_week + start_week_as_zero = week_handling_config.start_week_as_zero + + x_dt = pd.Timestamp(2025, 3, 10, 11, 0, 0) + y_dt = pd.Timestamp(2025, 3, 14, 11, 0, 0) + y_dt2 = pd.Timestamp(2025, 3, 15, 11, 0, 0) + y_dt3 = pd.Timestamp(2025, 3, 16, 11, 0, 0) + y_dt4 = pd.Timestamp(2025, 3, 17, 11, 0, 0) + y_dt5 = pd.Timestamp(2025, 3, 18, 11, 0, 0) + y_dt6 = pd.Timestamp(2025, 3, 19, 11, 0, 0) + y_dt7 = pd.Timestamp(2025, 3, 20, 11, 0, 0) + y_dt8 = pd.Timestamp(2025, 3, 21, 11, 0, 0) + + # Calculate weeks difference + x_sow = get_start_of_week(x_dt, start_of_week) + y_sow = get_start_of_week(y_dt, start_of_week) + weeks_diff = (y_sow - x_sow).days // 7 + + # Create lists to store calculated values + dates = [y_dt, y_dt2, y_dt3, y_dt4, y_dt5, y_dt6, y_dt7, y_dt8] + sows = [] + daynames = [] + dayofweeks = [] + + # Calculate values for each date in a loop + for dt in dates: + # Calculate start of week + sow = get_start_of_week(dt, start_of_week).strftime("%Y-%m-%d") + sows.append(sow) + + # Get day name + dayname = dt.day_name() + daynames.append(dayname) + + # Calculate day of week + dayofweek = get_day_of_week(dt, start_of_week, start_week_as_zero) + dayofweeks.append(dayofweek) + + # Create dictionary for DataFrame + data_dict = {"WEEKS_DIFF": [weeks_diff]} + + # Add start of week columns + for i in range(len(dates)): + data_dict[f"SOW{i + 1}"] = [sows[i]] + + # Add day name columns + for i in range(len(dates)): + data_dict[f"DAYNAME{i + 1}"] = [daynames[i]] + + # Add day of week columns + for i in range(len(dates)): + data_dict[f"DAYOFWEEK{i + 1}"] = [dayofweeks[i]] + + # Create DataFrame with expected results + expected_df = pd.DataFrame(data_dict) + for col_name in result.columns: + result[col_name], expected_df[col_name] = harmonize_types( + result[col_name], expected_df[col_name] + ) + pd.testing.assert_frame_equal(result, expected_df, check_dtype=False) + + +@pytest.fixture +def defog_sf_test_data( + defog_custom_pipeline_test_data: PyDoughPandasTest, +) -> PyDoughPandasTest: + """ + Modify reference solution data for some Defog queries. + Return an instance of PyDoughPandasTest containing the modified data. + """ + # Adjust the 3rd-to-last data point because Snowflake and SQLite + # handle "+1 month" differently: + # - Snowflake: if the next month overflows, it returns the end of next month + # - SQLite: adds 30 days, which may move into the following month + # + # Example: "2023-01-30 + 1 month" + # SQLite: 2023-03-02 vs. 
Snowflake: 2023-02-28 + if defog_custom_pipeline_test_data.test_name == "week_offset": + return PyDoughPandasTest( + week_offset, + "Broker", + lambda: pd.DataFrame( + { + "date_time": [ + "2023-04-02 09:30:00", + "2023-04-02 10:15:00", + "2023-04-02 11:00:00", + "2023-04-02 11:45:00", + "2023-04-02 12:30:00", + "2023-04-02 13:15:00", + "2023-04-02 14:00:00", + "2023-04-02 14:45:00", + "2023-04-02 15:30:00", + "2023-04-02 16:15:00", + "2023-04-03 09:30:00", + "2023-04-03 10:15:00", + "2023-04-03 11:00:00", + "2023-04-03 11:45:00", + "2023-04-03 12:30:00", + "2023-04-03 13:15:00", + "2023-04-03 14:00:00", + "2023-04-03 14:45:00", + "2023-04-03 15:30:00", + "2023-04-03 16:15:00", + "2023-01-15 10:00:00", + "2023-01-16 10:30:00", + "2023-02-20 11:30:00", + "2023-03-25 14:45:00", + "2023-01-30 13:15:00", + "2023-02-28 16:00:00", + "2023-03-30 09:45:00", + ], + "week_adj1": [ + "2023-04-09 09:30:00", + "2023-04-09 10:15:00", + "2023-04-09 11:00:00", + "2023-04-09 11:45:00", + "2023-04-09 12:30:00", + "2023-04-09 13:15:00", + "2023-04-09 14:00:00", + "2023-04-09 14:45:00", + "2023-04-09 15:30:00", + "2023-04-09 16:15:00", + "2023-04-10 09:30:00", + "2023-04-10 10:15:00", + "2023-04-10 11:00:00", + "2023-04-10 11:45:00", + "2023-04-10 12:30:00", + "2023-04-10 13:15:00", + "2023-04-10 14:00:00", + "2023-04-10 14:45:00", + "2023-04-10 15:30:00", + "2023-04-10 16:15:00", + "2023-01-22 10:00:00", + "2023-01-23 10:30:00", + "2023-02-27 11:30:00", + "2023-04-01 14:45:00", + "2023-02-06 13:15:00", + "2023-03-07 16:00:00", + "2023-04-06 09:45:00", + ], + "week_adj2": [ + "2023-03-26 09:30:00", + "2023-03-26 10:15:00", + "2023-03-26 11:00:00", + "2023-03-26 11:45:00", + "2023-03-26 12:30:00", + "2023-03-26 13:15:00", + "2023-03-26 14:00:00", + "2023-03-26 14:45:00", + "2023-03-26 15:30:00", + "2023-03-26 16:15:00", + "2023-03-27 09:30:00", + "2023-03-27 10:15:00", + "2023-03-27 11:00:00", + "2023-03-27 11:45:00", + "2023-03-27 12:30:00", + "2023-03-27 13:15:00", + "2023-03-27 14:00:00", + "2023-03-27 14:45:00", + "2023-03-27 15:30:00", + "2023-03-27 16:15:00", + "2023-01-08 10:00:00", + "2023-01-09 10:30:00", + "2023-02-13 11:30:00", + "2023-03-18 14:45:00", + "2023-01-23 13:15:00", + "2023-02-21 16:00:00", + "2023-03-23 09:45:00", + ], + "week_adj3": [ + "2023-04-16 10:30:00", + "2023-04-16 11:15:00", + "2023-04-16 12:00:00", + "2023-04-16 12:45:00", + "2023-04-16 13:30:00", + "2023-04-16 14:15:00", + "2023-04-16 15:00:00", + "2023-04-16 15:45:00", + "2023-04-16 16:30:00", + "2023-04-16 17:15:00", + "2023-04-17 10:30:00", + "2023-04-17 11:15:00", + "2023-04-17 12:00:00", + "2023-04-17 12:45:00", + "2023-04-17 13:30:00", + "2023-04-17 14:15:00", + "2023-04-17 15:00:00", + "2023-04-17 15:45:00", + "2023-04-17 16:30:00", + "2023-04-17 17:15:00", + "2023-01-29 11:00:00", + "2023-01-30 11:30:00", + "2023-03-06 12:30:00", + "2023-04-08 15:45:00", + "2023-02-13 14:15:00", + "2023-03-14 17:00:00", + "2023-04-13 10:45:00", + ], + "week_adj4": [ + "2023-04-16 09:29:59", + "2023-04-16 10:14:59", + "2023-04-16 10:59:59", + "2023-04-16 11:44:59", + "2023-04-16 12:29:59", + "2023-04-16 13:14:59", + "2023-04-16 13:59:59", + "2023-04-16 14:44:59", + "2023-04-16 15:29:59", + "2023-04-16 16:14:59", + "2023-04-17 09:29:59", + "2023-04-17 10:14:59", + "2023-04-17 10:59:59", + "2023-04-17 11:44:59", + "2023-04-17 12:29:59", + "2023-04-17 13:14:59", + "2023-04-17 13:59:59", + "2023-04-17 14:44:59", + "2023-04-17 15:29:59", + "2023-04-17 16:14:59", + "2023-01-29 09:59:59", + "2023-01-30 10:29:59", + "2023-03-06 11:29:59", + 
"2023-04-08 14:44:59", + "2023-02-13 13:14:59", + "2023-03-14 15:59:59", + "2023-04-13 09:44:59", + ], + "week_adj5": [ + "2023-04-17 09:30:00", + "2023-04-17 10:15:00", + "2023-04-17 11:00:00", + "2023-04-17 11:45:00", + "2023-04-17 12:30:00", + "2023-04-17 13:15:00", + "2023-04-17 14:00:00", + "2023-04-17 14:45:00", + "2023-04-17 15:30:00", + "2023-04-17 16:15:00", + "2023-04-18 09:30:00", + "2023-04-18 10:15:00", + "2023-04-18 11:00:00", + "2023-04-18 11:45:00", + "2023-04-18 12:30:00", + "2023-04-18 13:15:00", + "2023-04-18 14:00:00", + "2023-04-18 14:45:00", + "2023-04-18 15:30:00", + "2023-04-18 16:15:00", + "2023-01-30 10:00:00", + "2023-01-31 10:30:00", + "2023-03-07 11:30:00", + "2023-04-09 14:45:00", + "2023-02-14 13:15:00", + "2023-03-15 16:00:00", + "2023-04-14 09:45:00", + ], + "week_adj6": [ + "2023-04-16 09:29:00", + "2023-04-16 10:14:00", + "2023-04-16 10:59:00", + "2023-04-16 11:44:00", + "2023-04-16 12:29:00", + "2023-04-16 13:14:00", + "2023-04-16 13:59:00", + "2023-04-16 14:44:00", + "2023-04-16 15:29:00", + "2023-04-16 16:14:00", + "2023-04-17 09:29:00", + "2023-04-17 10:14:00", + "2023-04-17 10:59:00", + "2023-04-17 11:44:00", + "2023-04-17 12:29:00", + "2023-04-17 13:14:00", + "2023-04-17 13:59:00", + "2023-04-17 14:44:00", + "2023-04-17 15:29:00", + "2023-04-17 16:14:00", + "2023-01-29 09:59:00", + "2023-01-30 10:29:00", + "2023-03-06 11:29:00", + "2023-04-08 14:44:00", + "2023-02-13 13:14:00", + "2023-03-14 15:59:00", + "2023-04-13 09:44:00", + ], + "week_adj7": [ + "2023-05-16 09:30:00", + "2023-05-16 10:15:00", + "2023-05-16 11:00:00", + "2023-05-16 11:45:00", + "2023-05-16 12:30:00", + "2023-05-16 13:15:00", + "2023-05-16 14:00:00", + "2023-05-16 14:45:00", + "2023-05-16 15:30:00", + "2023-05-16 16:15:00", + "2023-05-17 09:30:00", + "2023-05-17 10:15:00", + "2023-05-17 11:00:00", + "2023-05-17 11:45:00", + "2023-05-17 12:30:00", + "2023-05-17 13:15:00", + "2023-05-17 14:00:00", + "2023-05-17 14:45:00", + "2023-05-17 15:30:00", + "2023-05-17 16:15:00", + "2023-03-01 10:00:00", + "2023-03-02 10:30:00", + "2023-04-03 11:30:00", + "2023-05-09 14:45:00", + "2023-03-14 13:15:00", + "2023-04-11 16:00:00", + "2023-05-14 09:45:00", + ], + "week_adj8": [ + "2024-04-16 09:30:00", + "2024-04-16 10:15:00", + "2024-04-16 11:00:00", + "2024-04-16 11:45:00", + "2024-04-16 12:30:00", + "2024-04-16 13:15:00", + "2024-04-16 14:00:00", + "2024-04-16 14:45:00", + "2024-04-16 15:30:00", + "2024-04-16 16:15:00", + "2024-04-17 09:30:00", + "2024-04-17 10:15:00", + "2024-04-17 11:00:00", + "2024-04-17 11:45:00", + "2024-04-17 12:30:00", + "2024-04-17 13:15:00", + "2024-04-17 14:00:00", + "2024-04-17 14:45:00", + "2024-04-17 15:30:00", + "2024-04-17 16:15:00", + "2024-01-29 10:00:00", + "2024-01-30 10:30:00", + "2024-03-05 11:30:00", + "2024-04-08 14:45:00", + "2024-02-13 13:15:00", + "2024-03-13 16:00:00", + "2024-04-13 09:45:00", + ], + } + ), + "week_offset", + skip_sql=True, + ) + + return defog_custom_pipeline_test_data + + +@pytest.mark.snowflake +@pytest.mark.execute +def test_pipeline_sf_e2e_defog_custom( + defog_sf_test_data: PyDoughPandasTest, + get_sf_defog_graphs: graph_fetcher, + sf_conn_db_context: DatabaseContext, +): + """ + Test executing the defog analytical queries with Snowflake database. 
+ """ + defog_sf_test_data.run_e2e_test( + get_sf_defog_graphs, + sf_conn_db_context("DEFOG", defog_sf_test_data.graph_name), + coerce_types=True, + ) + + +@pytest.mark.snowflake +@pytest.mark.execute +def test_defog_e2e( + defog_pipeline_test_data: PyDoughSQLComparisonTest, + get_sf_defog_graphs: graph_fetcher, + sf_conn_db_context: DatabaseContext, + defog_config: PyDoughConfigs, + sqlite_defog_connection: DatabaseContext, +) -> None: + """ + Test executing the defog analytical questions on the sqlite database, + comparing against the result of running the reference SQL query text on the + same database connector. Run on the defog.ai queries. + NOTE: passing SQLite connection as reference database so that refsol + is executed using SQLite. + This is needed because refsol uses SQLite SQL syntax to obtain + the correct results. + """ + defog_pipeline_test_data.run_e2e_test( + get_sf_defog_graphs, + sf_conn_db_context("DEFOG", defog_pipeline_test_data.graph_name), + defog_config, + reference_database=sqlite_defog_connection, + coerce_types=True, + ) diff --git a/tests/test_sql_refsols/agg_simplification_1_snowflake.sql b/tests/test_sql_refsols/agg_simplification_1_snowflake.sql new file mode 100644 index 000000000..83fcdd09c --- /dev/null +++ b/tests/test_sql_refsols/agg_simplification_1_snowflake.sql @@ -0,0 +1,85 @@ +WITH _t0 AS ( + SELECT + LENGTH(CASE WHEN sbtickerexchange <> 'NYSE Arca' THEN sbtickerexchange ELSE NULL END) AS aug_exchange, + COUNT(*) AS count_one + FROM main.sbticker + GROUP BY + 1 +) +SELECT + aug_exchange, + count_one AS su1, + count_one * 2 AS su2, + count_one * -1 AS su3, + count_one * -3 AS su4, + 0 AS su5, + count_one * 0.5 AS su6, + 0 AS su7, + COALESCE(aug_exchange, 0) AS su8, + count_one AS co1, + count_one AS co2, + count_one AS co3, + count_one AS co4, + count_one AS co5, + count_one AS co6, + 0 AS co7, + count_one * IFF(NOT aug_exchange IS NULL, 1, 0) AS co8, + 1 AS nd1, + 1 AS nd2, + 1 AS nd3, + 1 AS nd4, + 1 AS nd5, + 1 AS nd6, + 0 AS nd7, + CAST(NOT aug_exchange IS NULL AS BIGINT) AS nd8, + 1 AS av1, + 2 AS av2, + -1 AS av3, + -3 AS av4, + 0 AS av5, + 0.5 AS av6, + NULL AS av7, + aug_exchange AS av8, + 1 AS mi1, + 2 AS mi2, + -1 AS mi3, + -3 AS mi4, + 0 AS mi5, + 0.5 AS mi6, + NULL AS mi7, + aug_exchange AS mi8, + 1 AS ma1, + 2 AS ma2, + -1 AS ma3, + -3 AS ma4, + 0 AS ma5, + 0.5 AS ma6, + NULL AS ma7, + aug_exchange AS ma8, + 1 AS an1, + 2 AS an2, + -1 AS an3, + -3 AS an4, + 0 AS an5, + 0.5 AS an6, + NULL AS an7, + aug_exchange AS an8, + 1 AS me1, + 2 AS me2, + -1 AS me3, + -3 AS me4, + 0 AS me5, + 0.5 AS me6, + NULL AS me7, + aug_exchange AS me8, + 1 AS qu1, + 2 AS qu2, + -1 AS qu3, + -3 AS qu4, + 0 AS qu5, + 0.5 AS qu6, + NULL AS qu7, + aug_exchange AS qu8 +FROM _t0 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/agg_simplification_2_snowflake.sql b/tests/test_sql_refsols/agg_simplification_2_snowflake.sql new file mode 100644 index 000000000..c6f20a0d2 --- /dev/null +++ b/tests/test_sql_refsols/agg_simplification_2_snowflake.sql @@ -0,0 +1,16 @@ +SELECT + sbcuststate AS state, + COUNT(DISTINCT sbcustcity) AS a1, + COUNT(*) AS a2, + COUNT(CASE WHEN STARTSWITH(LOWER(sbcustname), 'j') THEN sbcustname ELSE NULL END) AS a3, + COALESCE(SUM(CAST(sbcustpostalcode AS BIGINT)), 0) AS a4, + MIN(sbcustphone) AS a5, + MAX(sbcustphone) AS a6, + ANY_VALUE(LOWER(sbcuststate)) AS a7, + ANY_VALUE(LOWER(sbcuststate)) AS a8, + ANY_VALUE(LOWER(sbcuststate)) AS a9 +FROM main.sbcustomer +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git 
a/tests/test_sql_refsols/aggregation_functions_snowflake.sql b/tests/test_sql_refsols/aggregation_functions_snowflake.sql new file mode 100644 index 000000000..0477091f2 --- /dev/null +++ b/tests/test_sql_refsols/aggregation_functions_snowflake.sql @@ -0,0 +1,47 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + o_custkey + FROM tpch.orders + GROUP BY + 2 +), _t1 AS ( + SELECT + PERCENTILE_DISC(0.8) WITHIN GROUP (ORDER BY + customer.c_acctbal) AS agg_7, + ANY_VALUE(customer.c_acctbal) AS anything_c_acctbal, + AVG(customer.c_acctbal) AS avg_c_acctbal, + COUNT(customer.c_acctbal) AS count_c_acctbal, + MAX(customer.c_acctbal) AS max_c_acctbal, + MEDIAN(customer.c_acctbal) AS median_c_acctbal, + MIN(customer.c_acctbal) AS min_c_acctbal, + COUNT(DISTINCT customer.c_acctbal) AS ndistinct_c_acctbal, + STDDEV(customer.c_acctbal) AS sample_std_c_acctbal, + VARIANCE(customer.c_acctbal) AS sample_variance_c_acctbal, + SUM(customer.c_acctbal) AS sum_c_acctbal, + SUM(_s1.n_rows) AS sum_n_rows, + customer.c_nationkey + FROM tpch.customer AS customer + LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey + GROUP BY + 13 +) +SELECT + COALESCE(_t1.sum_c_acctbal, 0) AS sum_value, + _t1.avg_c_acctbal AS avg_value, + _t1.median_c_acctbal AS median_value, + _t1.min_c_acctbal AS min_value, + _t1.max_c_acctbal AS max_value, + _t1.agg_7 AS quantile_value, + _t1.anything_c_acctbal AS anything_value, + _t1.count_c_acctbal AS count_value, + _t1.ndistinct_c_acctbal AS count_distinct_value, + _t1.sample_variance_c_acctbal AS variance_value, + _t1.sample_std_c_acctbal AS stddev_value +FROM tpch.nation AS nation +JOIN _t1 AS _t1 + ON _t1.c_nationkey = nation.n_nationkey + AND ( + _t1.sum_n_rows = 0 OR _t1.sum_n_rows IS NULL + ) diff --git a/tests/test_sql_refsols/arithmetic_and_binary_operators_snowflake.sql b/tests/test_sql_refsols/arithmetic_and_binary_operators_snowflake.sql new file mode 100644 index 000000000..0a1276bc9 --- /dev/null +++ b/tests/test_sql_refsols/arithmetic_and_binary_operators_snowflake.sql @@ -0,0 +1,16 @@ +SELECT + ( + lineitem.l_extendedprice * ( + 1 - ( + POWER(lineitem.l_discount, 2) + ) + ) + 1.0 + ) / part.p_retailprice AS computed_value, + lineitem.l_quantity + lineitem.l_extendedprice AS total, + lineitem.l_extendedprice - lineitem.l_quantity AS delta, + lineitem.l_quantity * lineitem.l_discount AS product, + lineitem.l_extendedprice / lineitem.l_quantity AS ratio, + POWER(lineitem.l_discount, 2) AS exponent +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey diff --git a/tests/test_sql_refsols/casting_functions_snowflake.sql b/tests/test_sql_refsols/casting_functions_snowflake.sql new file mode 100644 index 000000000..6393eacfd --- /dev/null +++ b/tests/test_sql_refsols/casting_functions_snowflake.sql @@ -0,0 +1,6 @@ +SELECT + TO_CHAR(CAST(o_orderdate AS TIMESTAMP), 'yyyy-mm-DD') AS cast_to_string, + CAST(o_totalprice AS TEXT) AS cast_to_string2, + CAST(o_totalprice AS BIGINT) AS cast_to_integer, + CAST(o_shippriority AS DOUBLE) AS cast_to_float +FROM tpch.orders diff --git a/tests/test_sql_refsols/comparisons_and_logical_operators_snowflake.sql b/tests/test_sql_refsols/comparisons_and_logical_operators_snowflake.sql new file mode 100644 index 000000000..b3569f559 --- /dev/null +++ b/tests/test_sql_refsols/comparisons_and_logical_operators_snowflake.sql @@ -0,0 +1,24 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + o_custkey + FROM tpch.orders + GROUP BY + 2 +) +SELECT + customer.c_acctbal < 0 AS in_debt, + _s1.n_rows <= 12 OR 
_s1.n_rows IS NULL AS at_most_12_orders, + region.r_name = 'EUROPE' AS is_european, + nation.n_name <> 'GERMANY' AS non_german, + customer.c_acctbal > 0 AS non_empty_acct, + NOT _s1.n_rows IS NULL AND _s1.n_rows >= 5 AS at_least_5_orders, + region.r_name = 'ASIA' OR region.r_name = 'EUROPE' AS is_eurasian, + customer.c_acctbal < 0 AND region.r_name = 'EUROPE' AS is_european_in_debt +FROM tpch.customer AS customer +LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey diff --git a/tests/test_sql_refsols/conditional_functions_snowflake.sql b/tests/test_sql_refsols/conditional_functions_snowflake.sql new file mode 100644 index 000000000..e866f55e9 --- /dev/null +++ b/tests/test_sql_refsols/conditional_functions_snowflake.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + MIN(o_totalprice) AS min_o_totalprice, + o_custkey + FROM tpch.orders + GROUP BY + 2 +) +SELECT + IFF(customer.c_acctbal > 1000, 'High', 'Low') AS iff_col, + customer.c_name IN ('Alice', 'Bob', 'Charlie') AS isin_col, + COALESCE(_s1.min_o_totalprice, 0.0) AS default_val, + NOT _s1.min_o_totalprice IS NULL AS has_acct_bal, + _s1.min_o_totalprice IS NULL AS no_acct_bal, + CASE WHEN customer.c_acctbal > 0 THEN customer.c_acctbal ELSE NULL END AS no_debt_bal +FROM tpch.customer AS customer +LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +WHERE + customer.c_acctbal <= 1000 AND customer.c_acctbal >= 100 diff --git a/tests/test_sql_refsols/cumulative_stock_analysis_snowflake.sql b/tests/test_sql_refsols/cumulative_stock_analysis_snowflake.sql new file mode 100644 index 000000000..25d8b846f --- /dev/null +++ b/tests/test_sql_refsols/cumulative_stock_analysis_snowflake.sql @@ -0,0 +1,32 @@ +SELECT + sbtransaction.sbtxdatetime AS date_time, + COUNT(*) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS txn_within_day, + COUNT( + CASE WHEN sbtransaction.sbtxtype = 'buy' THEN sbtransaction.sbtxtype ELSE NULL END + ) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n_buys_within_day, + ROUND( + ( + 100.0 * COUNT_IF(sbticker.sbtickersymbol IN ('AAPL', 'AMZN')) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) + ) / COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS pct_apple_txns, + SUM( + IFF( + sbtransaction.sbtxtype = 'buy', + sbtransaction.sbtxshares, + 0 - sbtransaction.sbtxshares + ) + ) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS share_change, + ROUND( + AVG(sbtransaction.sbtxamount) OVER (ORDER BY sbtransaction.sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS rolling_avg_amount +FROM main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid +WHERE + MONTH(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) = 4 + AND YEAR(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) = 2023 + AND sbtransaction.sbtxstatus = 'success' +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/datediff_snowflake.sql b/tests/test_sql_refsols/datediff_snowflake.sql new file mode 100644 index 000000000..90148f8b4 
--- /dev/null +++ b/tests/test_sql_refsols/datediff_snowflake.sql @@ -0,0 +1,16 @@ +SELECT + sbtxdatetime AS x, + CAST('2025-05-02 11:00:00' AS TIMESTAMP) AS y1, + CAST('2023-04-03 13:16:30' AS TIMESTAMP) AS y, + DATEDIFF(YEAR, CAST(sbtxdatetime AS DATETIME), CAST('2025-05-02 11:00:00' AS TIMESTAMP)) AS years_diff, + DATEDIFF(MONTH, CAST(sbtxdatetime AS DATETIME), CAST('2025-05-02 11:00:00' AS TIMESTAMP)) AS months_diff, + DATEDIFF(DAY, CAST(sbtxdatetime AS DATETIME), CAST('2025-05-02 11:00:00' AS TIMESTAMP)) AS days_diff, + DATEDIFF(HOUR, CAST(sbtxdatetime AS DATETIME), CAST('2025-05-02 11:00:00' AS TIMESTAMP)) AS hours_diff, + DATEDIFF(MINUTE, CAST(sbtxdatetime AS DATETIME), CAST('2023-04-03 13:16:30' AS TIMESTAMP)) AS minutes_diff, + DATEDIFF(SECOND, CAST(sbtxdatetime AS DATETIME), CAST('2023-04-03 13:16:30' AS TIMESTAMP)) AS seconds_diff +FROM main.sbtransaction +WHERE + YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 +ORDER BY + 4 NULLS FIRST +LIMIT 30 diff --git a/tests/test_sql_refsols/datetime_functions_ansi.sql b/tests/test_sql_refsols/datetime_functions_ansi.sql index 021f5b708..fa4939796 100644 --- a/tests/test_sql_refsols/datetime_functions_ansi.sql +++ b/tests/test_sql_refsols/datetime_functions_ansi.sql @@ -3,7 +3,7 @@ SELECT DATE_TRUNC('DAY', CURRENT_TIMESTAMP()) AS ts_now_2, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()) AS ts_now_3, DATE_ADD(CURRENT_TIMESTAMP(), 1, 'HOUR') AS ts_now_4, - CAST('2025-01-01 00:00:00' AS TIMESTAMP) AS ts_now_5, + DATE_TRUNC('MONTH', CAST('2025-01-01 00:00:00' AS TIMESTAMP)) AS ts_now_5, CAST('1995-10-08 00:00:00' AS TIMESTAMP) AS ts_now_6, EXTRACT(YEAR FROM CAST(o_orderdate AS DATETIME)) AS year_col, EXTRACT(YEAR FROM CAST('2020-05-01 00:00:00' AS TIMESTAMP)) AS year_py, diff --git a/tests/test_sql_refsols/datetime_functions_snowflake.sql b/tests/test_sql_refsols/datetime_functions_snowflake.sql new file mode 100644 index 000000000..9efb94cfe --- /dev/null +++ b/tests/test_sql_refsols/datetime_functions_snowflake.sql @@ -0,0 +1,88 @@ +SELECT + CURRENT_TIMESTAMP() AS ts_now_1, + DATE_TRUNC('DAY', CURRENT_TIMESTAMP()) AS ts_now_2, + DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()) AS ts_now_3, + DATEADD(HOUR, 1, CURRENT_TIMESTAMP()) AS ts_now_4, + DATE_TRUNC('MONTH', CAST('2025-01-01 00:00:00' AS TIMESTAMP)) AS ts_now_5, + CAST('1995-10-08 00:00:00' AS TIMESTAMP) AS ts_now_6, + YEAR(CAST(o_orderdate AS TIMESTAMP)) AS year_col, + YEAR(CAST('2020-05-01 00:00:00' AS TIMESTAMP)) AS year_py, + YEAR(CAST('1995-10-10 00:00:00' AS TIMESTAMP)) AS year_pd, + MONTH(CAST(o_orderdate AS TIMESTAMP)) AS month_col, + MONTH(CAST('2025-02-25' AS TIMESTAMP)) AS month_str, + MONTH(CAST('1992-01-01 12:30:45' AS TIMESTAMP)) AS month_dt, + DAY(CAST(o_orderdate AS TIMESTAMP)) AS day_col, + DAY(CAST('1996-11-25 10:45:00' AS TIMESTAMP)) AS day_str, + HOUR(CAST('1995-12-01 23:59:59' AS TIMESTAMP)) AS hour_str, + MINUTE(CAST('1995-12-01 23:59:59' AS TIMESTAMP)) AS minute_str, + SECOND(CAST('1992-01-01 00:00:59' AS TIMESTAMP)) AS second_ts, + DATEDIFF(DAY, CAST(o_orderdate AS DATETIME), CAST('1992-01-01' AS TIMESTAMP)) AS dd_col_str, + DATEDIFF(DAY, CAST('1992-01-01' AS TIMESTAMP), CAST(o_orderdate AS DATETIME)) AS dd_str_col, + DATEDIFF(MONTH, CAST('1995-10-10 00:00:00' AS TIMESTAMP), CAST(o_orderdate AS DATETIME)) AS dd_pd_col, + DATEDIFF(YEAR, CAST(o_orderdate AS DATETIME), CAST('1992-01-01 12:30:45' AS TIMESTAMP)) AS dd_col_dt, + DATEDIFF( + WEEK, + DATEADD( + DAY, + DAYOFWEEK(CAST('1992-01-01' AS TIMESTAMP)) * -1, + CAST('1992-01-01' AS TIMESTAMP) + ), + DATEADD( + DAY, + 
DAYOFWEEK(CAST('1992-01-01 12:30:45' AS TIMESTAMP)) * -1, + CAST('1992-01-01 12:30:45' AS TIMESTAMP) + ) + ) AS dd_dt_str, + DAYOFWEEK(o_orderdate) AS dow_col, + DAYOFWEEK('1992-07-01') AS dow_str, + DAYOFWEEK(CAST('1992-01-01 12:30:45' AS TIMESTAMP)) AS dow_dt, + DAYOFWEEK(CAST('1995-10-10 00:00:00' AS TIMESTAMP)) AS dow_pd, + CASE + WHEN DAYOFWEEK(o_orderdate) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(o_orderdate) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(o_orderdate) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(o_orderdate) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(o_orderdate) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(o_orderdate) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(o_orderdate) = 6 + THEN 'Saturday' + END AS dayname_col, + CASE + WHEN DAYOFWEEK('1995-06-30') = 0 + THEN 'Sunday' + WHEN DAYOFWEEK('1995-06-30') = 1 + THEN 'Monday' + WHEN DAYOFWEEK('1995-06-30') = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK('1995-06-30') = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK('1995-06-30') = 4 + THEN 'Thursday' + WHEN DAYOFWEEK('1995-06-30') = 5 + THEN 'Friday' + WHEN DAYOFWEEK('1995-06-30') = 6 + THEN 'Saturday' + END AS dayname_str, + CASE + WHEN DAYOFWEEK(CAST('1993-08-15 00:00:00' AS TIMESTAMP)) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(CAST('1993-08-15 00:00:00' AS TIMESTAMP)) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(CAST('1993-08-15 00:00:00' AS TIMESTAMP)) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(CAST('1993-08-15 00:00:00' AS TIMESTAMP)) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(CAST('1993-08-15 00:00:00' AS TIMESTAMP)) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(CAST('1993-08-15 00:00:00' AS TIMESTAMP)) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(CAST('1993-08-15 00:00:00' AS TIMESTAMP)) = 6 + THEN 'Saturday' + END AS dayname_dt +FROM tpch.orders diff --git a/tests/test_sql_refsols/datetime_sampler_ansi.sql b/tests/test_sql_refsols/datetime_sampler_ansi.sql index 23a502279..2d55f5a3b 100644 --- a/tests/test_sql_refsols/datetime_sampler_ansi.sql +++ b/tests/test_sql_refsols/datetime_sampler_ansi.sql @@ -19,29 +19,29 @@ SELECT CURRENT_TIMESTAMP() AS _expr17, CURRENT_TIMESTAMP() AS _expr18, CAST(o_orderdate AS TIMESTAMP) AS _expr19, - DATE_TRUNC('SECOND', CURRENT_TIMESTAMP()) AS _expr20, + TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), SECOND) AS _expr20, DATE_ADD(DATE_ADD(DATE_TRUNC('YEAR', CURRENT_TIMESTAMP()), 8, 'MINUTE'), -141, 'MONTH') AS _expr21, - DATE_TRUNC('HOUR', DATE_TRUNC('SECOND', DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()))) AS _expr22, - DATE_TRUNC('HOUR', DATE_TRUNC('SECOND', DATE_TRUNC('HOUR', CURRENT_TIMESTAMP()))) AS _expr23, + TIMESTAMP_TRUNC(TIMESTAMP_TRUNC(DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()), SECOND), HOUR) AS _expr22, + TIMESTAMP_TRUNC(TIMESTAMP_TRUNC(TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), HOUR), SECOND), HOUR) AS _expr23, DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), -96, 'HOUR'), 15, 'YEAR') AS _expr24, DATE_ADD( - DATE_TRUNC('MINUTE', DATE_ADD(DATE_TRUNC('YEAR', CURRENT_TIMESTAMP()), -3, 'YEAR')), + TIMESTAMP_TRUNC(DATE_ADD(DATE_TRUNC('YEAR', CURRENT_TIMESTAMP()), -3, 'YEAR'), MINUTE), 65, 'MONTH' ) AS _expr25, DATE_TRUNC('YEAR', DATE_ADD(CAST(o_orderdate AS TIMESTAMP), -56, 'HOUR')) AS _expr26, - DATE_TRUNC('SECOND', DATE_TRUNC('MINUTE', DATE_ADD(CURRENT_TIMESTAMP(), -63, 'DAY'))) AS _expr27, + TIMESTAMP_TRUNC(TIMESTAMP_TRUNC(DATE_ADD(CURRENT_TIMESTAMP(), -63, 'DAY'), MINUTE), SECOND) AS _expr27, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()) AS _expr28, - DATE_ADD(DATE_TRUNC('SECOND', DATE_ADD(CURRENT_TIMESTAMP(), -312, 'HOUR')), 48, 'YEAR') AS _expr29, + DATE_ADD(TIMESTAMP_TRUNC(DATE_ADD(CURRENT_TIMESTAMP(), -312, 'HOUR'), SECOND), 48, 'YEAR') AS 
_expr29, DATE_ADD( DATE_ADD(DATE_TRUNC('DAY', DATE_ADD(CURRENT_TIMESTAMP(), 75, 'DAY')), 600, 'MINUTE'), -294, 'DAY' ) AS _expr30, DATE_ADD(DATE_ADD(DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()), 480, 'MONTH'), -45, 'YEAR') AS _expr31, - DATE_TRUNC( - 'SECOND', - DATE_TRUNC('DAY', DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), -270, 'MINUTE'), -34, 'SECOND')) + TIMESTAMP_TRUNC( + DATE_TRUNC('DAY', DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), -270, 'MINUTE'), -34, 'SECOND')), + SECOND ) AS _expr32, DATE_ADD(DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()), 213, 'SECOND') AS _expr33, DATE_ADD( @@ -51,13 +51,13 @@ SELECT ) AS _expr34, DATE_TRUNC('DAY', CURRENT_TIMESTAMP()) AS _expr35, DATE_ADD( - DATE_ADD(DATE_TRUNC('HOUR', CAST('2025-01-01' AS TIMESTAMP)), 49, 'MINUTE'), + DATE_ADD(TIMESTAMP_TRUNC(CAST('2025-01-01' AS TIMESTAMP), HOUR), 49, 'MINUTE'), 91, 'YEAR' ) AS _expr36, DATE_TRUNC('DAY', DATE_TRUNC('YEAR', CURRENT_TIMESTAMP())) AS _expr37, DATE_TRUNC('YEAR', DATE_TRUNC('DAY', CURRENT_TIMESTAMP())) AS _expr38, - CAST('2025-07-01 13:20:45' AS TIMESTAMP) AS _expr39, + DATE_ADD(DATE_TRUNC('MONTH', CAST('2025-07-04 12:58:45' AS TIMESTAMP)), 22, 'MINUTE') AS _expr39, DATE_TRUNC('YEAR', CURRENT_TIMESTAMP()) AS _expr40, DATE_TRUNC( 'YEAR', @@ -69,14 +69,14 @@ SELECT ) AS _expr41, DATE_ADD(CURRENT_TIMESTAMP(), 192, 'MONTH') AS _expr42, DATE_ADD( - DATE_TRUNC('HOUR', DATE_TRUNC('MINUTE', DATE_TRUNC('HOUR', CURRENT_TIMESTAMP()))), + TIMESTAMP_TRUNC(TIMESTAMP_TRUNC(TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), HOUR), MINUTE), HOUR), 486, 'MINUTE' ) AS _expr43, - DATE_ADD(DATE_TRUNC('SECOND', CURRENT_TIMESTAMP()), -50, 'HOUR') AS _expr44, - DATE_TRUNC( - 'HOUR', - DATE_ADD(DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), 297, 'DAY'), 72, 'MONTH'), -92, 'MONTH') + DATE_ADD(TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), SECOND), -50, 'HOUR') AS _expr44, + TIMESTAMP_TRUNC( + DATE_ADD(DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), 297, 'DAY'), 72, 'MONTH'), -92, 'MONTH'), + HOUR ) AS _expr45, DATE_TRUNC('DAY', DATE_ADD(CURRENT_TIMESTAMP(), 285, 'SECOND')) AS _expr46, CAST('1999-05-15 00:00:00' AS TIMESTAMP) AS _expr47, @@ -86,26 +86,32 @@ SELECT 'DAY' ) AS _expr48, DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), 212, 'MINUTE'), 368, 'YEAR') AS _expr49, - DATE_TRUNC('MINUTE', DATE_TRUNC('MINUTE', CAST('2024-01-01 11:59:00' AS TIMESTAMP))) AS _expr50, - DATE_TRUNC('DAY', DATE_TRUNC('HOUR', CAST('1999-03-14' AS TIMESTAMP))) AS _expr51, + TIMESTAMP_TRUNC( + TIMESTAMP_TRUNC( + DATE_TRUNC('YEAR', DATE_TRUNC('MONTH', CAST('2024-12-31 11:59:00' AS TIMESTAMP))), + MINUTE + ), + MINUTE + ) AS _expr50, + DATE_TRUNC('DAY', TIMESTAMP_TRUNC(CAST('1999-03-14' AS TIMESTAMP), HOUR)) AS _expr51, DATE_ADD( - DATE_TRUNC('MINUTE', DATE_TRUNC('DAY', DATE_ADD(CURRENT_TIMESTAMP(), -60, 'HOUR'))), + TIMESTAMP_TRUNC(DATE_TRUNC('DAY', DATE_ADD(CURRENT_TIMESTAMP(), -60, 'HOUR')), MINUTE), 196, 'YEAR' ) AS _expr52, DATE_ADD( - DATE_TRUNC('MINUTE', DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), -40, 'HOUR'), -385, 'DAY')), + TIMESTAMP_TRUNC(DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), -40, 'HOUR'), -385, 'DAY'), MINUTE), 29, 'HOUR' ) AS _expr53, - DATE_TRUNC('MINUTE', DATE_TRUNC('HOUR', DATE_ADD(CURRENT_TIMESTAMP(), 405, 'DAY'))) AS _expr54, + TIMESTAMP_TRUNC(TIMESTAMP_TRUNC(DATE_ADD(CURRENT_TIMESTAMP(), 405, 'DAY'), HOUR), MINUTE) AS _expr54, DATE_ADD( - DATE_ADD(DATE_TRUNC('SECOND', DATE_TRUNC('YEAR', CURRENT_TIMESTAMP())), 98, 'YEAR'), + DATE_ADD(TIMESTAMP_TRUNC(DATE_TRUNC('YEAR', CURRENT_TIMESTAMP()), SECOND), 98, 'YEAR'), 96, 'MONTH' ) AS _expr55, DATE_ADD( - DATE_TRUNC('DAY', DATE_TRUNC('SECOND', 
DATE_TRUNC('MINUTE', CURRENT_TIMESTAMP()))), + DATE_TRUNC('DAY', TIMESTAMP_TRUNC(TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), MINUTE), SECOND)), 78, 'SECOND' ) AS _expr56, diff --git a/tests/test_sql_refsols/datetime_sampler_mysql.sql b/tests/test_sql_refsols/datetime_sampler_mysql.sql index 5bf98d93b..0075b3059 100644 --- a/tests/test_sql_refsols/datetime_sampler_mysql.sql +++ b/tests/test_sql_refsols/datetime_sampler_mysql.sql @@ -19,7 +19,10 @@ SELECT CURRENT_TIMESTAMP() AS _expr17, CURRENT_TIMESTAMP() AS _expr18, CAST(o_orderdate AS DATETIME) AS _expr19, - DATE(CURRENT_TIMESTAMP()) AS _expr20, + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF(SECOND, '0000-01-01 00:00:00', CURRENT_TIMESTAMP())) SECOND + ) AS _expr20, DATE_ADD( DATE_ADD( CAST(STR_TO_DATE(CONCAT(YEAR(CURRENT_TIMESTAMP()), ' 1 1'), '%Y %c %e') AS DATETIME), @@ -27,16 +30,54 @@ SELECT ), INTERVAL '-141' MONTH ) AS _expr21, - STR_TO_DATE( - CONCAT(YEAR(CURRENT_TIMESTAMP()), ' ', MONTH(CURRENT_TIMESTAMP()), ' 1'), - '%Y %c %e' + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + HOUR, + '0000-01-01 00:00:00', + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + SECOND, + '0000-01-01 00:00:00', + STR_TO_DATE( + CONCAT(YEAR(CURRENT_TIMESTAMP()), ' ', MONTH(CURRENT_TIMESTAMP()), ' 1'), + '%Y %c %e' + ) + )) SECOND + ) + )) HOUR ) AS _expr22, - DATE(CURRENT_TIMESTAMP()) AS _expr23, + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + HOUR, + '0000-01-01 00:00:00', + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + SECOND, + '0000-01-01 00:00:00', + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF(HOUR, '0000-01-01 00:00:00', CURRENT_TIMESTAMP())) HOUR + ) + )) SECOND + ) + )) HOUR + ) AS _expr23, DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '-96' HOUR), INTERVAL '15' YEAR) AS _expr24, DATE_ADD( DATE_ADD( - STR_TO_DATE(CONCAT(YEAR(CURRENT_TIMESTAMP()), ' 1 1'), '%Y %c %e'), - INTERVAL '-3' YEAR + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + MINUTE, + '0000-01-01 00:00:00', + DATE_ADD( + STR_TO_DATE(CONCAT(YEAR(CURRENT_TIMESTAMP()), ' 1 1'), '%Y %c %e'), + INTERVAL '-3' YEAR + ) + )) MINUTE ), INTERVAL '65' MONTH ) AS _expr25, @@ -44,12 +85,28 @@ SELECT CONCAT(YEAR(DATE_ADD(CAST(o_orderdate AS DATETIME), INTERVAL '-56' HOUR)), ' 1 1'), '%Y %c %e' ) AS _expr26, - DATE(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '-63' DAY)) AS _expr27, + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + SECOND, + '0000-01-01 00:00:00', + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF(MINUTE, '0000-01-01 00:00:00', DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '-63' DAY))) MINUTE + ) + )) SECOND + ) AS _expr27, STR_TO_DATE( CONCAT(YEAR(CURRENT_TIMESTAMP()), ' ', MONTH(CURRENT_TIMESTAMP()), ' 1'), '%Y %c %e' ) AS _expr28, - DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '-312' HOUR), INTERVAL '48' YEAR) AS _expr29, + DATE_ADD( + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF(SECOND, '0000-01-01 00:00:00', DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '-312' HOUR))) SECOND + ), + INTERVAL '48' YEAR + ) AS _expr29, DATE_ADD( DATE_ADD( CAST(CAST(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '75' DAY) AS DATE) AS DATETIME), @@ -67,7 +124,14 @@ SELECT ), INTERVAL '-45' YEAR ) AS _expr31, - CAST(DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '-270' MINUTE), INTERVAL '-34' SECOND) AS DATE) AS _expr32, + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + SECOND, + '0000-01-01 00:00:00', + CAST(DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '-270' 
MINUTE), INTERVAL '-34' SECOND) AS DATE) + )) SECOND + ) AS _expr32, DATE_ADD( CAST(STR_TO_DATE( CONCAT(YEAR(CURRENT_TIMESTAMP()), ' ', MONTH(CURRENT_TIMESTAMP()), ' 1'), @@ -89,7 +153,16 @@ SELECT INTERVAL '344' SECOND ) AS _expr34, CAST(CURRENT_TIMESTAMP() AS DATE) AS _expr35, - CAST('2116-01-01 00:49:00' AS DATETIME) AS _expr36, + DATE_ADD( + DATE_ADD( + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF(HOUR, '0000-01-01 00:00:00', CAST('2025-01-01' AS DATETIME))) HOUR + ), + INTERVAL '49' MINUTE + ), + INTERVAL '91' YEAR + ) AS _expr36, STR_TO_DATE(CONCAT(YEAR(CURRENT_TIMESTAMP()), ' 1 1'), '%Y %c %e') AS _expr37, STR_TO_DATE(CONCAT(YEAR(CAST(CURRENT_TIMESTAMP() AS DATE)), ' 1 1'), '%Y %c %e') AS _expr38, DATE_ADD( @@ -121,13 +194,44 @@ SELECT '%Y %c %e' ) AS _expr41, DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '192' MONTH) AS _expr42, - DATE_ADD(CAST(DATE(CURRENT_TIMESTAMP()) AS DATETIME), INTERVAL '486' MINUTE) AS _expr43, - DATE_ADD(CAST(DATE(CURRENT_TIMESTAMP()) AS DATETIME), INTERVAL '-50' HOUR) AS _expr44, - DATE( + DATE_ADD( + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + HOUR, + '0000-01-01 00:00:00', + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + MINUTE, + '0000-01-01 00:00:00', + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF(HOUR, '0000-01-01 00:00:00', CURRENT_TIMESTAMP())) HOUR + ) + )) MINUTE + ) + )) HOUR + ), + INTERVAL '486' MINUTE + ) AS _expr43, + DATE_ADD( DATE_ADD( - DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '297' DAY), INTERVAL '72' MONTH), - INTERVAL '-92' MONTH - ) + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF(SECOND, '0000-01-01 00:00:00', CURRENT_TIMESTAMP())) SECOND + ), + INTERVAL '-50' HOUR + ) AS _expr44, + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + HOUR, + '0000-01-01 00:00:00', + DATE_ADD( + DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '297' DAY), INTERVAL '72' MONTH), + INTERVAL '-92' MONTH + ) + )) HOUR ) AS _expr45, CAST(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '285' SECOND) AS DATE) AS _expr46, CAST('1999-05-15 00:00:00' AS DATETIME) AS _expr47, @@ -160,43 +264,102 @@ SELECT INTERVAL '-21' DAY ) AS _expr48, DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '212' MINUTE), INTERVAL '368' YEAR) AS _expr49, - STR_TO_DATE( - CONCAT( - YEAR( - STR_TO_DATE( - CONCAT( - YEAR(CAST('2024-12-31 11:59:00' AS DATETIME)), - ' ', - MONTH(CAST('2024-12-31 11:59:00' AS DATETIME)), - ' 1' - ), - '%Y %c %e' - ) - ), - ' 1 1' - ), - '%Y %c %e' + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + MINUTE, + '0000-01-01 00:00:00', + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + MINUTE, + '0000-01-01 00:00:00', + STR_TO_DATE( + CONCAT( + YEAR( + STR_TO_DATE( + CONCAT( + YEAR(CAST('2024-12-31 11:59:00' AS DATETIME)), + ' ', + MONTH(CAST('2024-12-31 11:59:00' AS DATETIME)), + ' 1' + ), + '%Y %c %e' + ) + ), + ' 1 1' + ), + '%Y %c %e' + ) + )) MINUTE + ) + )) MINUTE ) AS _expr50, - DATE(CAST('1999-03-14' AS DATETIME)) AS _expr51, + CAST(DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF(HOUR, '0000-01-01 00:00:00', CAST('1999-03-14' AS DATETIME))) HOUR + ) AS DATE) AS _expr51, DATE_ADD( - CAST(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '-60' HOUR) AS DATE), + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + MINUTE, + '0000-01-01 00:00:00', + CAST(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '-60' HOUR) AS DATE) + )) MINUTE + ), INTERVAL '196' YEAR ) AS _expr52, DATE_ADD( - CAST(DATE( - DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '-40' HOUR), 
INTERVAL '-385' DAY) - ) AS DATETIME), + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + MINUTE, + '0000-01-01 00:00:00', + DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '-40' HOUR), INTERVAL '-385' DAY) + )) MINUTE + ), INTERVAL '29' HOUR ) AS _expr53, - DATE(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '405' DAY)) AS _expr54, + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + MINUTE, + '0000-01-01 00:00:00', + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF(HOUR, '0000-01-01 00:00:00', DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '405' DAY))) HOUR + ) + )) MINUTE + ) AS _expr54, DATE_ADD( DATE_ADD( - STR_TO_DATE(CONCAT(YEAR(CURRENT_TIMESTAMP()), ' 1 1'), '%Y %c %e'), + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + SECOND, + '0000-01-01 00:00:00', + STR_TO_DATE(CONCAT(YEAR(CURRENT_TIMESTAMP()), ' 1 1'), '%Y %c %e') + )) SECOND + ), INTERVAL '98' YEAR ), INTERVAL '96' MONTH ) AS _expr55, - DATE_ADD(CAST(DATE(CURRENT_TIMESTAMP()) AS DATETIME), INTERVAL '78' SECOND) AS _expr56, + DATE_ADD( + CAST(CAST(DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF( + SECOND, + '0000-01-01 00:00:00', + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF(MINUTE, '0000-01-01 00:00:00', CURRENT_TIMESTAMP())) MINUTE + ) + )) SECOND + ) AS DATE) AS DATETIME), + INTERVAL '78' SECOND + ) AS _expr56, DATE_ADD( DATE_ADD( DATE_ADD(DATE_ADD(CURRENT_TIMESTAMP(), INTERVAL '136' HOUR), INTERVAL '104' MINUTE), diff --git a/tests/test_sql_refsols/datetime_sampler_snowflake.sql b/tests/test_sql_refsols/datetime_sampler_snowflake.sql new file mode 100644 index 000000000..5d6798a50 --- /dev/null +++ b/tests/test_sql_refsols/datetime_sampler_snowflake.sql @@ -0,0 +1,170 @@ +SELECT + CAST('2025-07-04 12:58:45' AS TIMESTAMP) AS _expr0, + CAST('2024-12-31 11:59:00' AS TIMESTAMP) AS _expr1, + CAST('2025-01-01' AS TIMESTAMP) AS _expr2, + CAST('1999-03-14' AS TIMESTAMP) AS _expr3, + CURRENT_TIMESTAMP() AS _expr4, + CURRENT_TIMESTAMP() AS _expr5, + CURRENT_TIMESTAMP() AS _expr6, + CURRENT_TIMESTAMP() AS _expr7, + CURRENT_TIMESTAMP() AS _expr8, + CURRENT_TIMESTAMP() AS _expr9, + CURRENT_TIMESTAMP() AS _expr10, + CURRENT_TIMESTAMP() AS _expr11, + CURRENT_TIMESTAMP() AS _expr12, + CURRENT_TIMESTAMP() AS _expr13, + CURRENT_TIMESTAMP() AS _expr14, + CURRENT_TIMESTAMP() AS _expr15, + CURRENT_TIMESTAMP() AS _expr16, + CURRENT_TIMESTAMP() AS _expr17, + CURRENT_TIMESTAMP() AS _expr18, + CAST(o_orderdate AS TIMESTAMP) AS _expr19, + DATE_TRUNC('SECOND', CURRENT_TIMESTAMP()) AS _expr20, + DATEADD(MONTH, -141, DATEADD(MINUTE, 8, DATE_TRUNC('YEAR', CURRENT_TIMESTAMP()))) AS _expr21, + DATE_TRUNC('HOUR', DATE_TRUNC('SECOND', DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()))) AS _expr22, + DATE_TRUNC('HOUR', DATE_TRUNC('SECOND', DATE_TRUNC('HOUR', CURRENT_TIMESTAMP()))) AS _expr23, + DATEADD(YEAR, 15, DATEADD(HOUR, -96, CURRENT_TIMESTAMP())) AS _expr24, + DATEADD( + MONTH, + 65, + DATE_TRUNC('MINUTE', DATEADD(YEAR, -3, DATE_TRUNC('YEAR', CURRENT_TIMESTAMP()))) + ) AS _expr25, + DATE_TRUNC('YEAR', DATEADD(HOUR, -56, CAST(o_orderdate AS TIMESTAMP))) AS _expr26, + DATE_TRUNC('SECOND', DATE_TRUNC('MINUTE', DATEADD(DAY, -63, CURRENT_TIMESTAMP()))) AS _expr27, + DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()) AS _expr28, + DATEADD(YEAR, 48, DATE_TRUNC('SECOND', DATEADD(HOUR, -312, CURRENT_TIMESTAMP()))) AS _expr29, + DATEADD( + DAY, + -294, + DATEADD(MINUTE, 600, DATE_TRUNC('DAY', DATEADD(DAY, 75, CURRENT_TIMESTAMP()))) + ) AS _expr30, + DATEADD(YEAR, -45, DATEADD(MONTH, 480, DATE_TRUNC('MONTH', 
CURRENT_TIMESTAMP()))) AS _expr31, + DATE_TRUNC( + 'SECOND', + DATE_TRUNC('DAY', DATEADD(SECOND, -34, DATEADD(MINUTE, -270, CURRENT_TIMESTAMP()))) + ) AS _expr32, + DATEADD(SECOND, 213, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP())) AS _expr33, + DATEADD( + SECOND, + 344, + DATEADD(YEAR, 28, DATEADD(MINUTE, 13, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()))) + ) AS _expr34, + DATE_TRUNC('DAY', CURRENT_TIMESTAMP()) AS _expr35, + DATEADD(YEAR, 91, DATEADD(MINUTE, 49, DATE_TRUNC('HOUR', CAST('2025-01-01' AS TIMESTAMP)))) AS _expr36, + DATE_TRUNC('DAY', DATE_TRUNC('YEAR', CURRENT_TIMESTAMP())) AS _expr37, + DATE_TRUNC('YEAR', DATE_TRUNC('DAY', CURRENT_TIMESTAMP())) AS _expr38, + DATEADD(MINUTE, 22, DATE_TRUNC('MONTH', CAST('2025-07-04 12:58:45' AS TIMESTAMP))) AS _expr39, + DATE_TRUNC('YEAR', CURRENT_TIMESTAMP()) AS _expr40, + DATE_TRUNC( + 'YEAR', + DATEADD( + SECOND, + -160, + DATEADD(SECOND, 415, DATEADD(SECOND, 82, CAST(o_orderdate AS TIMESTAMP))) + ) + ) AS _expr41, + DATEADD(MONTH, 192, CURRENT_TIMESTAMP()) AS _expr42, + DATEADD( + MINUTE, + 486, + DATE_TRUNC('HOUR', DATE_TRUNC('MINUTE', DATE_TRUNC('HOUR', CURRENT_TIMESTAMP()))) + ) AS _expr43, + DATEADD(HOUR, -50, DATE_TRUNC('SECOND', CURRENT_TIMESTAMP())) AS _expr44, + DATE_TRUNC( + 'HOUR', + DATEADD(MONTH, -92, DATEADD(MONTH, 72, DATEADD(DAY, 297, CURRENT_TIMESTAMP()))) + ) AS _expr45, + DATE_TRUNC('DAY', DATEADD(SECOND, 285, CURRENT_TIMESTAMP())) AS _expr46, + CAST('1999-05-15 00:00:00' AS TIMESTAMP) AS _expr47, + DATEADD( + DAY, + -21, + DATE_TRUNC('MONTH', DATEADD(HOUR, 1, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()))) + ) AS _expr48, + DATEADD(YEAR, 368, DATEADD(MINUTE, 212, CURRENT_TIMESTAMP())) AS _expr49, + DATE_TRUNC( + 'MINUTE', + DATE_TRUNC( + 'MINUTE', + DATE_TRUNC('YEAR', DATE_TRUNC('MONTH', CAST('2024-12-31 11:59:00' AS TIMESTAMP))) + ) + ) AS _expr50, + DATE_TRUNC('DAY', DATE_TRUNC('HOUR', CAST('1999-03-14' AS TIMESTAMP))) AS _expr51, + DATEADD( + YEAR, + 196, + DATE_TRUNC('MINUTE', DATE_TRUNC('DAY', DATEADD(HOUR, -60, CURRENT_TIMESTAMP()))) + ) AS _expr52, + DATEADD( + HOUR, + 29, + DATE_TRUNC('MINUTE', DATEADD(DAY, -385, DATEADD(HOUR, -40, CURRENT_TIMESTAMP()))) + ) AS _expr53, + DATE_TRUNC('MINUTE', DATE_TRUNC('HOUR', DATEADD(DAY, 405, CURRENT_TIMESTAMP()))) AS _expr54, + DATEADD( + MONTH, + 96, + DATEADD(YEAR, 98, DATE_TRUNC('SECOND', DATE_TRUNC('YEAR', CURRENT_TIMESTAMP()))) + ) AS _expr55, + DATEADD( + SECOND, + 78, + DATE_TRUNC('DAY', DATE_TRUNC('SECOND', DATE_TRUNC('MINUTE', CURRENT_TIMESTAMP()))) + ) AS _expr56, + DATEADD( + DAY, + 312, + DATEADD(MONTH, -104, DATEADD(MINUTE, 104, DATEADD(HOUR, 136, CURRENT_TIMESTAMP()))) + ) AS _expr57, + DATEADD(SECOND, -135, DATEADD(MONTH, 45, CURRENT_TIMESTAMP())) AS _expr58, + YEAR(CURRENT_TIMESTAMP()) AS _expr59, + YEAR(CAST('2025-07-04 12:58:45' AS TIMESTAMP)) AS _expr60, + YEAR(CAST('1999-03-14' AS TIMESTAMP)) AS _expr61, + MONTH(CURRENT_TIMESTAMP()) AS _expr62, + MONTH(CAST(CAST('2001-06-30' AS DATE) AS TIMESTAMP)) AS _expr63, + MONTH(CAST('1999-03-14' AS TIMESTAMP)) AS _expr64, + DAY(CURRENT_TIMESTAMP()) AS _expr65, + DAY(CAST('2025-07-04 12:58:45' AS TIMESTAMP)) AS _expr66, + DAY(CAST('2025-07-04 12:58:45' AS TIMESTAMP)) AS _expr67, + HOUR(CURRENT_TIMESTAMP()) AS _expr68, + HOUR(CAST(CAST('2001-06-30' AS DATE) AS TIMESTAMP)) AS _expr69, + HOUR(CAST('2024-01-01' AS TIMESTAMP)) AS _expr70, + MINUTE(CURRENT_TIMESTAMP()) AS _expr71, + MINUTE(CAST('2024-12-25 20:30:59' AS TIMESTAMP)) AS _expr72, + MINUTE(CAST('2024-01-01' AS TIMESTAMP)) AS _expr73, + SECOND(CURRENT_TIMESTAMP()) AS 
_expr74, + SECOND(CAST('2025-07-04 12:58:45' AS TIMESTAMP)) AS _expr75, + SECOND(CAST('1999-03-14' AS TIMESTAMP)) AS _expr76, + DATEDIFF(YEAR, CAST('2018-02-14 12:41:06' AS TIMESTAMP), CURRENT_TIMESTAMP()) AS _expr77, + DATEDIFF(YEAR, CAST(o_orderdate AS DATETIME), CAST('2022-11-24' AS DATE)) AS _expr78, + DATEDIFF(MONTH, CAST('2005-06-30' AS DATE), CAST('1999-03-14' AS TIMESTAMP)) AS _expr79, + DATEDIFF(MONTH, CAST('2006-05-01 12:00:00' AS TIMESTAMP), CAST('2022-11-24' AS DATE)) AS _expr80, + DATEDIFF(DAY, CURRENT_TIMESTAMP(), CURRENT_TIMESTAMP()) AS _expr81, + DATEDIFF(DAY, CAST('1999-03-14' AS TIMESTAMP), CURRENT_TIMESTAMP()) AS _expr82, + DATEDIFF(HOUR, CURRENT_TIMESTAMP(), CURRENT_TIMESTAMP()) AS _expr83, + DATEDIFF(HOUR, CAST('2005-06-30' AS DATE), CAST(o_orderdate AS DATETIME)) AS _expr84, + DATEDIFF(MINUTE, CURRENT_TIMESTAMP(), CAST('2006-05-01 12:00:00' AS TIMESTAMP)) AS _expr85, + DATEDIFF(MINUTE, CAST(o_orderdate AS DATETIME), CAST('2021-01-01 07:35:00' AS TIMESTAMP)) AS _expr86, + DATEDIFF(SECOND, CAST('2022-11-24' AS DATE), CAST('2021-01-01 07:35:00' AS TIMESTAMP)) AS _expr87, + DATEDIFF(SECOND, CAST('2005-06-30' AS DATE), CAST('2018-02-14 12:41:06' AS TIMESTAMP)) AS _expr88, + DATEDIFF(YEAR, CAST(o_orderdate AS DATETIME), CAST('2006-05-01 12:00:00' AS TIMESTAMP)) AS _expr89, + DATEDIFF(YEAR, CAST('2018-02-14 12:41:06' AS TIMESTAMP), CAST(o_orderdate AS DATETIME)) AS _expr90, + DATEDIFF(MONTH, CAST(o_orderdate AS DATETIME), CAST('2019-07-04 11:30:00' AS TIMESTAMP)) AS _expr91, + DATEDIFF( + MONTH, + CAST('2019-07-04 11:30:00' AS TIMESTAMP), + CAST('2018-02-14 12:41:06' AS TIMESTAMP) + ) AS _expr92, + DATEDIFF(DAY, CURRENT_TIMESTAMP(), CAST(o_orderdate AS DATETIME)) AS _expr93, + DATEDIFF(DAY, CAST('2019-07-04 11:30:00' AS TIMESTAMP), CURRENT_TIMESTAMP()) AS _expr94, + DATEDIFF(HOUR, CAST('2022-11-24' AS DATE), CAST('1999-03-14' AS TIMESTAMP)) AS _expr95, + DATEDIFF( + HOUR, + CAST('2018-02-14 12:41:06' AS TIMESTAMP), + CAST('2020-12-31 00:31:06' AS TIMESTAMP) + ) AS _expr96, + DATEDIFF(MINUTE, CAST('2005-06-30' AS DATE), CAST('2020-12-31 00:31:06' AS TIMESTAMP)) AS _expr97, + DATEDIFF(MINUTE, CURRENT_TIMESTAMP(), CAST('2018-02-14 12:41:06' AS TIMESTAMP)) AS _expr98, + DATEDIFF(SECOND, CURRENT_TIMESTAMP(), CAST('1999-03-14' AS TIMESTAMP)) AS _expr99, + DATEDIFF(SECOND, CAST('2022-11-24' AS DATE), CAST('2019-07-04 11:30:00' AS TIMESTAMP)) AS _expr100 +FROM tpch.orders diff --git a/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql new file mode 100644 index 000000000..b7b4c5729 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv10_snowflake.sql @@ -0,0 +1,24 @@ +WITH _s1 AS ( + SELECT + YEAR(CAST(sbtxdatetime AS TIMESTAMP)) AS expr_1, + MONTH(CAST(sbtxdatetime AS TIMESTAMP)) AS expr_2, + COUNT(*) AS n_rows, + sbtxcustid + FROM main.sbtransaction + GROUP BY + 1, + 2, + 4 +) +SELECT + sbcustomer.sbcustid AS _id, + sbcustomer.sbcustname AS name, + COALESCE(_s1.n_rows, 0) AS num_transactions +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _s1 AS _s1 + ON _s1.expr_1 = YEAR(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + AND _s1.expr_2 = MONTH(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) + AND _s1.sbtxcustid = sbcustomer.sbcustid +ORDER BY + 3 DESC NULLS LAST +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_adv11_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv11_snowflake.sql new file mode 100644 index 000000000..f497c81e3 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv11_snowflake.sql 
@@ -0,0 +1,17 @@ +WITH _u_0 AS ( + SELECT + sbtransaction.sbtxcustid AS _u_1 + FROM main.sbtransaction AS sbtransaction + JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid + AND sbticker.sbtickersymbol IN ('AMZN', 'AAPL', 'GOOGL', 'META', 'NFLX') + GROUP BY + 1 +) +SELECT + COUNT(*) AS n_customers +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = sbcustomer.sbcustid +WHERE + ENDSWITH(sbcustomer.sbcustemail, '.com') AND NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_broker_adv12_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv12_snowflake.sql new file mode 100644 index 000000000..b2ee83029 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv12_snowflake.sql @@ -0,0 +1,8 @@ +SELECT + COUNT(*) AS n_customers +FROM main.sbcustomer +WHERE + ( + ENDSWITH(LOWER(sbcustname), 'ez') OR STARTSWITH(LOWER(sbcustname), 'j') + ) + AND ENDSWITH(LOWER(sbcuststate), 'a') diff --git a/tests/test_sql_refsols/defog_broker_adv13_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv13_snowflake.sql new file mode 100644 index 000000000..011c5ba86 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv13_snowflake.sql @@ -0,0 +1,8 @@ +SELECT + sbcustcountry AS cust_country, + COUNT(*) AS TAC +FROM main.sbcustomer +WHERE + sbcustjoindate >= CAST('2023-01-01' AS DATE) +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_broker_adv14_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv14_snowflake.sql new file mode 100644 index 000000000..0d9f76898 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv14_snowflake.sql @@ -0,0 +1,9 @@ +SELECT + sbticker.sbtickertype AS ticker_type, + AVG(sbdailyprice.sbdpclose) AS ACP +FROM main.sbticker AS sbticker +JOIN main.sbdailyprice AS sbdailyprice + ON DATEDIFF(DAY, CAST(sbdailyprice.sbdpdate AS DATETIME), CURRENT_TIMESTAMP()) <= 7 + AND sbdailyprice.sbdptickerid = sbticker.sbtickerid +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_broker_adv15_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv15_snowflake.sql new file mode 100644 index 000000000..e735bcfcd --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv15_snowflake.sql @@ -0,0 +1,10 @@ +SELECT + sbcustcountry AS country, + 100 * ( + COALESCE(COUNT_IF(sbcuststatus = 'active'), 0) / COUNT(*) + ) AS ar +FROM main.sbcustomer +WHERE + sbcustjoindate <= '2022-12-31' AND sbcustjoindate >= '2022-01-01' +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_broker_adv16_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv16_snowflake.sql new file mode 100644 index 000000000..93d7cefd8 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv16_snowflake.sql @@ -0,0 +1,23 @@ +WITH _s1 AS ( + SELECT + SUM(sbtxtax + sbtxcommission) AS sum_expr_2, + SUM(sbtxamount) AS sum_sbtxamount, + sbtxtickerid + FROM main.sbtransaction + WHERE + sbtxdatetime >= DATEADD(MONTH, -1, CURRENT_TIMESTAMP()) AND sbtxtype = 'sell' + GROUP BY + 3 +) +SELECT + sbticker.sbtickersymbol AS symbol, + ( + 100.0 * ( + COALESCE(_s1.sum_sbtxamount, 0) - COALESCE(_s1.sum_expr_2, 0) + ) + ) / COALESCE(_s1.sum_sbtxamount, 0) AS SPM +FROM main.sbticker AS sbticker +JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_broker_adv1_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv1_snowflake.sql new file mode 100644 index 000000000..b83dcb2ac --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv1_snowflake.sql @@ -0,0 +1,17 @@ +WITH _s1 AS ( + SELECT + 
SUM(sbtxamount) AS sum_sbtxamount, + sbtxcustid + FROM main.sbtransaction + GROUP BY + 2 +) +SELECT + sbcustomer.sbcustname AS name, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxcustid = sbcustomer.sbcustid +ORDER BY + 2 DESC NULLS LAST +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_broker_adv2_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv2_snowflake.sql new file mode 100644 index 000000000..b4dfab519 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv2_snowflake.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + sbtxtickerid + FROM main.sbtransaction + WHERE + sbtxdatetime >= DATE_TRUNC('DAY', DATEADD(DAY, -10, CURRENT_TIMESTAMP())) + AND sbtxtype = 'buy' + GROUP BY + 2 +) +SELECT + sbticker.sbtickersymbol AS symbol, + COALESCE(_s1.n_rows, 0) AS tx_count +FROM main.sbticker AS sbticker +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid +ORDER BY + 2 DESC NULLS LAST +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_broker_adv3_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv3_snowflake.sql new file mode 100644 index 000000000..542ba352c --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv3_snowflake.sql @@ -0,0 +1,19 @@ +WITH _t1 AS ( + SELECT + COUNT(*) AS n_rows, + COUNT_IF(sbtxstatus = 'success') AS sum_expr_2, + sbtxcustid + FROM main.sbtransaction + GROUP BY + 3 +) +SELECT + sbcustomer.sbcustname AS name, + ( + 100.0 * COALESCE(_t1.sum_expr_2, 0) + ) / _t1.n_rows AS success_rate +FROM main.sbcustomer AS sbcustomer +JOIN _t1 AS _t1 + ON _t1.n_rows >= 5 AND _t1.sbtxcustid = sbcustomer.sbcustid +ORDER BY + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_broker_adv4_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv4_snowflake.sql new file mode 100644 index 000000000..37d5e5c4a --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv4_snowflake.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + MAX(sbdphigh) AS max_sbdphigh, + MIN(sbdplow) AS min_sbdplow, + sbdptickerid + FROM main.sbdailyprice + WHERE + sbdpdate <= CAST('2023-04-04' AS DATE) AND sbdpdate >= CAST('2023-04-01' AS DATE) + GROUP BY + 3 +) +SELECT + sbticker.sbtickersymbol AS symbol, + _s1.max_sbdphigh - _s1.min_sbdplow AS price_change +FROM main.sbticker AS sbticker +LEFT JOIN _s1 AS _s1 + ON _s1.sbdptickerid = sbticker.sbtickerid +ORDER BY + 2 DESC NULLS LAST +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_broker_adv5_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv5_snowflake.sql new file mode 100644 index 000000000..09b128632 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv5_snowflake.sql @@ -0,0 +1,43 @@ +WITH _s0 AS ( + SELECT + COUNT(sbdpclose) AS count_sbdpclose, + MAX(sbdphigh) AS max_high, + MIN(sbdplow) AS min_low, + CONCAT_WS( + '-', + YEAR(CAST(sbdpdate AS TIMESTAMP)), + LPAD(MONTH(CAST(sbdpdate AS TIMESTAMP)), 2, '0') + ) AS month, + SUM(sbdpclose) AS sum_sbdpclose, + sbdptickerid + FROM main.sbdailyprice + GROUP BY + 4, + 6 +), _t0 AS ( + SELECT + MAX(_s0.max_high) AS max_high, + MIN(_s0.min_low) AS min_low, + SUM(_s0.count_sbdpclose) AS sum_count_sbdpclose, + SUM(_s0.sum_sbdpclose) AS sum_sum_sbdpclose, + _s0.month, + sbticker.sbtickersymbol + FROM _s0 AS _s0 + JOIN main.sbticker AS sbticker + ON _s0.sbdptickerid = sbticker.sbtickerid + GROUP BY + 5, + 6 +) +SELECT + sbtickersymbol AS symbol, + month, + sum_sum_sbdpclose / sum_count_sbdpclose AS avg_close, + max_high, + min_low, + ( + ( + sum_sum_sbdpclose / sum_count_sbdpclose + ) - 
LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) + ) / LAG(sum_sum_sbdpclose / sum_count_sbdpclose, 1) OVER (PARTITION BY sbtickersymbol ORDER BY month) AS momc +FROM _t0 diff --git a/tests/test_sql_refsols/defog_broker_adv6_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv6_snowflake.sql new file mode 100644 index 000000000..cbbff3f2a --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv6_snowflake.sql @@ -0,0 +1,17 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(sbtxamount) AS sum_sbtxamount, + sbtxcustid + FROM main.sbtransaction + GROUP BY + 3 +) +SELECT + sbcustomer.sbcustname AS name, + _s1.n_rows AS num_tx, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount, + RANK() OVER (ORDER BY COALESCE(_s1.sum_sbtxamount, 0) DESC) AS cust_rank +FROM main.sbcustomer AS sbcustomer +JOIN _s1 AS _s1 + ON _s1.sbtxcustid = sbcustomer.sbcustid diff --git a/tests/test_sql_refsols/defog_broker_adv7_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv7_snowflake.sql new file mode 100644 index 000000000..c2416b560 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv7_snowflake.sql @@ -0,0 +1,40 @@ +WITH _s2 AS ( + SELECT + CONCAT_WS( + '-', + YEAR(CAST(sbcustjoindate AS TIMESTAMP)), + LPAD(MONTH(CAST(sbcustjoindate AS TIMESTAMP)), 2, '0') + ) AS month, + COUNT(*) AS n_rows + FROM main.sbcustomer + WHERE + sbcustjoindate < DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()) + AND sbcustjoindate >= DATE_TRUNC('MONTH', DATEADD(MONTH, -6, CURRENT_TIMESTAMP())) + GROUP BY + 1 +), _s3 AS ( + SELECT + AVG(sbtransaction.sbtxamount) AS avg_sbtxamount, + CONCAT_WS( + '-', + YEAR(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)), + LPAD(MONTH(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)), 2, '0') + ) AS month + FROM main.sbcustomer AS sbcustomer + JOIN main.sbtransaction AS sbtransaction + ON MONTH(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) = MONTH(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) + AND YEAR(CAST(sbcustomer.sbcustjoindate AS TIMESTAMP)) = YEAR(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid + WHERE + sbcustomer.sbcustjoindate < DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()) + AND sbcustomer.sbcustjoindate >= DATE_TRUNC('MONTH', DATEADD(MONTH, -6, CURRENT_TIMESTAMP())) + GROUP BY + 2 +) +SELECT + _s2.month, + _s2.n_rows AS customer_signups, + _s3.avg_sbtxamount AS avg_tx_amount +FROM _s2 AS _s2 +LEFT JOIN _s3 AS _s3 + ON _s2.month = _s3.month diff --git a/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql new file mode 100644 index 000000000..50141aab4 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv8_snowflake.sql @@ -0,0 +1,45 @@ +WITH _u_0 AS ( + SELECT + sbcustid AS _u_1 + FROM main.sbcustomer + WHERE + LOWER(sbcustcountry) = 'usa' + GROUP BY + 1 +) +SELECT + CASE WHEN COUNT(*) > 0 THEN COUNT(*) ELSE NULL END AS n_transactions, + COALESCE(SUM(sbtransaction.sbtxamount), 0) AS total_amount +FROM main.sbtransaction AS sbtransaction +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = sbtransaction.sbtxcustid +WHERE + NOT _u_0._u_1 IS NULL + AND sbtransaction.sbtxdatetime < DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CURRENT_TIMESTAMP()) + 6 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP() + ) + ) + AND sbtransaction.sbtxdatetime >= DATEADD( + WEEK, + -1, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CURRENT_TIMESTAMP()) + 6 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP() + ) + ) + ) diff --git 
a/tests/test_sql_refsols/defog_broker_adv9_snowflake.sql b/tests/test_sql_refsols/defog_broker_adv9_snowflake.sql new file mode 100644 index 000000000..e98831783 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_adv9_snowflake.sql @@ -0,0 +1,57 @@ +SELECT + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) + 6 + ) % 7 + ) * -1, + CAST(sbtransaction.sbtxdatetime AS TIMESTAMP) + ) + ) AS week, + COUNT(*) AS num_transactions, + COALESCE( + COUNT_IF(( + ( + DAYOFWEEK(sbtransaction.sbtxdatetime) + 6 + ) % 7 + ) IN (5, 6)), + 0 + ) AS weekend_transactions +FROM main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid + AND sbticker.sbtickertype = 'stock' +WHERE + sbtransaction.sbtxdatetime < DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CURRENT_TIMESTAMP()) + 6 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP() + ) + ) + AND sbtransaction.sbtxdatetime >= DATEADD( + WEEK, + -8, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CURRENT_TIMESTAMP()) + 6 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP() + ) + ) + ) +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_broker_basic10_snowflake.sql b/tests/test_sql_refsols/defog_broker_basic10_snowflake.sql new file mode 100644 index 000000000..1b3a16cf5 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic10_snowflake.sql @@ -0,0 +1,15 @@ +WITH _u_0 AS ( + SELECT + sbdptickerid AS _u_1 + FROM main.sbdailyprice + GROUP BY + 1 +) +SELECT + sbticker.sbtickerid AS _id, + sbticker.sbtickersymbol AS symbol +FROM main.sbticker AS sbticker +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = sbticker.sbtickerid +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_broker_basic1_snowflake.sql b/tests/test_sql_refsols/defog_broker_basic1_snowflake.sql new file mode 100644 index 000000000..121a66b86 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic1_snowflake.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(sbtxamount) AS sum_sbtxamount, + sbtxcustid + FROM main.sbtransaction + WHERE + sbtxdatetime >= DATE_TRUNC('DAY', DATEADD(DAY, -30, CURRENT_TIMESTAMP())) + GROUP BY + 3 +) +SELECT + sbcustomer.sbcustcountry AS country, + COALESCE(SUM(_s1.n_rows), 0) AS num_transactions, + COALESCE(SUM(_s1.sum_sbtxamount), 0) AS total_amount +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxcustid = sbcustomer.sbcustid +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_broker_basic2_snowflake.sql b/tests/test_sql_refsols/defog_broker_basic2_snowflake.sql new file mode 100644 index 000000000..2c2ba028b --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic2_snowflake.sql @@ -0,0 +1,14 @@ +SELECT + sbtxtype AS transaction_type, + COUNT(DISTINCT sbtxcustid) AS num_customers, + AVG(sbtxshares) AS avg_shares +FROM main.sbtransaction +WHERE + sbtxdatetime <= CAST('2023-03-31' AS DATE) + AND sbtxdatetime >= CAST('2023-01-01' AS DATE) +GROUP BY + 1 +ORDER BY + 2 DESC NULLS LAST, + 1 NULLS FIRST +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_broker_basic3_snowflake.sql b/tests/test_sql_refsols/defog_broker_basic3_snowflake.sql new file mode 100644 index 000000000..14a43a168 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic3_snowflake.sql @@ -0,0 +1,19 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(sbtxamount) AS sum_sbtxamount, + sbtxtickerid + FROM main.sbtransaction + GROUP BY + 3 +) +SELECT + sbticker.sbtickersymbol AS symbol, + COALESCE(_s1.n_rows, 
0) AS num_transactions, + COALESCE(_s1.sum_sbtxamount, 0) AS total_amount +FROM main.sbticker AS sbticker +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxtickerid = sbticker.sbtickerid +ORDER BY + 3 DESC NULLS LAST +LIMIT 10 diff --git a/tests/test_sql_refsols/defog_broker_basic4_snowflake.sql b/tests/test_sql_refsols/defog_broker_basic4_snowflake.sql new file mode 100644 index 000000000..726590352 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic4_snowflake.sql @@ -0,0 +1,34 @@ +WITH _s0 AS ( + SELECT + COUNT(*) AS num_transactions, + sbtxcustid, + sbtxtickerid + FROM main.sbtransaction + GROUP BY + 2, + 3 +), _s2 AS ( + SELECT + SUM(_s0.num_transactions) AS num_transactions, + sbticker.sbtickertype, + _s0.sbtxcustid + FROM _s0 AS _s0 + JOIN main.sbticker AS sbticker + ON _s0.sbtxtickerid = sbticker.sbtickerid + GROUP BY + 2, + 3 +) +SELECT + sbcustomer.sbcuststate AS state, + _s2.sbtickertype AS ticker_type, + SUM(_s2.num_transactions) AS num_transactions +FROM _s2 AS _s2 +JOIN main.sbcustomer AS sbcustomer + ON _s2.sbtxcustid = sbcustomer.sbcustid +GROUP BY + 1, + 2 +ORDER BY + 3 DESC NULLS LAST +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_broker_basic5_snowflake.sql b/tests/test_sql_refsols/defog_broker_basic5_snowflake.sql new file mode 100644 index 000000000..669d92e42 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic5_snowflake.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + sbtxcustid AS _u_1 + FROM main.sbtransaction + WHERE + sbtxtype = 'buy' + GROUP BY + 1 +) +SELECT + sbcustomer.sbcustid AS _id +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = sbcustomer.sbcustid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_broker_basic6_snowflake.sql b/tests/test_sql_refsols/defog_broker_basic6_snowflake.sql new file mode 100644 index 000000000..170c215bd --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic6_snowflake.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + sbdptickerid AS _u_1 + FROM main.sbdailyprice + WHERE + sbdpdate >= CAST('2023-04-01' AS DATE) + GROUP BY + 1 +) +SELECT + sbticker.sbtickerid AS _id +FROM main.sbticker AS sbticker +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = sbticker.sbtickerid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_broker_basic7_snowflake.sql b/tests/test_sql_refsols/defog_broker_basic7_snowflake.sql new file mode 100644 index 000000000..c1f3031b1 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic7_snowflake.sql @@ -0,0 +1,9 @@ +SELECT + sbtxstatus AS status, + COUNT(*) AS num_transactions +FROM main.sbtransaction +GROUP BY + 1 +ORDER BY + 2 DESC NULLS LAST +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_broker_basic8_snowflake.sql b/tests/test_sql_refsols/defog_broker_basic8_snowflake.sql new file mode 100644 index 000000000..fb20ad227 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic8_snowflake.sql @@ -0,0 +1,9 @@ +SELECT + sbcustcountry AS country, + COUNT(*) AS num_customers +FROM main.sbcustomer +GROUP BY + 1 +ORDER BY + 2 DESC NULLS LAST +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_broker_basic9_snowflake.sql b/tests/test_sql_refsols/defog_broker_basic9_snowflake.sql new file mode 100644 index 000000000..f96baada7 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_basic9_snowflake.sql @@ -0,0 +1,15 @@ +WITH _u_0 AS ( + SELECT + sbtxcustid AS _u_1 + FROM main.sbtransaction + GROUP BY + 1 +) +SELECT + sbcustomer.sbcustid AS _id, + sbcustomer.sbcustname AS name +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _u_0 AS 
_u_0 + ON _u_0._u_1 = sbcustomer.sbcustid +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_broker_gen1_snowflake.sql b/tests/test_sql_refsols/defog_broker_gen1_snowflake.sql new file mode 100644 index 000000000..db65bbc3a --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_gen1_snowflake.sql @@ -0,0 +1,8 @@ +SELECT + MIN(sbdailyprice.sbdpclose) AS lowest_price +FROM main.sbdailyprice AS sbdailyprice +JOIN main.sbticker AS sbticker + ON sbdailyprice.sbdptickerid = sbticker.sbtickerid + AND sbticker.sbtickersymbol = 'VTI' +WHERE + DATEDIFF(DAY, CAST(sbdailyprice.sbdpdate AS DATETIME), CURRENT_TIMESTAMP()) <= 7 diff --git a/tests/test_sql_refsols/defog_broker_gen2_snowflake.sql b/tests/test_sql_refsols/defog_broker_gen2_snowflake.sql new file mode 100644 index 000000000..e2117c631 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_gen2_snowflake.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(sbtransaction.sbtxcustid) AS transaction_count +FROM main.sbtransaction AS sbtransaction +JOIN main.sbcustomer AS sbcustomer + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid + AND sbcustomer.sbcustjoindate >= DATE_TRUNC('DAY', DATEADD(DAY, -70, CURRENT_TIMESTAMP())) diff --git a/tests/test_sql_refsols/defog_broker_gen3_snowflake.sql b/tests/test_sql_refsols/defog_broker_gen3_snowflake.sql new file mode 100644 index 000000000..9a3cc7626 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_gen3_snowflake.sql @@ -0,0 +1,18 @@ +WITH _s1 AS ( + SELECT + MIN(sbtxdatetime) AS min_sbtxdatetime, + sbtxcustid + FROM main.sbtransaction + GROUP BY + 2 +) +SELECT + sbcustomer.sbcustid AS cust_id, + DATEDIFF( + SECOND, + CAST(sbcustomer.sbcustjoindate AS DATETIME), + CAST(_s1.min_sbtxdatetime AS DATETIME) + ) / 86400.0 AS DaysFromJoinToFirstTransaction +FROM main.sbcustomer AS sbcustomer +JOIN _s1 AS _s1 + ON _s1.sbtxcustid = sbcustomer.sbcustid diff --git a/tests/test_sql_refsols/defog_broker_gen4_snowflake.sql b/tests/test_sql_refsols/defog_broker_gen4_snowflake.sql new file mode 100644 index 000000000..c534bb045 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_gen4_snowflake.sql @@ -0,0 +1,22 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + sbtxcustid + FROM main.sbtransaction + WHERE + CAST(sbtxdatetime AS TIMESTAMP) < CAST('2023-04-02' AS DATE) + AND CAST(sbtxdatetime AS TIMESTAMP) >= CAST('2023-04-01' AS DATE) + AND sbtxtype = 'sell' + GROUP BY + 2 +) +SELECT + sbcustomer.sbcustid AS _id, + sbcustomer.sbcustname AS name, + COALESCE(_s1.n_rows, 0) AS num_tx +FROM main.sbcustomer AS sbcustomer +LEFT JOIN _s1 AS _s1 + ON _s1.sbtxcustid = sbcustomer.sbcustid +ORDER BY + 3 DESC NULLS LAST +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_broker_gen5_snowflake.sql b/tests/test_sql_refsols/defog_broker_gen5_snowflake.sql new file mode 100644 index 000000000..e0c8a99a3 --- /dev/null +++ b/tests/test_sql_refsols/defog_broker_gen5_snowflake.sql @@ -0,0 +1,12 @@ +SELECT + DATE_TRUNC('MONTH', CAST(sbtxdatetime AS TIMESTAMP)) AS month, + AVG(sbtxprice) AS avg_price +FROM main.sbtransaction +WHERE + QUARTER(CAST(sbtxdatetime AS TIMESTAMP)) = 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) = 2023 + AND sbtxstatus = 'success' +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_dealership_adv10_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv10_snowflake.sql new file mode 100644 index 000000000..376248e57 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv10_snowflake.sql @@ -0,0 +1,18 @@ +WITH _s1 AS ( + SELECT + MAX(payment_date) AS 
max_payment_date, + sale_id + FROM main.payments_received + GROUP BY + 2 +) +SELECT + ROUND( + AVG( + DATEDIFF(DAY, CAST(sales.sale_date AS DATETIME), CAST(_s1.max_payment_date AS DATETIME)) + ), + 2 + ) AS avg_days_to_payment +FROM main.sales AS sales +LEFT JOIN _s1 AS _s1 + ON _s1.sale_id = sales._id diff --git a/tests/test_sql_refsols/defog_dealership_adv11_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv11_snowflake.sql new file mode 100644 index 000000000..57a27617d --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv11_snowflake.sql @@ -0,0 +1,11 @@ +SELECT + ( + ( + COALESCE(SUM(sales.sale_price), 0) - COALESCE(SUM(cars.cost), 0) + ) / COALESCE(SUM(cars.cost), 0) + ) * 100 AS GPM +FROM main.sales AS sales +JOIN main.cars AS cars + ON cars._id = sales.car_id +WHERE + YEAR(CAST(sales.sale_date AS TIMESTAMP)) = 2023 diff --git a/tests/test_sql_refsols/defog_dealership_adv12_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv12_snowflake.sql new file mode 100644 index 000000000..441231c8e --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv12_snowflake.sql @@ -0,0 +1,28 @@ +WITH _t1 AS ( + SELECT + ANY_VALUE(sales.car_id) AS anything_car_id, + ANY_VALUE(cars.make) AS anything_make, + ANY_VALUE(cars.model) AS anything_model, + ANY_VALUE(sales.sale_price) AS anything_sale_price, + cars._id AS _id_1 + FROM main.sales AS sales + JOIN main.cars AS cars + ON cars._id = sales.car_id + JOIN main.inventory_snapshots AS inventory_snapshots + ON cars._id = inventory_snapshots.car_id + AND inventory_snapshots.is_in_inventory = 0 + AND inventory_snapshots.snapshot_date = sales.sale_date + GROUP BY + sales._id, + 5 +) +SELECT + anything_make AS make, + anything_model AS model, + anything_sale_price AS sale_price +FROM _t1 +WHERE + _id_1 = anything_car_id +ORDER BY + 3 DESC NULLS LAST +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv14_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv14_snowflake.sql new file mode 100644 index 000000000..aa0f7479e --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv14_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + COUNT(*) AS TSC +FROM main.sales +WHERE + DATEDIFF(DAY, CAST(sale_date AS DATETIME), CURRENT_TIMESTAMP()) <= 7 diff --git a/tests/test_sql_refsols/defog_dealership_adv15_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv15_snowflake.sql new file mode 100644 index 000000000..33e8ec651 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv15_snowflake.sql @@ -0,0 +1,18 @@ +WITH _s1 AS ( + SELECT + AVG(sale_price) AS avg_sale_price, + salesperson_id + FROM main.sales + GROUP BY + 2 +) +SELECT + salespersons.first_name, + salespersons.last_name, + _s1.avg_sale_price AS ASP +FROM main.salespersons AS salespersons +LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id +ORDER BY + 3 DESC NULLS LAST +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_adv16_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv16_snowflake.sql new file mode 100644 index 000000000..f51f3467f --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv16_snowflake.sql @@ -0,0 +1,19 @@ +WITH _s1 AS ( + SELECT + SUM(sale_price) AS sum_sale_price, + salesperson_id + FROM main.sales + GROUP BY + 2 +) +SELECT + salespersons._id, + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.sum_sale_price, 0) AS total +FROM main.salespersons AS salespersons +LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id +ORDER BY + 4 DESC NULLS LAST +LIMIT 5 diff --git 
a/tests/test_sql_refsols/defog_dealership_adv1_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv1_snowflake.sql new file mode 100644 index 000000000..28c10f073 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv1_snowflake.sql @@ -0,0 +1,70 @@ +SELECT + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(payments_received.payment_date AS TIMESTAMP)) + 6 + ) % 7 + ) * -1, + CAST(payments_received.payment_date AS TIMESTAMP) + ) + ) AS payment_week, + COUNT(*) AS total_payments, + COALESCE( + COUNT_IF(( + ( + DAYOFWEEK(payments_received.payment_date) + 6 + ) % 7 + ) IN (5, 6)), + 0 + ) AS weekend_payments +FROM main.payments_received AS payments_received +JOIN main.sales AS sales + ON payments_received.sale_id = sales._id AND sales.sale_price > 30000 +WHERE + DATEDIFF( + WEEK, + CAST(DATEADD( + DAY, + ( + ( + DAYOFWEEK(payments_received.payment_date) + 6 + ) % 7 + ) * -1, + payments_received.payment_date + ) AS DATETIME), + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CURRENT_TIMESTAMP()) + 6 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP() + ) + ) <= 8 + AND DATEDIFF( + WEEK, + CAST(DATEADD( + DAY, + ( + ( + DAYOFWEEK(payments_received.payment_date) + 6 + ) % 7 + ) * -1, + payments_received.payment_date + ) AS DATETIME), + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CURRENT_TIMESTAMP()) + 6 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP() + ) + ) >= 1 +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_dealership_adv2_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv2_snowflake.sql new file mode 100644 index 000000000..951d47afe --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv2_snowflake.sql @@ -0,0 +1,21 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + salesperson_id + FROM main.sales + WHERE + DATEDIFF(DAY, CAST(sale_date AS DATETIME), CURRENT_TIMESTAMP()) <= 30 + GROUP BY + 2 +) +SELECT + salespersons._id, + salespersons.first_name, + salespersons.last_name, + _s1.n_rows AS num_sales +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id +ORDER BY + 4 DESC NULLS LAST, + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql new file mode 100644 index 000000000..05258a65e --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv3_snowflake.sql @@ -0,0 +1,17 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + car_id + FROM main.sales + GROUP BY + 2 +) +SELECT + cars.make, + cars.model, + COALESCE(_s1.n_rows, 0) AS num_sales +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + CONTAINS(LOWER(cars.vin_number), 'm5') diff --git a/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql new file mode 100644 index 000000000..2d82c7314 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv4_snowflake.sql @@ -0,0 +1,25 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(sale_price) AS sum_sale_price, + car_id + FROM main.sales + WHERE + sale_date >= DATEADD(DAY, -30, CURRENT_TIMESTAMP()) + GROUP BY + 3 +) +SELECT + COALESCE(_s1.n_rows, 0) AS num_sales, + CASE + WHEN ( + NOT _s1.n_rows IS NULL AND _s1.n_rows > 0 + ) + THEN COALESCE(_s1.sum_sale_price, 0) + ELSE NULL + END AS total_revenue +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + CONTAINS(LOWER(cars.make), 'toyota') diff --git a/tests/test_sql_refsols/defog_dealership_adv5_snowflake.sql 
b/tests/test_sql_refsols/defog_dealership_adv5_snowflake.sql new file mode 100644 index 000000000..c6080293e --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv5_snowflake.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(sale_price) AS sum_sale_price, + salesperson_id + FROM main.sales + GROUP BY + 3 +) +SELECT + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.sum_sale_price, 0) AS total_sales, + _s1.n_rows AS num_sales, + RANK() OVER (ORDER BY COALESCE(_s1.sum_sale_price, 0) DESC) AS sales_rank +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id +ORDER BY + 3 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql new file mode 100644 index 000000000..e9166f5be --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv6_snowflake.sql @@ -0,0 +1,26 @@ +WITH _t1 AS ( + SELECT + car_id + FROM main.inventory_snapshots + QUALIFY + NOT is_in_inventory + AND ROW_NUMBER() OVER (PARTITION BY car_id ORDER BY snapshot_date DESC) = 1 +), _s3 AS ( + SELECT + MAX(sale_price) AS max_sale_price, + car_id + FROM main.sales + GROUP BY + 2 +) +SELECT + cars.make, + cars.model, + _s3.max_sale_price AS highest_sale_price +FROM main.cars AS cars +JOIN _t1 AS _t1 + ON _t1.car_id = cars._id +LEFT JOIN _s3 AS _s3 + ON _s3.car_id = cars._id +ORDER BY + 3 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_dealership_adv7_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv7_snowflake.sql new file mode 100644 index 000000000..265fc93bd --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv7_snowflake.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + AVG(sale_price) AS avg_sale_price, + car_id + FROM main.sales + GROUP BY + 2 +) +SELECT + cars.make, + cars.model, + cars.year, + cars.color, + cars.vin_number, + _s1.avg_sale_price +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +WHERE + CONTAINS(LOWER(cars.make), 'fords') OR CONTAINS(LOWER(cars.model), 'mustang') diff --git a/tests/test_sql_refsols/defog_dealership_adv9_snowflake.sql b/tests/test_sql_refsols/defog_dealership_adv9_snowflake.sql new file mode 100644 index 000000000..a581d84e5 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_adv9_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + AVG(sale_price) AS ASP +FROM main.sales +WHERE + sale_date <= '2023-03-31' AND sale_date >= '2023-01-01' diff --git a/tests/test_sql_refsols/defog_dealership_basic10_snowflake.sql b/tests/test_sql_refsols/defog_dealership_basic10_snowflake.sql new file mode 100644 index 000000000..ad6e9ed12 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic10_snowflake.sql @@ -0,0 +1,22 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(sale_price) AS sum_sale_price, + salesperson_id + FROM main.sales + WHERE + sale_date >= DATEADD(MONTH, -3, CURRENT_TIMESTAMP()) + GROUP BY + 3 +) +SELECT + salespersons.first_name, + salespersons.last_name, + COALESCE(_s1.n_rows, 0) AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.salespersons AS salespersons +LEFT JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id +ORDER BY + 4 DESC NULLS LAST +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_basic1_snowflake.sql b/tests/test_sql_refsols/defog_dealership_basic1_snowflake.sql new file mode 100644 index 000000000..bbd756397 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic1_snowflake.sql @@ 
-0,0 +1,17 @@ +WITH _u_0 AS ( + SELECT + car_id AS _u_1 + FROM main.sales + GROUP BY + 1 +) +SELECT + cars._id, + cars.make, + cars.model, + cars.year +FROM main.cars AS cars +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = cars._id +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dealership_basic2_snowflake.sql b/tests/test_sql_refsols/defog_dealership_basic2_snowflake.sql new file mode 100644 index 000000000..16cb73183 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic2_snowflake.sql @@ -0,0 +1,14 @@ +WITH _u_0 AS ( + SELECT + customer_id AS _u_1 + FROM main.sales + GROUP BY + 1 +) +SELECT + customers._id +FROM main.customers AS customers +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = customers._id +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dealership_basic3_snowflake.sql b/tests/test_sql_refsols/defog_dealership_basic3_snowflake.sql new file mode 100644 index 000000000..4310aba26 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic3_snowflake.sql @@ -0,0 +1,17 @@ +WITH _u_0 AS ( + SELECT + sales.salesperson_id AS _u_1 + FROM main.sales AS sales + JOIN main.payments_received AS payments_received + ON payments_received.payment_method = 'cash' + AND payments_received.sale_id = sales._id + GROUP BY + 1 +) +SELECT + salespersons._id AS salesperson_id +FROM main.salespersons AS salespersons +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = salespersons._id +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dealership_basic4_snowflake.sql b/tests/test_sql_refsols/defog_dealership_basic4_snowflake.sql new file mode 100644 index 000000000..8d94ed4be --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic4_snowflake.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + salesperson_id AS _u_1 + FROM main.sales + GROUP BY + 1 +) +SELECT + salespersons._id, + salespersons.first_name, + salespersons.last_name +FROM main.salespersons AS salespersons +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = salespersons._id +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_dealership_basic5_snowflake.sql b/tests/test_sql_refsols/defog_dealership_basic5_snowflake.sql new file mode 100644 index 000000000..7e60facb1 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic5_snowflake.sql @@ -0,0 +1,22 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows_1, + SUM(sale_price) AS sum_sale_price, + salesperson_id + FROM main.sales + WHERE + DATEDIFF(DAY, CAST(sale_date AS DATETIME), CURRENT_TIMESTAMP()) <= 30 + GROUP BY + 3 +) +SELECT + salespersons.first_name, + salespersons.last_name, + _s1.n_rows_1 AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.salespersons AS salespersons +JOIN _s1 AS _s1 + ON _s1.salesperson_id = salespersons._id +ORDER BY + 3 DESC NULLS LAST +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic6_snowflake.sql b/tests/test_sql_refsols/defog_dealership_basic6_snowflake.sql new file mode 100644 index 000000000..e34a0bded --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic6_snowflake.sql @@ -0,0 +1,12 @@ +SELECT + customers.state, + COUNT(DISTINCT sales.customer_id) AS unique_customers, + COALESCE(SUM(sales.sale_price), 0) AS total_revenue +FROM main.sales AS sales +JOIN main.customers AS customers + ON customers._id = sales.customer_id +GROUP BY + 1 +ORDER BY + 3 DESC NULLS LAST +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic7_snowflake.sql b/tests/test_sql_refsols/defog_dealership_basic7_snowflake.sql new file mode 100644 
index 000000000..e906254e2 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic7_snowflake.sql @@ -0,0 +1,10 @@ +SELECT + payment_method, + COUNT(*) AS total_payments, + COALESCE(SUM(payment_amount), 0) AS total_amount +FROM main.payments_received +GROUP BY + 1 +ORDER BY + 3 DESC NULLS LAST +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_dealership_basic8_snowflake.sql b/tests/test_sql_refsols/defog_dealership_basic8_snowflake.sql new file mode 100644 index 000000000..df98deb1d --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic8_snowflake.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(sale_price) AS sum_sale_price, + car_id + FROM main.sales + GROUP BY + 3 +) +SELECT + cars.make, + cars.model, + COALESCE(_s1.n_rows, 0) AS total_sales, + COALESCE(_s1.sum_sale_price, 0) AS total_revenue +FROM main.cars AS cars +LEFT JOIN _s1 AS _s1 + ON _s1.car_id = cars._id +ORDER BY + 4 DESC NULLS LAST +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_dealership_basic9_snowflake.sql b/tests/test_sql_refsols/defog_dealership_basic9_snowflake.sql new file mode 100644 index 000000000..8b2f15206 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_basic9_snowflake.sql @@ -0,0 +1,9 @@ +SELECT + state, + COUNT(*) AS total_signups +FROM main.customers +GROUP BY + 1 +ORDER BY + 2 DESC NULLS LAST +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_dealership_gen1_snowflake.sql b/tests/test_sql_refsols/defog_dealership_gen1_snowflake.sql new file mode 100644 index 000000000..0b56ff82e --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_gen1_snowflake.sql @@ -0,0 +1,11 @@ +SELECT + first_name, + last_name, + phone, + DATEDIFF(DAY, CAST(hire_date AS DATETIME), CAST(termination_date AS DATETIME)) * 1.0 AS days_employed +FROM main.salespersons +WHERE + NOT termination_date IS NULL +ORDER BY + 4 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_dealership_gen2_snowflake.sql b/tests/test_sql_refsols/defog_dealership_gen2_snowflake.sql new file mode 100644 index 000000000..d7f5d6345 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_gen2_snowflake.sql @@ -0,0 +1,10 @@ +SELECT + COUNT(*) AS weekend_payments +FROM main.payments_made +WHERE + ( + ( + DAYOFWEEK(payment_date) + 6 + ) % 7 + ) IN (5, 6) + AND vendor_name = 'Utility Company' diff --git a/tests/test_sql_refsols/defog_dealership_gen3_snowflake.sql b/tests/test_sql_refsols/defog_dealership_gen3_snowflake.sql new file mode 100644 index 000000000..84fb51a65 --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_gen3_snowflake.sql @@ -0,0 +1,29 @@ +SELECT + payment_date, + payment_method, + COALESCE(SUM(payment_amount), 0) AS total_amount +FROM main.payments_received +WHERE + DATEDIFF( + WEEK, + CAST(DATEADD(DAY, ( + ( + DAYOFWEEK(payment_date) + 6 + ) % 7 + ) * -1, payment_date) AS DATETIME), + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CURRENT_TIMESTAMP()) + 6 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP() + ) + ) = 1 +GROUP BY + 1, + 2 +ORDER BY + 1 DESC NULLS LAST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_dealership_gen4_snowflake.sql b/tests/test_sql_refsols/defog_dealership_gen4_snowflake.sql new file mode 100644 index 000000000..6825403cf --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_gen4_snowflake.sql @@ -0,0 +1,33 @@ +WITH _s0 AS ( + SELECT + DATE_TRUNC('QUARTER', CAST(sale_date AS TIMESTAMP)) AS quarter, + SUM(sale_price) AS sum_sale_price, + customer_id + FROM main.sales + WHERE + YEAR(CAST(sale_date AS TIMESTAMP)) = 2023 + GROUP BY + 1, + 3 +), 
_t1 AS ( + SELECT + SUM(_s0.sum_sale_price) AS sum_sum_sale_price, + _s0.quarter, + customers.state + FROM _s0 AS _s0 + JOIN main.customers AS customers + ON _s0.customer_id = customers._id + GROUP BY + 2, + 3 +) +SELECT + quarter, + state AS customer_state, + COALESCE(sum_sum_sale_price, 0) AS total_sales +FROM _t1 +WHERE + NOT sum_sum_sale_price IS NULL AND sum_sum_sale_price > 0 +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/defog_dealership_gen5_snowflake.sql b/tests/test_sql_refsols/defog_dealership_gen5_snowflake.sql new file mode 100644 index 000000000..830b28cec --- /dev/null +++ b/tests/test_sql_refsols/defog_dealership_gen5_snowflake.sql @@ -0,0 +1,18 @@ +WITH _t0 AS ( + SELECT + car_id + FROM main.inventory_snapshots + WHERE + MONTH(CAST(snapshot_date AS TIMESTAMP)) = 3 + AND YEAR(CAST(snapshot_date AS TIMESTAMP)) = 2023 + QUALIFY + RANK() OVER (ORDER BY snapshot_date DESC) = 1 AND is_in_inventory +) +SELECT + cars._id, + cars.make, + cars.model, + cars.year +FROM _t0 AS _t0 +JOIN main.cars AS cars + ON _t0.car_id = cars._id diff --git a/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql new file mode 100644 index 000000000..273fe3fb6 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv10_snowflake.sql @@ -0,0 +1,16 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + sender_id + FROM main.wallet_transactions_daily + WHERE + sender_type = 0 + GROUP BY + 2 +) +SELECT + users.uid AS user_id, + _s1.n_rows AS total_transactions +FROM main.users AS users +JOIN _s1 AS _s1 + ON _s1.sender_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql new file mode 100644 index 000000000..e8d3f43cd --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv11_snowflake.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + SUM( + DATEDIFF(SECOND, CAST(session_start_ts AS DATETIME), CAST(session_end_ts AS DATETIME)) + ) AS sum_duration, + user_id + FROM main.user_sessions + WHERE + session_end_ts < '2023-06-08' AND session_start_ts >= '2023-06-01' + GROUP BY + 2 +) +SELECT + users.uid, + COALESCE(_s1.sum_duration, 0) AS total_duration +FROM main.users AS users +JOIN _s1 AS _s1 + ON _s1.user_id = users.uid +ORDER BY + 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql new file mode 100644 index 000000000..a874f7588 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv12_snowflake.sql @@ -0,0 +1,16 @@ +WITH _s1 AS ( + SELECT + SUM(amount) AS sum_amount, + coupon_id + FROM main.wallet_transactions_daily + GROUP BY + 2 +) +SELECT + coupons.cid AS coupon_id, + COALESCE(_s1.sum_amount, 0) AS total_discount +FROM main.coupons AS coupons +LEFT JOIN _s1 AS _s1 + ON _s1.coupon_id = coupons.cid +WHERE + coupons.merchant_id = '1' diff --git a/tests/test_sql_refsols/defog_ewallet_adv13_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv13_snowflake.sql new file mode 100644 index 000000000..0ebf11a50 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv13_snowflake.sql @@ -0,0 +1,6 @@ +SELECT + COUNT(*) AS TUC +FROM main.user_sessions +WHERE + session_end_ts >= DATE_TRUNC('DAY', DATEADD(MONTH, -1, CURRENT_TIMESTAMP())) + OR session_start_ts >= DATE_TRUNC('DAY', DATEADD(MONTH, -1, CURRENT_TIMESTAMP())) diff --git a/tests/test_sql_refsols/defog_ewallet_adv14_snowflake.sql 
b/tests/test_sql_refsols/defog_ewallet_adv14_snowflake.sql new file mode 100644 index 000000000..3cfa5fa6f --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv14_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + COALESCE(COUNT_IF(status = 'success'), 0) / COUNT(*) AS _expr0 +FROM main.wallet_transactions_daily +WHERE + DATEDIFF(MONTH, CAST(created_at AS DATETIME), CURRENT_TIMESTAMP()) = 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv15_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv15_snowflake.sql new file mode 100644 index 000000000..d60f519f9 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv15_snowflake.sql @@ -0,0 +1,21 @@ +WITH _s3 AS ( + SELECT + COUNT(*) AS n_rows, + coupons.merchant_id + FROM main.coupons AS coupons + JOIN main.merchants AS merchants + ON DATEDIFF(MONTH, CAST(merchants.created_at AS DATETIME), CAST(coupons.created_at AS DATETIME)) = 0 + AND coupons.merchant_id = merchants.mid + GROUP BY + 2 +) +SELECT + merchants.mid AS merchant_id, + merchants.name AS merchant_name, + COALESCE(_s3.n_rows, 0) AS coupons_per_merchant +FROM main.merchants AS merchants +LEFT JOIN _s3 AS _s3 + ON _s3.merchant_id = merchants.mid +ORDER BY + 3 DESC NULLS LAST +LIMIT 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv16_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv16_snowflake.sql new file mode 100644 index 000000000..635f567ce --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv16_snowflake.sql @@ -0,0 +1,18 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + user_id + FROM main.notifications + WHERE + status = 'unread' AND type = 'promotion' + GROUP BY + 2 +) +SELECT + users.username, + _s1.n_rows AS total_unread_notifs +FROM main.users AS users +JOIN _s1 AS _s1 + ON _s1.user_id = users.uid +WHERE + LOWER(users.country) = 'us' diff --git a/tests/test_sql_refsols/defog_ewallet_adv1_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv1_snowflake.sql new file mode 100644 index 000000000..4466e1c8c --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv1_snowflake.sql @@ -0,0 +1,19 @@ +WITH _s1 AS ( + SELECT + COUNT(DISTINCT coupon_id) AS ndistinct_coupon_id, + COUNT(DISTINCT txid) AS ndistinct_txid, + receiver_id + FROM main.wallet_transactions_daily + WHERE + status = 'success' + GROUP BY + 3 +) +SELECT + merchants.name, + ( + _s1.ndistinct_coupon_id * 1.0 + ) / _s1.ndistinct_txid AS CPUR +FROM main.merchants AS merchants +JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid diff --git a/tests/test_sql_refsols/defog_ewallet_adv2_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv2_snowflake.sql new file mode 100644 index 000000000..bd7a73fe3 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv2_snowflake.sql @@ -0,0 +1,53 @@ +SELECT + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(notifications.created_at AS TIMESTAMP)) + 6 + ) % 7 + ) * -1, + CAST(notifications.created_at AS TIMESTAMP) + ) + ) AS week, + COUNT(*) AS num_notifs, + COALESCE(COUNT_IF(( + ( + DAYOFWEEK(notifications.created_at) + 6 + ) % 7 + ) IN (5, 6)), 0) AS weekend_notifs +FROM main.notifications AS notifications +JOIN main.users AS users + ON notifications.user_id = users.uid AND users.country IN ('US', 'CA') +WHERE + notifications.created_at < DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CURRENT_TIMESTAMP()) + 6 + ) % 7 + ) * -1, + CURRENT_TIMESTAMP() + ) + ) + AND notifications.created_at >= DATEADD( + WEEK, + -3, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CURRENT_TIMESTAMP()) + 6 + ) % 7 + ) * -1, + 
CURRENT_TIMESTAMP() + ) + ) + ) +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_adv3_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv3_snowflake.sql new file mode 100644 index 000000000..ca5865599 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv3_snowflake.sql @@ -0,0 +1,16 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + merchant_id + FROM main.coupons + GROUP BY + 2 +) +SELECT + merchants.name AS merchant_name, + _s1.n_rows AS total_coupons +FROM main.merchants AS merchants +JOIN _s1 AS _s1 + ON _s1.merchant_id = merchants.mid +WHERE + CONTAINS(LOWER(merchants.category), 'retail') AND merchants.status = 'active' diff --git a/tests/test_sql_refsols/defog_ewallet_adv4_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv4_snowflake.sql new file mode 100644 index 000000000..b6ab12c52 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv4_snowflake.sql @@ -0,0 +1,12 @@ +SELECT + COUNT(*) AS num_transactions, + CASE + WHEN COUNT(*) > 0 + THEN COALESCE(SUM(wallet_transactions_daily.amount), 0) + ELSE NULL + END AS total_amount +FROM main.wallet_transactions_daily AS wallet_transactions_daily +JOIN main.users AS users + ON users.country = 'US' AND users.uid = wallet_transactions_daily.sender_id +WHERE + DATEDIFF(DAY, CAST(wallet_transactions_daily.created_at AS DATETIME), CURRENT_TIMESTAMP()) <= 7 diff --git a/tests/test_sql_refsols/defog_ewallet_adv5_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv5_snowflake.sql new file mode 100644 index 000000000..679a19eab --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv5_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + AVG(balance) AS AMB +FROM main.wallet_user_balance_daily +WHERE + DATEDIFF(DAY, CAST(updated_at AS DATETIME), CURRENT_TIMESTAMP()) <= 7 diff --git a/tests/test_sql_refsols/defog_ewallet_adv6_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv6_snowflake.sql new file mode 100644 index 000000000..facbf73f3 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv6_snowflake.sql @@ -0,0 +1,14 @@ +WITH _t0 AS ( + SELECT + balance, + user_id + FROM main.wallet_user_balance_daily + QUALIFY + ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY updated_at DESC) = 1 +) +SELECT + users.uid AS user_id, + _t0.balance AS latest_balance +FROM main.users AS users +JOIN _t0 AS _t0 + ON _t0.user_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv7_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv7_snowflake.sql new file mode 100644 index 000000000..3a44a5777 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv7_snowflake.sql @@ -0,0 +1,14 @@ +WITH _t0 AS ( + SELECT + marketing_opt_in, + user_id + FROM main.user_setting_snapshot + QUALIFY + ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC) = 1 +) +SELECT + users.uid, + _t0.marketing_opt_in +FROM main.users AS users +JOIN _t0 AS _t0 + ON _t0.user_id = users.uid diff --git a/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql new file mode 100644 index 000000000..d8a15f120 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv8_snowflake.sql @@ -0,0 +1,19 @@ +WITH _s1 AS ( + SELECT + SUM(amount) AS sum_amount, + receiver_id + FROM main.wallet_transactions_daily + WHERE + receiver_type = 1 AND status = 'success' + GROUP BY + 2 +) +SELECT + merchants.mid AS merchants_id, + merchants.name AS merchants_name, + merchants.category, + COALESCE(_s1.sum_amount, 0) AS total_revenue, + ROW_NUMBER() OVER (ORDER BY COALESCE(_s1.sum_amount, 0) 
DESC) AS mrr +FROM main.merchants AS merchants +JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid diff --git a/tests/test_sql_refsols/defog_ewallet_adv9_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_adv9_snowflake.sql new file mode 100644 index 000000000..fc9507f0f --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_adv9_snowflake.sql @@ -0,0 +1,10 @@ +SELECT + DATE_TRUNC('MONTH', CAST(created_at AS TIMESTAMP)) AS year_month, + COUNT(DISTINCT sender_id) AS active_users +FROM main.wallet_transactions_daily +WHERE + created_at < DATE_TRUNC('MONTH', CURRENT_TIMESTAMP()) + AND created_at >= DATEADD(MONTH, -2, DATE_TRUNC('MONTH', CURRENT_TIMESTAMP())) + AND sender_type = 0 +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql new file mode 100644 index 000000000..75fa0c7cb --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic10_snowflake.sql @@ -0,0 +1,22 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(amount) AS sum_amount, + receiver_id + FROM main.wallet_transactions_daily + WHERE + created_at >= DATE_TRUNC('DAY', DATEADD(DAY, -150, CURRENT_TIMESTAMP())) + AND receiver_type = 1 + GROUP BY + 3 +) +SELECT + merchants.name AS merchant_name, + COALESCE(_s1.n_rows, 0) AS total_transactions, + COALESCE(_s1.sum_amount, 0) AS total_amount +FROM main.merchants AS merchants +LEFT JOIN _s1 AS _s1 + ON _s1.receiver_id = merchants.mid +ORDER BY + 3 DESC NULLS LAST +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic1_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic1_snowflake.sql new file mode 100644 index 000000000..5ea6919b3 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic1_snowflake.sql @@ -0,0 +1,12 @@ +SELECT + DATE_TRUNC('MONTH', CAST(wallet_transactions_daily.created_at AS TIMESTAMP)) AS month, + COUNT(DISTINCT wallet_transactions_daily.sender_id) AS active_users +FROM main.wallet_transactions_daily AS wallet_transactions_daily +JOIN main.users AS users + ON users.status = 'active' AND users.uid = wallet_transactions_daily.sender_id +WHERE + YEAR(CAST(wallet_transactions_daily.created_at AS TIMESTAMP)) = 2023 + AND wallet_transactions_daily.sender_type = 0 + AND wallet_transactions_daily.status = 'success' +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_basic2_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic2_snowflake.sql new file mode 100644 index 000000000..9ca419d96 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic2_snowflake.sql @@ -0,0 +1,15 @@ +WITH _u_0 AS ( + SELECT + merchant_id AS _u_1 + FROM main.coupons + GROUP BY + 1 +) +SELECT + merchants.mid AS merchant_id, + merchants.name AS merchant_name +FROM main.merchants AS merchants +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = merchants.mid +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_ewallet_basic3_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic3_snowflake.sql new file mode 100644 index 000000000..80b4b414d --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic3_snowflake.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + receiver_id AS _u_1 + FROM main.wallet_transactions_daily + WHERE + receiver_type = 1 + GROUP BY + 1 +) +SELECT + merchants.mid AS merchant +FROM main.merchants AS merchants +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = merchants.mid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_ewallet_basic4_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic4_snowflake.sql new 
file mode 100644 index 000000000..b831e9388 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic4_snowflake.sql @@ -0,0 +1,16 @@ +WITH _u_0 AS ( + SELECT + user_id AS _u_1 + FROM main.notifications + WHERE + type = 'transaction' + GROUP BY + 1 +) +SELECT + users.uid AS user_id +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = users.uid +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_ewallet_basic5_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic5_snowflake.sql new file mode 100644 index 000000000..b1a33dd01 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic5_snowflake.sql @@ -0,0 +1,15 @@ +WITH _u_0 AS ( + SELECT + user_id AS _u_1 + FROM main.notifications + GROUP BY + 1 +) +SELECT + users.uid, + users.username +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = users.uid +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/defog_ewallet_basic6_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic6_snowflake.sql new file mode 100644 index 000000000..bd8925e79 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic6_snowflake.sql @@ -0,0 +1,9 @@ +SELECT + device_type, + COUNT(*) AS count +FROM main.user_sessions +GROUP BY + 1 +ORDER BY + 2 DESC NULLS LAST +LIMIT 2 diff --git a/tests/test_sql_refsols/defog_ewallet_basic7_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic7_snowflake.sql new file mode 100644 index 000000000..2575506fa --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic7_snowflake.sql @@ -0,0 +1,9 @@ +SELECT + status, + COUNT(*) AS count +FROM main.wallet_transactions_daily +GROUP BY + 1 +ORDER BY + 2 DESC NULLS LAST +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql new file mode 100644 index 000000000..035c4d087 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic8_snowflake.sql @@ -0,0 +1,19 @@ +WITH _s1 AS ( + SELECT + COUNT(txid) AS count_txid, + SUM(amount) AS sum_amount, + coupon_id + FROM main.wallet_transactions_daily + GROUP BY + 3 +) +SELECT + coupons.code AS coupon_code, + COALESCE(_s1.count_txid, 0) AS redemption_count, + COALESCE(_s1.sum_amount, 0) AS total_discount +FROM main.coupons AS coupons +LEFT JOIN _s1 AS _s1 + ON _s1.coupon_id = coupons.cid +ORDER BY + 2 DESC NULLS LAST +LIMIT 3 diff --git a/tests/test_sql_refsols/defog_ewallet_basic9_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_basic9_snowflake.sql new file mode 100644 index 000000000..114ba10b7 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_basic9_snowflake.sql @@ -0,0 +1,20 @@ +WITH _s1 AS ( + SELECT + country, + uid + FROM main.users +) +SELECT + _s1.country, + COUNT(DISTINCT wallet_transactions_daily.sender_id) AS user_count, + COALESCE(SUM(wallet_transactions_daily.amount), 0) AS total_amount +FROM main.wallet_transactions_daily AS wallet_transactions_daily +LEFT JOIN _s1 AS _s1 + ON _s1.uid = wallet_transactions_daily.sender_id +WHERE + wallet_transactions_daily.sender_type = 0 +GROUP BY + 1 +ORDER BY + 3 DESC NULLS LAST +LIMIT 5 diff --git a/tests/test_sql_refsols/defog_ewallet_gen1_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_gen1_snowflake.sql new file mode 100644 index 000000000..fb87bcce3 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_gen1_snowflake.sql @@ -0,0 +1,9 @@ +SELECT + MEDIAN(wallet_merchant_balance_daily.balance) AS _expr0 +FROM main.wallet_merchant_balance_daily AS wallet_merchant_balance_daily +JOIN main.merchants AS 
merchants + ON CONTAINS(LOWER(merchants.category), 'retail') + AND merchants.mid = wallet_merchant_balance_daily.merchant_id + AND merchants.status = 'active' +WHERE + DATE_TRUNC('DAY', CAST(wallet_merchant_balance_daily.updated_at AS TIMESTAMP)) = DATE_TRUNC('DAY', CURRENT_TIMESTAMP()) diff --git a/tests/test_sql_refsols/defog_ewallet_gen2_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_gen2_snowflake.sql new file mode 100644 index 000000000..958b2afa2 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_gen2_snowflake.sql @@ -0,0 +1,23 @@ +WITH _s0 AS ( + SELECT + MIN(snapshot_date) AS min_date + FROM main.user_setting_snapshot + WHERE + YEAR(CAST(snapshot_date AS TIMESTAMP)) = 2023 +), _s1 AS ( + SELECT + AVG(tx_limit_daily) AS avg_tx_limit_daily, + AVG(tx_limit_monthly) AS avg_tx_limit_monthly, + snapshot_date + FROM main.user_setting_snapshot + WHERE + YEAR(CAST(snapshot_date AS TIMESTAMP)) = 2023 + GROUP BY + 3 +) +SELECT + _s1.avg_tx_limit_daily AS avg_daily_limit, + _s1.avg_tx_limit_monthly AS avg_monthly_limit +FROM _s0 AS _s0 +LEFT JOIN _s1 AS _s1 + ON _s0.min_date = _s1.snapshot_date diff --git a/tests/test_sql_refsols/defog_ewallet_gen3_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_gen3_snowflake.sql new file mode 100644 index 000000000..7432d05ac --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_gen3_snowflake.sql @@ -0,0 +1,8 @@ +SELECT + device_type, + AVG( + DATEDIFF(SECOND, CAST(session_start_ts AS DATETIME), CAST(session_end_ts AS DATETIME)) + ) AS avg_session_duration_seconds +FROM main.user_sessions +GROUP BY + 1 diff --git a/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql new file mode 100644 index 000000000..ad361d520 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_gen4_snowflake.sql @@ -0,0 +1,35 @@ +WITH _t0 AS ( + SELECT + merchant_id, + start_date + FROM main.coupons +), _s1 AS ( + SELECT + MIN(start_date) AS min_start_date, + merchant_id + FROM _t0 + GROUP BY + 2 +), _s3 AS ( + SELECT + MAX(cid) AS max_cid, + merchant_id, + start_date + FROM main.coupons + GROUP BY + 2, + 3 +) +SELECT + merchants.mid AS merchants_id, + merchants.created_at AS merchant_registration_date, + _s1.min_start_date AS earliest_coupon_start_date, + _s3.max_cid AS earliest_coupon_id +FROM main.merchants AS merchants +LEFT JOIN _s1 AS _s1 + ON _s1.merchant_id = merchants.mid +LEFT JOIN _s3 AS _s3 + ON _s1.min_start_date = _s3.start_date AND _s3.merchant_id = merchants.mid +JOIN _t0 AS _s5 + ON _s5.merchant_id = merchants.mid + AND _s5.start_date <= DATEADD(YEAR, 1, CAST(merchants.created_at AS TIMESTAMP)) diff --git a/tests/test_sql_refsols/defog_ewallet_gen5_snowflake.sql b/tests/test_sql_refsols/defog_ewallet_gen5_snowflake.sql new file mode 100644 index 000000000..dbace8406 --- /dev/null +++ b/tests/test_sql_refsols/defog_ewallet_gen5_snowflake.sql @@ -0,0 +1,20 @@ +WITH _u_0 AS ( + SELECT + notifications.user_id AS _u_1 + FROM main.notifications AS notifications + JOIN main.users AS users + ON notifications.created_at <= DATEADD(YEAR, 1, CAST(users.created_at AS TIMESTAMP)) + AND notifications.created_at >= users.created_at + AND notifications.user_id = users.uid + GROUP BY + 1 +) +SELECT + users.username, + users.email, + users.created_at +FROM main.users AS users +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = users.uid +WHERE + _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/epoch_culture_events_info_snowflake.sql b/tests/test_sql_refsols/epoch_culture_events_info_snowflake.sql new file 
mode 100644 index 000000000..7db29730d --- /dev/null +++ b/tests/test_sql_refsols/epoch_culture_events_info_snowflake.sql @@ -0,0 +1,32 @@ +WITH _s2 AS ( + SELECT + ev_dt, + ev_key + FROM events +) +SELECT + events.ev_name AS event_name, + eras.er_name AS era_name, + YEAR(CAST(events.ev_dt AS TIMESTAMP)) AS event_year, + seasons.s_name AS season_name, + times.t_name AS tod +FROM events AS events +JOIN eras AS eras + ON eras.er_end_year > YEAR(CAST(events.ev_dt AS TIMESTAMP)) + AND eras.er_start_year <= YEAR(CAST(events.ev_dt AS TIMESTAMP)) +JOIN _s2 AS _s2 + ON _s2.ev_key = events.ev_key +JOIN seasons AS seasons + ON seasons.s_month1 = MONTH(CAST(_s2.ev_dt AS TIMESTAMP)) + OR seasons.s_month2 = MONTH(CAST(_s2.ev_dt AS TIMESTAMP)) + OR seasons.s_month3 = MONTH(CAST(_s2.ev_dt AS TIMESTAMP)) +JOIN _s2 AS _s6 + ON _s6.ev_key = events.ev_key +JOIN times AS times + ON times.t_end_hour > HOUR(CAST(_s6.ev_dt AS TIMESTAMP)) + AND times.t_start_hour <= HOUR(CAST(_s6.ev_dt AS TIMESTAMP)) +WHERE + events.ev_typ = 'culture' +ORDER BY + events.ev_dt NULLS FIRST +LIMIT 6 diff --git a/tests/test_sql_refsols/epoch_event_gap_per_era_snowflake.sql b/tests/test_sql_refsols/epoch_event_gap_per_era_snowflake.sql new file mode 100644 index 000000000..e4de3b246 --- /dev/null +++ b/tests/test_sql_refsols/epoch_event_gap_per_era_snowflake.sql @@ -0,0 +1,27 @@ +WITH _t2 AS ( + SELECT + DATEDIFF( + DAY, + CAST(LAG(events.ev_dt, 1) OVER (PARTITION BY eras.er_name, eras.er_name ORDER BY events.ev_dt) AS DATETIME), + CAST(events.ev_dt AS DATETIME) + ) AS day_gap, + eras.er_end_year, + eras.er_name, + eras.er_start_year, + events.ev_dt + FROM eras AS eras + JOIN events AS events + ON eras.er_end_year > YEAR(CAST(events.ev_dt AS TIMESTAMP)) + AND eras.er_start_year <= YEAR(CAST(events.ev_dt AS TIMESTAMP)) +) +SELECT + er_name AS era_name, + AVG(day_gap) AS avg_event_gap +FROM _t2 +WHERE + er_end_year > YEAR(CAST(ev_dt AS TIMESTAMP)) + AND er_start_year <= YEAR(CAST(ev_dt AS TIMESTAMP)) +GROUP BY + 1 +ORDER BY + ANY_VALUE(er_start_year) NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_events_per_season_snowflake.sql b/tests/test_sql_refsols/epoch_events_per_season_snowflake.sql new file mode 100644 index 000000000..f3744ccc7 --- /dev/null +++ b/tests/test_sql_refsols/epoch_events_per_season_snowflake.sql @@ -0,0 +1,13 @@ +SELECT + seasons.s_name AS season_name, + COUNT(*) AS n_events +FROM seasons AS seasons +JOIN events AS events + ON seasons.s_month1 = MONTH(CAST(events.ev_dt AS TIMESTAMP)) + OR seasons.s_month2 = MONTH(CAST(events.ev_dt AS TIMESTAMP)) + OR seasons.s_month3 = MONTH(CAST(events.ev_dt AS TIMESTAMP)) +GROUP BY + 1 +ORDER BY + 2 DESC NULLS LAST, + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_first_event_per_era_snowflake.sql b/tests/test_sql_refsols/epoch_first_event_per_era_snowflake.sql new file mode 100644 index 000000000..c313bfd21 --- /dev/null +++ b/tests/test_sql_refsols/epoch_first_event_per_era_snowflake.sql @@ -0,0 +1,18 @@ +WITH _t1 AS ( + SELECT + eras.er_name, + eras.er_start_year, + events.ev_name + FROM eras AS eras + JOIN events AS events + ON eras.er_end_year > YEAR(CAST(events.ev_dt AS TIMESTAMP)) + AND eras.er_start_year <= YEAR(CAST(events.ev_dt AS TIMESTAMP)) + QUALIFY + ROW_NUMBER() OVER (PARTITION BY eras.er_name ORDER BY events.ev_dt) = 1 +) +SELECT + er_name AS era_name, + ev_name AS event_name +FROM _t1 +ORDER BY + er_start_year NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql 
b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql new file mode 100644 index 000000000..1ddb4623b --- /dev/null +++ b/tests/test_sql_refsols/epoch_intra_season_searches_snowflake.sql @@ -0,0 +1,82 @@ +WITH _s0 AS ( + SELECT + s_month1, + s_month2, + s_month3, + s_name + FROM seasons +), _s5 AS ( + SELECT + ev_dt, + ev_name + FROM events +), _s9 AS ( + SELECT + COUNT(*) AS n_rows, + _s2.s_name, + searches.search_id + FROM _s0 AS _s2 + JOIN searches AS searches + ON _s2.s_month1 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + OR _s2.s_month2 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + OR _s2.s_month3 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + JOIN _s5 AS _s5 + ON CONTAINS(LOWER(searches.search_string), LOWER(_s5.ev_name)) + JOIN _s0 AS _s7 + ON _s2.s_name = _s7.s_name + AND ( + _s7.s_month1 = MONTH(CAST(_s5.ev_dt AS TIMESTAMP)) + OR _s7.s_month2 = MONTH(CAST(_s5.ev_dt AS TIMESTAMP)) + OR _s7.s_month3 = MONTH(CAST(_s5.ev_dt AS TIMESTAMP)) + ) + GROUP BY + 2, + 3 +), _s16 AS ( + SELECT + COUNT(*) AS n_rows, + COUNT_IF(( + NOT _s9.n_rows IS NULL AND _s9.n_rows > 0 + )) AS sum_is_intra_season, + _s0.s_name + FROM _s0 AS _s0 + JOIN searches AS searches + ON _s0.s_month1 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + OR _s0.s_month2 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + OR _s0.s_month3 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + LEFT JOIN _s9 AS _s9 + ON _s0.s_name = _s9.s_name AND _s9.search_id = searches.search_id + GROUP BY + 3 +), _s17 AS ( + SELECT + COUNT(*) AS n_rows, + COUNT_IF(_s15.s_name = _s10.s_name) AS sum_is_intra_season, + _s10.s_name + FROM _s0 AS _s10 + JOIN _s5 AS _s11 + ON _s10.s_month1 = MONTH(CAST(_s11.ev_dt AS TIMESTAMP)) + OR _s10.s_month2 = MONTH(CAST(_s11.ev_dt AS TIMESTAMP)) + OR _s10.s_month3 = MONTH(CAST(_s11.ev_dt AS TIMESTAMP)) + JOIN searches AS searches + ON CONTAINS(LOWER(searches.search_string), LOWER(_s11.ev_name)) + JOIN _s0 AS _s15 + ON _s15.s_month1 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + OR _s15.s_month2 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + OR _s15.s_month3 = MONTH(CAST(searches.search_ts AS TIMESTAMP)) + GROUP BY + 3 +) +SELECT + _s16.s_name AS season_name, + ROUND(( + 100.0 * COALESCE(_s16.sum_is_intra_season, 0) + ) / _s16.n_rows, 2) AS pct_season_searches, + ROUND(( + 100.0 * COALESCE(_s17.sum_is_intra_season, 0) + ) / COALESCE(_s17.n_rows, 0), 2) AS pct_event_searches +FROM _s16 AS _s16 +LEFT JOIN _s17 AS _s17 + ON _s16.s_name = _s17.s_name +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_most_popular_search_engine_per_tod_snowflake.sql b/tests/test_sql_refsols/epoch_most_popular_search_engine_per_tod_snowflake.sql new file mode 100644 index 000000000..cabbb3e36 --- /dev/null +++ b/tests/test_sql_refsols/epoch_most_popular_search_engine_per_tod_snowflake.sql @@ -0,0 +1,22 @@ +WITH _t1 AS ( + SELECT + COUNT(*) AS n_searches, + searches.search_engine, + times.t_name + FROM times AS times + JOIN searches AS searches + ON times.t_end_hour > HOUR(CAST(searches.search_ts AS TIMESTAMP)) + AND times.t_start_hour <= HOUR(CAST(searches.search_ts AS TIMESTAMP)) + GROUP BY + 2, + 3 + QUALIFY + ROW_NUMBER() OVER (PARTITION BY times.t_name ORDER BY COUNT(*) DESC, searches.search_engine) = 1 +) +SELECT + t_name AS tod, + search_engine, + n_searches +FROM _t1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_most_popular_topic_per_region_snowflake.sql b/tests/test_sql_refsols/epoch_most_popular_topic_per_region_snowflake.sql new file mode 100644 index 
000000000..1f7517326 --- /dev/null +++ b/tests/test_sql_refsols/epoch_most_popular_topic_per_region_snowflake.sql @@ -0,0 +1,21 @@ +WITH _t0 AS ( + SELECT + COUNT(DISTINCT searches.search_id) AS n_searches, + events.ev_typ, + users.user_region + FROM events AS events + JOIN searches AS searches + ON CONTAINS(LOWER(searches.search_string), LOWER(events.ev_name)) + JOIN users AS users + ON searches.search_user_id = users.user_id + GROUP BY + 2, + 3 + QUALIFY + ROW_NUMBER() OVER (PARTITION BY users.user_region ORDER BY COUNT(DISTINCT searches.search_id) DESC) = 1 +) +SELECT + user_region AS region, + ev_typ AS event_type, + n_searches +FROM _t0 diff --git a/tests/test_sql_refsols/epoch_num_predawn_cold_war_snowflake.sql b/tests/test_sql_refsols/epoch_num_predawn_cold_war_snowflake.sql new file mode 100644 index 000000000..5e8914950 --- /dev/null +++ b/tests/test_sql_refsols/epoch_num_predawn_cold_war_snowflake.sql @@ -0,0 +1,27 @@ +WITH _s0 AS ( + SELECT + ev_dt, + ev_key + FROM events +), _u_0 AS ( + SELECT + _s2.ev_key AS _u_1 + FROM _s0 AS _s2 + JOIN eras AS eras + ON eras.er_end_year > YEAR(CAST(_s2.ev_dt AS TIMESTAMP)) + AND eras.er_name = 'Cold War' + AND eras.er_start_year <= YEAR(CAST(_s2.ev_dt AS TIMESTAMP)) + GROUP BY + 1 +) +SELECT + COUNT(*) AS n_events +FROM _s0 AS _s0 +JOIN times AS times + ON times.t_end_hour > HOUR(CAST(_s0.ev_dt AS TIMESTAMP)) + AND times.t_name = 'Pre-Dawn' + AND times.t_start_hour <= HOUR(CAST(_s0.ev_dt AS TIMESTAMP)) +LEFT JOIN _u_0 AS _u_0 + ON _s0.ev_key = _u_0._u_1 +WHERE + NOT _u_0._u_1 IS NULL diff --git a/tests/test_sql_refsols/epoch_overlapping_event_search_other_users_per_user_snowflake.sql b/tests/test_sql_refsols/epoch_overlapping_event_search_other_users_per_user_snowflake.sql new file mode 100644 index 000000000..760a8ee7a --- /dev/null +++ b/tests/test_sql_refsols/epoch_overlapping_event_search_other_users_per_user_snowflake.sql @@ -0,0 +1,29 @@ +WITH _s0 AS ( + SELECT + user_id, + user_name + FROM users +), _s1 AS ( + SELECT + search_string, + search_user_id + FROM searches +) +SELECT + ANY_VALUE(_s0.user_name) AS user_name, + COUNT(DISTINCT _s7.user_id) AS n_other_users +FROM _s0 AS _s0 +JOIN _s1 AS _s1 + ON _s0.user_id = _s1.search_user_id +JOIN events AS events + ON CONTAINS(LOWER(_s1.search_string), LOWER(events.ev_name)) +JOIN _s1 AS _s5 + ON CONTAINS(LOWER(_s5.search_string), LOWER(events.ev_name)) +JOIN _s0 AS _s7 + ON _s0.user_name <> _s7.user_name AND _s5.search_user_id = _s7.user_id +GROUP BY + _s0.user_id +ORDER BY + 2 DESC NULLS LAST, + 1 NULLS FIRST +LIMIT 7 diff --git a/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_snowflake.sql b/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_snowflake.sql new file mode 100644 index 000000000..e6b9376d7 --- /dev/null +++ b/tests/test_sql_refsols/epoch_overlapping_event_searches_per_user_snowflake.sql @@ -0,0 +1,35 @@ +WITH _s0 AS ( + SELECT + user_id, + user_name + FROM users +), _t2 AS ( + SELECT + ANY_VALUE(searches.search_user_id) AS anything_search_user_id, + ANY_VALUE(_s0.user_name) AS anything_user_name, + _s0.user_id + FROM _s0 AS _s0 + JOIN searches AS searches + ON _s0.user_id = searches.search_user_id + JOIN events AS events + ON CONTAINS(LOWER(searches.search_string), LOWER(events.ev_name)) + JOIN searches AS searches_2 + ON CONTAINS(LOWER(searches_2.search_string), LOWER(events.ev_name)) + JOIN _s0 AS _s7 + ON _s0.user_name <> _s7.user_name AND _s7.user_id = searches_2.search_user_id + GROUP BY + searches.search_id, + 3 +) +SELECT + 
ANY_VALUE(anything_user_name) AS user_name, + COUNT(*) AS n_searches +FROM _t2 +WHERE + anything_search_user_id = user_id +GROUP BY + user_id +ORDER BY + 2 DESC NULLS LAST, + 1 NULLS FIRST +LIMIT 4 diff --git a/tests/test_sql_refsols/epoch_pct_searches_per_tod_snowflake.sql b/tests/test_sql_refsols/epoch_pct_searches_per_tod_snowflake.sql new file mode 100644 index 000000000..aa17edcc7 --- /dev/null +++ b/tests/test_sql_refsols/epoch_pct_searches_per_tod_snowflake.sql @@ -0,0 +1,20 @@ +WITH _t0 AS ( + SELECT + ANY_VALUE(times.t_start_hour) AS anything_t_start_hour, + COUNT(*) AS n_rows, + times.t_name + FROM times AS times + JOIN searches AS searches + ON times.t_end_hour > HOUR(CAST(searches.search_ts AS TIMESTAMP)) + AND times.t_start_hour <= HOUR(CAST(searches.search_ts AS TIMESTAMP)) + GROUP BY + 3 +) +SELECT + t_name AS tod, + ROUND(( + 100.0 * n_rows + ) / SUM(n_rows) OVER (), 2) AS pct_searches +FROM _t0 +ORDER BY + anything_t_start_hour NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_search_results_by_tod_snowflake.sql b/tests/test_sql_refsols/epoch_search_results_by_tod_snowflake.sql new file mode 100644 index 000000000..089aac377 --- /dev/null +++ b/tests/test_sql_refsols/epoch_search_results_by_tod_snowflake.sql @@ -0,0 +1,22 @@ +WITH _t0 AS ( + SELECT + ANY_VALUE(times.t_start_hour) AS anything_t_start_hour, + AVG(searches.search_num_results) AS avg_search_num_results, + COUNT(*) AS n_rows, + times.t_name + FROM times AS times + JOIN searches AS searches + ON times.t_end_hour > HOUR(CAST(searches.search_ts AS TIMESTAMP)) + AND times.t_start_hour <= HOUR(CAST(searches.search_ts AS TIMESTAMP)) + GROUP BY + 4 +) +SELECT + t_name AS tod, + ROUND(( + 100.0 * n_rows + ) / SUM(n_rows) OVER (), 2) AS pct_searches, + ROUND(avg_search_num_results, 2) AS avg_results +FROM _t0 +ORDER BY + anything_t_start_hour NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_summer_events_per_type_snowflake.sql b/tests/test_sql_refsols/epoch_summer_events_per_type_snowflake.sql new file mode 100644 index 000000000..b7091de07 --- /dev/null +++ b/tests/test_sql_refsols/epoch_summer_events_per_type_snowflake.sql @@ -0,0 +1,15 @@ +SELECT + events.ev_typ AS event_type, + COUNT(*) AS n_events +FROM events AS events +JOIN seasons AS seasons + ON ( + seasons.s_month1 = MONTH(CAST(events.ev_dt AS TIMESTAMP)) + OR seasons.s_month2 = MONTH(CAST(events.ev_dt AS TIMESTAMP)) + OR seasons.s_month3 = MONTH(CAST(events.ev_dt AS TIMESTAMP)) + ) + AND seasons.s_name = 'Summer' +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_unique_users_per_engine_snowflake.sql b/tests/test_sql_refsols/epoch_unique_users_per_engine_snowflake.sql new file mode 100644 index 000000000..030770bb5 --- /dev/null +++ b/tests/test_sql_refsols/epoch_unique_users_per_engine_snowflake.sql @@ -0,0 +1,25 @@ +WITH _s2 AS ( + SELECT DISTINCT + search_engine + FROM searches +), _s3 AS ( + SELECT + COUNT(DISTINCT users.user_id) AS ndistinct_user_id, + searches.search_engine + FROM searches AS searches + JOIN users AS users + ON searches.search_user_id = users.user_id + WHERE + YEAR(CAST(searches.search_ts AS TIMESTAMP)) <= 2019 + AND YEAR(CAST(searches.search_ts AS TIMESTAMP)) >= 2010 + GROUP BY + 2 +) +SELECT + _s2.search_engine AS engine, + COALESCE(_s3.ndistinct_user_id, 0) AS n_users +FROM _s2 AS _s2 +LEFT JOIN _s3 AS _s3 + ON _s2.search_engine = _s3.search_engine +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/epoch_users_most_cold_war_searches_snowflake.sql 
b/tests/test_sql_refsols/epoch_users_most_cold_war_searches_snowflake.sql new file mode 100644 index 000000000..244b30369 --- /dev/null +++ b/tests/test_sql_refsols/epoch_users_most_cold_war_searches_snowflake.sql @@ -0,0 +1,30 @@ +WITH _t0 AS ( + SELECT + ANY_VALUE(searches.search_user_id) AS anything_search_user_id + FROM searches AS searches + JOIN events AS events + ON CONTAINS(LOWER(searches.search_string), LOWER(events.ev_name)) + JOIN eras AS eras + ON eras.er_end_year > YEAR(CAST(events.ev_dt AS TIMESTAMP)) + AND eras.er_name = 'Cold War' + AND eras.er_start_year <= YEAR(CAST(events.ev_dt AS TIMESTAMP)) + GROUP BY + searches.search_id +), _s5 AS ( + SELECT + COUNT(*) AS n_cold_war_searches, + anything_search_user_id + FROM _t0 + GROUP BY + 2 +) +SELECT + users.user_name, + _s5.n_cold_war_searches +FROM users AS users +JOIN _s5 AS _s5 + ON _s5.anything_search_user_id = users.user_id +ORDER BY + 2 DESC NULLS LAST, + 1 NULLS FIRST +LIMIT 3 diff --git a/tests/test_sql_refsols/extract_colors_snowflake.sql b/tests/test_sql_refsols/extract_colors_snowflake.sql new file mode 100644 index 000000000..07a4abd25 --- /dev/null +++ b/tests/test_sql_refsols/extract_colors_snowflake.sql @@ -0,0 +1,12 @@ +SELECT + p_partkey AS key, + UPPER(SPLIT_PART(p_name, ' ', 1)) AS c1, + UPPER(SPLIT_PART(p_name, ' ', 2)) AS c2, + UPPER(SPLIT_PART(p_name, ' ', 3)) AS c3, + UPPER(SPLIT_PART(p_name, ' ', 4)) AS c4, + UPPER(SPLIT_PART(p_name, ' ', 5)) AS c5, + UPPER(SPLIT_PART(p_name, ' ', 6)) AS c6 +FROM tpch.part +ORDER BY + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/floor_and_ceil_2_snowflake.sql b/tests/test_sql_refsols/floor_and_ceil_2_snowflake.sql new file mode 100644 index 000000000..9c0b4527b --- /dev/null +++ b/tests/test_sql_refsols/floor_and_ceil_2_snowflake.sql @@ -0,0 +1,9 @@ +SELECT + ps_suppkey AS supplier_key, + ps_partkey AS part_key, + FLOOR(ps_availqty) AS complete_parts, + CEIL(ps_supplycost * FLOOR(ps_availqty)) AS total_cost +FROM tpch.partsupp +ORDER BY + 4 DESC NULLS LAST +LIMIT 10 diff --git a/tests/test_sql_refsols/floor_and_ceil_snowflake.sql b/tests/test_sql_refsols/floor_and_ceil_snowflake.sql new file mode 100644 index 000000000..38b321fcd --- /dev/null +++ b/tests/test_sql_refsols/floor_and_ceil_snowflake.sql @@ -0,0 +1,11 @@ +SELECT + FLOOR(5.6) AS floor_frac, + CEIL(5.4) AS ceil_frac, + FLOOR(-5.4) AS floor_frac_neg, + CEIL(-5.6) AS ceil_frac_neg, + FLOOR(6) AS floor_int, + CEIL(6) AS ceil_int, + FLOOR(-6) AS floor_int_neg, + CEIL(-6) AS ceil_int_neg +FROM (VALUES + (NULL)) AS _q_0(_col_0) diff --git a/tests/test_sql_refsols/get_part_multiple_snowflake.sql b/tests/test_sql_refsols/get_part_multiple_snowflake.sql new file mode 100644 index 000000000..e52d4abc3 --- /dev/null +++ b/tests/test_sql_refsols/get_part_multiple_snowflake.sql @@ -0,0 +1,25 @@ +SELECT + CAST(SUBSTRING(sbcustid, 2) AS BIGINT) AS _expr0, + SPLIT_PART(sbcustname, ' ', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p1, + SPLIT_PART(sbcustname, ' ', 0 - CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p2, + SPLIT_PART(sbcustemail, '.', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p3, + SPLIT_PART(sbcustemail, '.', 0 - CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p4, + SPLIT_PART(sbcustphone, '-', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p5, + SPLIT_PART(sbcustphone, '-', 0 - CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p6, + SPLIT_PART(sbcustpostalcode, '00', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p7, + SPLIT_PART(sbcustpostalcode, '00', 0 - CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p8, + 
SPLIT_PART(sbcustname, '!', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p9, + SPLIT_PART(sbcustname, '@', 0 - CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p10, + SPLIT_PART(sbcustname, 'aa', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p11, + SPLIT_PART(sbcustname, '#$*', 0 - CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p12, + SPLIT_PART(sbcustname, '', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p13, + SPLIT_PART('', ' ', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p14, + SPLIT_PART(sbcustname, ' ', 0) AS p15, + SPLIT_PART(sbcuststate, sbcuststate, CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p16, + SPLIT_PART(SPLIT_PART(sbcustphone, '-', 1), '5', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p17, + SPLIT_PART(sbcustpostalcode, '0', CAST(SUBSTRING(sbcustid, 2) AS BIGINT)) AS p18 +FROM main.sbcustomer +WHERE + CAST(SUBSTRING(sbcustid, 2) AS BIGINT) <= 4 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/get_part_single_snowflake.sql b/tests/test_sql_refsols/get_part_single_snowflake.sql new file mode 100644 index 000000000..a9694cfb1 --- /dev/null +++ b/tests/test_sql_refsols/get_part_single_snowflake.sql @@ -0,0 +1,5 @@ +SELECT + SPLIT_PART(sbcustname, ' ', -1) AS last_name +FROM main.sbcustomer +WHERE + sbcustname = 'Alex Rodriguez' diff --git a/tests/test_sql_refsols/global_acctbal_breakdown_snowflake.sql b/tests/test_sql_refsols/global_acctbal_breakdown_snowflake.sql new file mode 100644 index 000000000..973d5975d --- /dev/null +++ b/tests/test_sql_refsols/global_acctbal_breakdown_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS n_red_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS n_black_acctbal, + MEDIAN(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS median_red_acctbal, + MEDIAN(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS median_black_acctbal, + MEDIAN(c_acctbal) AS median_overall_acctbal +FROM tpch.customer diff --git a/tests/test_sql_refsols/hour_minute_day_snowflake.sql b/tests/test_sql_refsols/hour_minute_day_snowflake.sql new file mode 100644 index 000000000..571c2b0be --- /dev/null +++ b/tests/test_sql_refsols/hour_minute_day_snowflake.sql @@ -0,0 +1,11 @@ +SELECT + sbtransaction.sbtxid AS transaction_id, + HOUR(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) AS _expr0, + MINUTE(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) AS _expr1, + SECOND(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) AS _expr2 +FROM main.sbtransaction AS sbtransaction +JOIN main.sbticker AS sbticker + ON sbticker.sbtickerid = sbtransaction.sbtxtickerid + AND sbticker.sbtickersymbol IN ('AAPL', 'GOOGL', 'NFLX') +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/nation_acctbal_breakdown_snowflake.sql b/tests/test_sql_refsols/nation_acctbal_breakdown_snowflake.sql new file mode 100644 index 000000000..5a3587e17 --- /dev/null +++ b/tests/test_sql_refsols/nation_acctbal_breakdown_snowflake.sql @@ -0,0 +1,26 @@ +WITH _s3 AS ( + SELECT + MEDIAN(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS median_black_acctbal, + MEDIAN(c_acctbal) AS median_overall_acctbal, + MEDIAN(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS median_red_acctbal, + COUNT(CASE WHEN c_acctbal >= 0 THEN c_acctbal ELSE NULL END) AS n_black_acctbal, + COUNT(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END) AS n_red_acctbal, + c_nationkey + FROM tpch.customer + GROUP BY + 6 +) +SELECT + nation.n_name AS nation_name, + _s3.n_red_acctbal, + _s3.n_black_acctbal, + _s3.median_red_acctbal, + _s3.median_black_acctbal, 
+ _s3.median_overall_acctbal +FROM tpch.nation AS nation +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AMERICA' +JOIN _s3 AS _s3 + ON _s3.c_nationkey = nation.n_nationkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/numerical_functions_snowflake.sql b/tests/test_sql_refsols/numerical_functions_snowflake.sql new file mode 100644 index 000000000..d02e9691a --- /dev/null +++ b/tests/test_sql_refsols/numerical_functions_snowflake.sql @@ -0,0 +1,11 @@ +SELECT + ABS(c_acctbal) AS abs_value, + ROUND(c_acctbal, 2) AS round_value, + CEIL(c_acctbal) AS ceil_value, + FLOOR(c_acctbal) AS floor_value, + POWER(c_acctbal, 2) AS power_value, + POWER(c_acctbal, 0.5) AS sqrt_value, + SIGN(c_acctbal) AS sign_value, + LEAST(c_acctbal, 0) AS smallest_value, + GREATEST(c_acctbal, 0) AS largest_value +FROM tpch.customer diff --git a/tests/test_sql_refsols/part_cross_part_a_snowflake.sql b/tests/test_sql_refsols/part_cross_part_a_snowflake.sql new file mode 100644 index 000000000..5d7c38cea --- /dev/null +++ b/tests/test_sql_refsols/part_cross_part_a_snowflake.sql @@ -0,0 +1,35 @@ +WITH _s0 AS ( + SELECT DISTINCT + sbtickerexchange + FROM main.sbticker +), _s9 AS ( + SELECT + COUNT(*) AS n_rows, + sbcustomer.sbcustid, + _s2.sbtickerexchange + FROM _s0 AS _s2 + CROSS JOIN main.sbcustomer AS sbcustomer + JOIN main.sbtransaction AS sbtransaction + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid + JOIN main.sbticker AS sbticker + ON _s2.sbtickerexchange = sbticker.sbtickerexchange + AND sbticker.sbtickerid = sbtransaction.sbtxtickerid + GROUP BY + 2, + 3 +) +SELECT + sbcustomer.sbcuststate AS state, + _s0.sbtickerexchange AS exchange, + COALESCE(SUM(_s9.n_rows), 0) AS n +FROM _s0 AS _s0 +CROSS JOIN main.sbcustomer AS sbcustomer +LEFT JOIN _s9 AS _s9 + ON _s0.sbtickerexchange = _s9.sbtickerexchange + AND _s9.sbcustid = sbcustomer.sbcustid +GROUP BY + 1, + 2 +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/part_cross_part_b_snowflake.sql b/tests/test_sql_refsols/part_cross_part_b_snowflake.sql new file mode 100644 index 000000000..695035c21 --- /dev/null +++ b/tests/test_sql_refsols/part_cross_part_b_snowflake.sql @@ -0,0 +1,46 @@ +WITH _s0 AS ( + SELECT DISTINCT + sbcuststate + FROM main.sbcustomer +), _t2 AS ( + SELECT + sbtxdatetime + FROM main.sbtransaction + WHERE + YEAR(CAST(sbtxdatetime AS TIMESTAMP)) = 2023 +), _s1 AS ( + SELECT DISTINCT + DATE_TRUNC('MONTH', CAST(sbtxdatetime AS TIMESTAMP)) AS month + FROM _t2 +), _s3 AS ( + SELECT DISTINCT + DATE_TRUNC('MONTH', CAST(sbtxdatetime AS TIMESTAMP)) AS month + FROM _t2 +), _s9 AS ( + SELECT + COUNT(*) AS n_rows, + _s3.month, + _s2.sbcuststate + FROM _s0 AS _s2 + CROSS JOIN _s3 AS _s3 + JOIN main.sbtransaction AS sbtransaction + ON YEAR(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) = 2023 + AND _s3.month = DATE_TRUNC('MONTH', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) + JOIN main.sbcustomer AS sbcustomer + ON _s2.sbcuststate = sbcustomer.sbcuststate + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid + GROUP BY + 2, + 3 +) +SELECT + _s0.sbcuststate AS state, + _s1.month AS month_of_year, + SUM(COALESCE(_s9.n_rows, 0)) OVER (PARTITION BY _s0.sbcuststate ORDER BY _s1.month ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS n +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 +LEFT JOIN _s9 AS _s9 + ON _s0.sbcuststate = _s9.sbcuststate AND _s1.month = _s9.month +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/part_cross_part_c_snowflake.sql 
b/tests/test_sql_refsols/part_cross_part_c_snowflake.sql new file mode 100644 index 000000000..82c088a76 --- /dev/null +++ b/tests/test_sql_refsols/part_cross_part_c_snowflake.sql @@ -0,0 +1,44 @@ +WITH _s0 AS ( + SELECT DISTINCT + sbcuststate + FROM main.sbcustomer +), _t2 AS ( + SELECT + sbtxdatetime + FROM main.sbtransaction + WHERE + YEAR(CAST(sbtxdatetime AS TIMESTAMP)) = 2023 +), _s1 AS ( + SELECT DISTINCT + DATE_TRUNC('MONTH', CAST(sbtxdatetime AS TIMESTAMP)) AS month + FROM _t2 +), _s3 AS ( + SELECT DISTINCT + DATE_TRUNC('MONTH', CAST(sbtxdatetime AS TIMESTAMP)) AS month + FROM _t2 +), _s9 AS ( + SELECT + COUNT(*) AS n_rows, + _s3.month, + _s2.sbcuststate + FROM _s0 AS _s2 + CROSS JOIN _s3 AS _s3 + JOIN main.sbtransaction AS sbtransaction + ON YEAR(CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) = 2023 + AND _s3.month = DATE_TRUNC('MONTH', CAST(sbtransaction.sbtxdatetime AS TIMESTAMP)) + JOIN main.sbcustomer AS sbcustomer + ON _s2.sbcuststate = sbcustomer.sbcuststate + AND sbcustomer.sbcustid = sbtransaction.sbtxcustid + GROUP BY + 2, + 3 +) +SELECT + _s0.sbcuststate AS state, + MAX(COALESCE(_s9.n_rows, 0)) AS max_n +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 +LEFT JOIN _s9 AS _s9 + ON _s0.sbcuststate = _s9.sbcuststate AND _s1.month = _s9.month +GROUP BY + 1 diff --git a/tests/test_sql_refsols/quantile_test_1_snowflake.sql b/tests/test_sql_refsols/quantile_test_1_snowflake.sql new file mode 100644 index 000000000..00345f528 --- /dev/null +++ b/tests/test_sql_refsols/quantile_test_1_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + PERCENTILE_DISC(0.7) WITHIN GROUP (ORDER BY + orders.o_totalprice) AS seventieth_order_price +FROM tpch.customer AS customer +JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1998 + AND customer.c_custkey = orders.o_custkey diff --git a/tests/test_sql_refsols/quantile_test_2_snowflake.sql b/tests/test_sql_refsols/quantile_test_2_snowflake.sql new file mode 100644 index 000000000..d0703b387 --- /dev/null +++ b/tests/test_sql_refsols/quantile_test_2_snowflake.sql @@ -0,0 +1,56 @@ +WITH _s0 AS ( + SELECT + n_name, + n_nationkey, + n_regionkey + FROM tpch.nation + ORDER BY + 1 NULLS FIRST + LIMIT 5 +), _s5 AS ( + SELECT + PERCENTILE_DISC(0.1) WITHIN GROUP (ORDER BY + orders.o_totalprice) AS agg_0, + PERCENTILE_DISC(0.01) WITHIN GROUP (ORDER BY + orders.o_totalprice) AS agg_1, + PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY + orders.o_totalprice) AS agg_2, + PERCENTILE_DISC(0.75) WITHIN GROUP (ORDER BY + orders.o_totalprice) AS agg_3, + PERCENTILE_DISC(0.9) WITHIN GROUP (ORDER BY + orders.o_totalprice) AS agg_4, + PERCENTILE_DISC(0.99) WITHIN GROUP (ORDER BY + orders.o_totalprice) AS agg_5, + PERCENTILE_DISC(1.0) WITHIN GROUP (ORDER BY + orders.o_totalprice) AS agg_6, + PERCENTILE_DISC(0.5) WITHIN GROUP (ORDER BY + orders.o_totalprice) AS agg_7, + PERCENTILE_DISC(0.0) WITHIN GROUP (ORDER BY + orders.o_totalprice) AS agg_8, + customer.c_nationkey + FROM tpch.customer AS customer + JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1998 + AND customer.c_custkey = orders.o_custkey + GROUP BY + 10 +) +SELECT + region.r_name AS region_name, + _s0.n_name AS nation_name, + _s5.agg_8 AS orders_min, + _s5.agg_1 AS orders_1_percent, + _s5.agg_0 AS orders_10_percent, + _s5.agg_2 AS orders_25_percent, + _s5.agg_7 AS orders_median, + _s5.agg_3 AS orders_75_percent, + _s5.agg_4 AS orders_90_percent, + _s5.agg_5 AS orders_99_percent, + _s5.agg_6 AS orders_max +FROM _s0 AS _s0 +JOIN tpch.region AS region + ON _s0.n_regionkey = 
region.r_regionkey +LEFT JOIN _s5 AS _s5 + ON _s0.n_nationkey = _s5.c_nationkey +ORDER BY + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/rank_a_snowflake.sql b/tests/test_sql_refsols/rank_a_snowflake.sql new file mode 100644 index 000000000..483dd35e1 --- /dev/null +++ b/tests/test_sql_refsols/rank_a_snowflake.sql @@ -0,0 +1,4 @@ +SELECT + c_custkey AS id, + ROW_NUMBER() OVER (ORDER BY c_acctbal DESC) AS rk +FROM tpch.customer diff --git a/tests/test_sql_refsols/rank_b_snowflake.sql b/tests/test_sql_refsols/rank_b_snowflake.sql new file mode 100644 index 000000000..14229140a --- /dev/null +++ b/tests/test_sql_refsols/rank_b_snowflake.sql @@ -0,0 +1,4 @@ +SELECT + o_orderkey AS order_key, + RANK() OVER (ORDER BY o_orderpriority) AS rank +FROM tpch.orders diff --git a/tests/test_sql_refsols/rank_c_snowflake.sql b/tests/test_sql_refsols/rank_c_snowflake.sql new file mode 100644 index 000000000..159e7b9ba --- /dev/null +++ b/tests/test_sql_refsols/rank_c_snowflake.sql @@ -0,0 +1,4 @@ +SELECT + o_orderdate AS order_date, + DENSE_RANK() OVER (ORDER BY o_orderdate) AS rank +FROM tpch.orders diff --git a/tests/test_sql_refsols/region_acctbal_breakdown_snowflake.sql b/tests/test_sql_refsols/region_acctbal_breakdown_snowflake.sql new file mode 100644 index 000000000..b4ab449d5 --- /dev/null +++ b/tests/test_sql_refsols/region_acctbal_breakdown_snowflake.sql @@ -0,0 +1,26 @@ +WITH _s3 AS ( + SELECT + MEDIAN(CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END) AS median_black_acctbal, + MEDIAN(customer.c_acctbal) AS median_overall_acctbal, + MEDIAN(CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END) AS median_red_acctbal, + COUNT(CASE WHEN customer.c_acctbal >= 0 THEN customer.c_acctbal ELSE NULL END) AS n_black_acctbal, + COUNT(CASE WHEN customer.c_acctbal < 0 THEN customer.c_acctbal ELSE NULL END) AS n_red_acctbal, + nation.n_regionkey + FROM tpch.nation AS nation + JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey + GROUP BY + 6 +) +SELECT + region.r_name AS region_name, + _s3.n_red_acctbal, + _s3.n_black_acctbal, + _s3.median_red_acctbal, + _s3.median_black_acctbal, + _s3.median_overall_acctbal +FROM tpch.region AS region +JOIN _s3 AS _s3 + ON _s3.n_regionkey = region.r_regionkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/simple_filter_snowflake.sql b/tests/test_sql_refsols/simple_filter_snowflake.sql new file mode 100644 index 000000000..b3caa6ec4 --- /dev/null +++ b/tests/test_sql_refsols/simple_filter_snowflake.sql @@ -0,0 +1,7 @@ +SELECT + o_orderdate AS order_date, + o_orderkey, + o_totalprice +FROM tpch.orders +WHERE + o_totalprice < 1000.0 diff --git a/tests/test_sql_refsols/simple_scan_snowflake.sql b/tests/test_sql_refsols/simple_scan_snowflake.sql new file mode 100644 index 000000000..4a2aac94f --- /dev/null +++ b/tests/test_sql_refsols/simple_scan_snowflake.sql @@ -0,0 +1,3 @@ +SELECT + o_orderkey AS key +FROM tpch.orders diff --git a/tests/test_sql_refsols/simple_smallest_or_largest_snowflake.sql b/tests/test_sql_refsols/simple_smallest_or_largest_snowflake.sql new file mode 100644 index 000000000..9a77bc82b --- /dev/null +++ b/tests/test_sql_refsols/simple_smallest_or_largest_snowflake.sql @@ -0,0 +1,29 @@ +SELECT + LEAST(20, 10) AS s1, + LEAST(20, 20) AS s2, + LEAST(20, 10, 0) AS s3, + LEAST(20, 10, 10, -1, -2, 100, -200) AS s4, + LEAST(20, 10, NULL, 100, 200) AS s5, + LEAST(20.22, 10.22, -0.34) AS s6, + LEAST( + CAST('2025-01-01 00:00:00' AS TIMESTAMP), + CAST('2024-01-01 00:00:00' AS 
TIMESTAMP), + CAST('2023-01-01 00:00:00' AS TIMESTAMP) + ) AS s7, + LEAST('', 'alphabet soup', 'Hello World') AS s8, + LEAST(NULL, 'alphabet soup', 'Hello World') AS s9, + GREATEST(20, 10) AS l1, + GREATEST(20, 20) AS l2, + GREATEST(20, 10, 0) AS l3, + GREATEST(20, 10, 10, -1, -2, 100, -200, 300) AS l4, + GREATEST(20, 10, NULL, 100, 200) AS l5, + GREATEST(20.22, 100.22, -0.34) AS l6, + GREATEST( + CAST('2025-01-01 00:00:00' AS TIMESTAMP), + CAST('2024-01-01 00:00:00' AS TIMESTAMP), + CAST('2023-01-01 00:00:00' AS TIMESTAMP) + ) AS l7, + GREATEST('', 'alphabet soup', 'Hello World') AS l8, + GREATEST(NULL, 'alphabet soup', 'Hello World') AS l9 +FROM (VALUES + (NULL)) AS _q_0(_col_0) diff --git a/tests/test_sql_refsols/simple_var_std_snowflake.sql b/tests/test_sql_refsols/simple_var_std_snowflake.sql new file mode 100644 index 000000000..6e8b2ccfc --- /dev/null +++ b/tests/test_sql_refsols/simple_var_std_snowflake.sql @@ -0,0 +1,24 @@ +WITH _s1 AS ( + SELECT + STDDEV_POP(s_acctbal) AS pop_std, + VARIANCE_POP(s_acctbal) AS pop_var, + STDDEV(s_acctbal) AS sample_std, + VARIANCE(s_acctbal) AS sample_var, + s_nationkey + FROM tpch.supplier + GROUP BY + 5 +) +SELECT + nation.n_name AS name, + _s1.pop_var AS var, + _s1.pop_std AS std, + _s1.sample_var, + _s1.sample_std, + _s1.pop_var, + _s1.pop_std +FROM tpch.nation AS nation +JOIN _s1 AS _s1 + ON _s1.s_nationkey = nation.n_nationkey +WHERE + nation.n_name IN ('ALGERIA', 'ARGENTINA') diff --git a/tests/test_sql_refsols/simplification_1_snowflake.sql b/tests/test_sql_refsols/simplification_1_snowflake.sql new file mode 100644 index 000000000..9e7a4ecbe --- /dev/null +++ b/tests/test_sql_refsols/simplification_1_snowflake.sql @@ -0,0 +1,26 @@ +SELECT + 13 AS s00, + 0 AS s01, + COUNT(*) AS s02, + COUNT(*) + 5 AS s03, + COUNT(*) * 2 AS s04, + COUNT(*) / 8.0 AS s05, + 10 AS s06, + COUNT(*) AS s07, + ABS(COUNT(*) - 25) AS s08, + COUNT(*) + 1 AS s09, + COUNT(*) - 3 AS s10, + COUNT(*) * -1 AS s11, + COUNT(*) / 2.5 AS s12, + COUNT(*) > 10 AS s13, + COUNT(*) >= 10 AS s14, + COUNT(*) = 20 AS s15, + COUNT(*) <> 25 AS s16, + COUNT(*) < 25 AS s17, + COUNT(*) <= 25 AS s18, + COUNT(*) AS s19, + AVG(COALESCE(LENGTH(sbcustname), 0)) AS s20, + TRUE AS s21, + TRUE AS s22, + TRUE AS s23 +FROM main.sbcustomer diff --git a/tests/test_sql_refsols/simplification_2_snowflake.sql b/tests/test_sql_refsols/simplification_2_snowflake.sql new file mode 100644 index 000000000..963c3a426 --- /dev/null +++ b/tests/test_sql_refsols/simplification_2_snowflake.sql @@ -0,0 +1,50 @@ +SELECT + TRUE AS s00, + FALSE AS s01, + TRUE AS s02, + FALSE AS s03, + TRUE AS s04, + FALSE AS s05, + NULL AS s06, + NULL AS s07, + NULL AS s08, + NULL AS s09, + NULL AS s10, + NULL AS s11, + FALSE AS s12, + TRUE AS s13, + FALSE AS s14, + FALSE AS s15, + TRUE AS s16, + TRUE AS s17, + TRUE AS s18, + FALSE AS s19, + TRUE AS s20, + FALSE AS s21, + TRUE AS s22, + FALSE AS s23, + FALSE AS s24, + TRUE AS s25, + TRUE AS s26, + FALSE AS s27, + TRUE AS s28, + FALSE AS s29, + 8 AS s30, + 'alphabet' AS s31, + 'SOUP' AS s32, + TRUE AS s33, + FALSE AS s34, + FALSE AS s35, + TRUE AS s36, + 3.0 AS s37, + NULL AS s38, + NULL AS s39, + NULL AS s40, + NULL AS s41, + NULL AS s42, + NULL AS s43, + NULL AS s44, + NULL AS s45, + NULL AS s46, + COALESCE(MAX(sbcustname), '') LIKE '%r%' AS s47 +FROM main.sbcustomer diff --git a/tests/test_sql_refsols/simplification_3_snowflake.sql b/tests/test_sql_refsols/simplification_3_snowflake.sql new file mode 100644 index 000000000..a1b6b5a76 --- /dev/null +++ 
b/tests/test_sql_refsols/simplification_3_snowflake.sql @@ -0,0 +1,66 @@ +WITH _t1 AS ( + SELECT + ROW_NUMBER() OVER (ORDER BY sbcustname) AS rank, + AVG(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER () AS ravg1, + COALESCE( + AVG(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER (ORDER BY sbcustname ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), + 0.1 + ) AS ravg2, + COUNT(CAST(sbcustpostalcode AS BIGINT)) OVER () AS rcnt1, + COALESCE( + COUNT(CAST(sbcustpostalcode AS BIGINT)) OVER (ORDER BY sbcustname ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 0.1 + ) AS rcnt2, + COUNT(*) OVER () AS rsiz1, + COALESCE( + COUNT(*) OVER (ORDER BY sbcustname ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING), + 0.1 + ) AS rsiz2, + SUM(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER () AS rsum1, + COALESCE( + SUM(ABS(COALESCE(CAST(sbcustpostalcode AS BIGINT), 0))) OVER (ORDER BY sbcustname ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 0.1 + ) AS rsum2, + sbcustpostalcode + FROM main.sbcustomer +) +SELECT + TRUE AS s00, + TRUE AS s01, + FALSE AS s02, + FALSE AS s03, + FALSE AS s04, + FALSE AS s05, + COUNT(*) >= 3 AS s06, + FALSE AS s07, + COUNT(*) <= 6 AS s08, + FALSE AS s09, + 91 AS s10, + 0 AS s11, + 50 AS s12, + 35 AS s13, + 25.0 AS s14, + ABS(COUNT(*) * -0.75) AS s15, + 10 AS s16, + COUNT(*) AS s17, + COUNT(*) AS s18, + FALSE AS s19, + TRUE AS s20, + FALSE AS s21, + TRUE AS s22, + FALSE AS s23, + TRUE AS s24, + PERCENTILE_DISC(0.25) WITHIN GROUP (ORDER BY + ABS(CAST(sbcustpostalcode AS BIGINT))) AS s25, + MEDIAN(ABS(CAST(sbcustpostalcode AS BIGINT))) AS s26, + MIN(rank) AS s27, + MAX(rank) AS s28, + ANY_VALUE(rsum1) AS s29, + ROUND(SUM(rsum2), 2) AS s30, + ANY_VALUE(ravg1) AS s31, + ROUND(SUM(ravg2), 2) AS s32, + ANY_VALUE(rcnt1) AS s33, + ROUND(SUM(rcnt2), 2) AS s34, + ANY_VALUE(rsiz1) AS s35, + ROUND(SUM(rsiz2), 2) AS s36 +FROM _t1 diff --git a/tests/test_sql_refsols/smoke_a_snowflake.sql b/tests/test_sql_refsols/smoke_a_snowflake.sql new file mode 100644 index 000000000..9a0ae9ce2 --- /dev/null +++ b/tests/test_sql_refsols/smoke_a_snowflake.sql @@ -0,0 +1,57 @@ +SELECT + p_partkey AS key, + CAST(CONCAT_WS( + '', + SUBSTRING( + p_brand, + CASE WHEN ( + LENGTH(p_brand) + -1 + ) < 1 THEN 1 ELSE ( + LENGTH(p_brand) + -1 + ) END + ), + SUBSTRING(p_brand, 8), + SUBSTRING( + p_brand, + CASE WHEN ( + LENGTH(p_brand) + -1 + ) < 1 THEN 1 ELSE ( + LENGTH(p_brand) + -1 + ) END, + CASE + WHEN ( + LENGTH(p_brand) + 0 + ) < 1 + THEN 0 + ELSE ( + LENGTH(p_brand) + 0 + ) - CASE WHEN ( + LENGTH(p_brand) + -1 + ) < 1 THEN 1 ELSE ( + LENGTH(p_brand) + -1 + ) END + END + ) + ) AS BIGINT) AS a, + UPPER(LEAST(SPLIT_PART(p_name, ' ', 2), SPLIT_PART(p_name, ' ', -1))) AS b, + TRIM(SUBSTRING(p_name, 1, 2), 'o') AS c, + LPAD(CAST(p_size AS TEXT), 3, '0') AS d, + RPAD(CAST(p_size AS TEXT), 3, '0') AS e, + REPLACE(p_mfgr, 'Manufacturer#', 'm') AS f, + REPLACE(LOWER(p_container), ' ', '') AS g, + CASE + WHEN LENGTH('o') = 0 + THEN 0 + ELSE CAST(( + LENGTH(p_name) - LENGTH(REPLACE(p_name, 'o', '')) + ) / LENGTH('o') AS BIGINT) + END + ( + ( + CHARINDEX('o', p_name) - 1 + ) / 100.0 + ) AS h, + ROUND(POWER(GREATEST(p_size, 10), 0.5), 3) AS i +FROM tpch.part +ORDER BY + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/smoke_b_ansi.sql b/tests/test_sql_refsols/smoke_b_ansi.sql index e219c7cc8..f7cc5ec0f 100644 --- a/tests/test_sql_refsols/smoke_b_ansi.sql +++ b/tests/test_sql_refsols/smoke_b_ansi.sql @@ -33,13 +33,13 @@ SELECT 25, 'HOUR' ) AS d, - DATE_TRUNC('MINUTE', 
CAST('2025-01-01 12:35:13' AS TIMESTAMP)) AS e, + TIMESTAMP_TRUNC(CAST('2025-01-01 12:35:13' AS TIMESTAMP), MINUTE) AS e, DATE_ADD( - DATE_ADD(DATE_TRUNC('HOUR', CAST('2025-01-01 12:35:13' AS TIMESTAMP)), 2, 'QUARTER'), + DATE_ADD(TIMESTAMP_TRUNC(CAST('2025-01-01 12:35:13' AS TIMESTAMP), HOUR), 2, 'QUARTER'), 3, 'WEEK' ) AS f, - CAST('2025-01-01 12:35:13' AS TIMESTAMP) AS g, + DATE_TRUNC('DAY', CAST('2025-01-01 12:35:13' AS TIMESTAMP)) AS g, CONCAT_WS( ';', EXTRACT(HOUR FROM CAST('2025-01-01 12:35:13' AS TIMESTAMP)), diff --git a/tests/test_sql_refsols/smoke_b_mysql.sql b/tests/test_sql_refsols/smoke_b_mysql.sql index c48920a85..5b4b461a1 100644 --- a/tests/test_sql_refsols/smoke_b_mysql.sql +++ b/tests/test_sql_refsols/smoke_b_mysql.sql @@ -34,8 +34,20 @@ SELECT ) AS DATETIME), INTERVAL '25' HOUR ) AS d, - DATE(CAST('2025-01-01 12:35:13' AS DATETIME)) AS e, - CAST('2025-07-22' AS DATE) AS f, + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF(MINUTE, '0000-01-01 00:00:00', CAST('2025-01-01 12:35:13' AS DATETIME))) MINUTE + ) AS e, + DATE_ADD( + DATE_ADD( + DATE_ADD( + '0000-01-01 00:00:00', + INTERVAL (TIMESTAMPDIFF(HOUR, '0000-01-01 00:00:00', CAST('2025-01-01 12:35:13' AS DATETIME))) HOUR + ), + INTERVAL '2' QUARTER + ), + INTERVAL '3' WEEK + ) AS f, CAST(CAST('2025-01-01 12:35:13' AS DATETIME) AS DATE) AS g, CONCAT_WS( ';', diff --git a/tests/test_sql_refsols/smoke_b_snowflake.sql b/tests/test_sql_refsols/smoke_b_snowflake.sql new file mode 100644 index 000000000..0e8b04f07 --- /dev/null +++ b/tests/test_sql_refsols/smoke_b_snowflake.sql @@ -0,0 +1,72 @@ +SELECT + o_orderkey AS key, + CONCAT_WS( + '_', + YEAR(CAST(o_orderdate AS TIMESTAMP)), + QUARTER(CAST(o_orderdate AS TIMESTAMP)), + MONTH(CAST(o_orderdate AS TIMESTAMP)), + DAY(CAST(o_orderdate AS TIMESTAMP)) + ) AS a, + CONCAT_WS( + ':', + CASE + WHEN DAYOFWEEK(o_orderdate) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(o_orderdate) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(o_orderdate) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(o_orderdate) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(o_orderdate) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(o_orderdate) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(o_orderdate) = 6 + THEN 'Saturday' + END, + DAYOFWEEK(o_orderdate) + ) AS b, + DATEADD(DAY, -13, DATEADD(MONTH, 6, DATE_TRUNC('YEAR', CAST(o_orderdate AS TIMESTAMP)))) AS c, + DATEADD(HOUR, 25, DATEADD(YEAR, 1, DATE_TRUNC('QUARTER', CAST(o_orderdate AS TIMESTAMP)))) AS d, + DATE_TRUNC('MINUTE', CAST('2025-01-01 12:35:13' AS TIMESTAMP)) AS e, + DATEADD( + WEEK, + 3, + DATEADD(QUARTER, 2, DATE_TRUNC('HOUR', CAST('2025-01-01 12:35:13' AS TIMESTAMP))) + ) AS f, + DATE_TRUNC('DAY', CAST('2025-01-01 12:35:13' AS TIMESTAMP)) AS g, + CONCAT_WS( + ';', + HOUR(CAST('2025-01-01 12:35:13' AS TIMESTAMP)), + MINUTE(CAST(CAST('2025-01-01 13:20:13' AS TIMESTAMP) AS TIMESTAMP)), + SECOND(CAST(CAST('2025-01-01 12:35:06' AS TIMESTAMP) AS TIMESTAMP)) + ) AS h, + DATEDIFF(YEAR, CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS DATETIME)) AS i, + DATEDIFF(QUARTER, CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS DATETIME)) AS j, + DATEDIFF(MONTH, CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS DATETIME)) AS k, + DATEDIFF( + WEEK, + DATEADD( + DAY, + DAYOFWEEK(CAST('1993-05-25 12:45:36' AS TIMESTAMP)) * -1, + CAST('1993-05-25 12:45:36' AS TIMESTAMP) + ), + CAST(DATEADD(DAY, DAYOFWEEK(o_orderdate) * -1, o_orderdate) AS DATETIME) + ) AS l, + DATEDIFF(DAY, CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS DATETIME)) AS m, + DATEDIFF(HOUR, 
CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS DATETIME)) AS n, + DATEDIFF(MINUTE, CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS DATETIME)) AS o, + DATEDIFF(SECOND, CAST('1993-05-25 12:45:36' AS TIMESTAMP), CAST(o_orderdate AS DATETIME)) AS p, + DATE_TRUNC( + 'DAY', + DATEADD(DAY, DAYOFWEEK(CAST(o_orderdate AS TIMESTAMP)) * -1, CAST(o_orderdate AS TIMESTAMP)) + ) AS q +FROM tpch.orders +WHERE + CONTAINS(o_comment, 'fo') + AND ENDSWITH(o_clerk, '5') + AND STARTSWITH(o_orderpriority, '3') +ORDER BY + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/smoke_c_snowflake.sql b/tests/test_sql_refsols/smoke_c_snowflake.sql new file mode 100644 index 000000000..4bcbbcc4d --- /dev/null +++ b/tests/test_sql_refsols/smoke_c_snowflake.sql @@ -0,0 +1,21 @@ +SELECT + COUNT(*) AS a, + COALESCE(SUM(FLOOR(c_acctbal)), 0) AS b, + COALESCE(SUM(CEIL(c_acctbal)), 0) AS c, + COUNT(DISTINCT c_mktsegment) AS d, + ROUND(AVG(ABS(c_acctbal)), 4) AS e, + MIN(c_acctbal) AS f, + MAX(c_acctbal) AS g, + ANY_VALUE(SUBSTRING(c_name, 1, 1)) AS h, + COUNT(CASE WHEN c_acctbal > 0 THEN c_acctbal ELSE NULL END) AS i, + CEIL(VARIANCE_POP(CASE WHEN c_acctbal > 0 THEN c_acctbal ELSE NULL END)) AS j, + ROUND(VARIANCE(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END), 4) AS k, + FLOOR(STDDEV_POP(CASE WHEN c_acctbal < 0 THEN c_acctbal ELSE NULL END)) AS l, + ROUND(STDDEV(CASE WHEN c_acctbal > 0 THEN c_acctbal ELSE NULL END), 4) AS m, + ROUND(AVG(COALESCE(CASE WHEN c_acctbal > 0 THEN c_acctbal ELSE NULL END, 0)), 2) AS n, + COUNT_IF(NOT CASE WHEN c_acctbal > 1000 THEN c_acctbal ELSE NULL END IS NULL) AS o, + COUNT_IF(CASE WHEN c_acctbal > 1000 THEN c_acctbal ELSE NULL END IS NULL) AS p, + PERCENTILE_DISC(0.2) WITHIN GROUP (ORDER BY + c_acctbal) AS q, + MEDIAN(c_acctbal) AS r +FROM tpch.customer diff --git a/tests/test_sql_refsols/smoke_d_snowflake.sql b/tests/test_sql_refsols/smoke_d_snowflake.sql new file mode 100644 index 000000000..fe9dfe411 --- /dev/null +++ b/tests/test_sql_refsols/smoke_d_snowflake.sql @@ -0,0 +1,29 @@ +SELECT + customer.c_custkey AS key, + ROW_NUMBER() OVER (ORDER BY customer.c_acctbal, customer.c_custkey) AS a, + ROW_NUMBER() OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal, customer.c_custkey) AS b, + RANK() OVER (ORDER BY customer.c_mktsegment) AS c, + DENSE_RANK() OVER (ORDER BY customer.c_mktsegment) AS d, + NTILE(100) OVER (ORDER BY customer.c_acctbal, customer.c_custkey) AS e, + NTILE(12) OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal, customer.c_custkey) AS f, + LAG(customer.c_custkey, 1) OVER (ORDER BY customer.c_custkey) AS g, + LAG(customer.c_custkey, 2, -1) OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_custkey) AS h, + LEAD(customer.c_custkey, 1) OVER (ORDER BY customer.c_custkey) AS i, + LEAD(customer.c_custkey, 6000) OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_custkey) AS j, + SUM(customer.c_acctbal) OVER (PARTITION BY customer.c_nationkey) AS k, + SUM(customer.c_acctbal) OVER (ORDER BY customer.c_custkey ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS l, + ROUND(AVG(customer.c_acctbal) OVER (), 2) AS m, + ROUND( + AVG(customer.c_acctbal) OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_custkey ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), + 2 + ) AS n, + COUNT(CASE WHEN customer.c_acctbal > 0 THEN customer.c_acctbal ELSE NULL END) OVER () AS o, + COUNT(*) OVER () AS p +FROM tpch.nation AS nation +JOIN tpch.region AS region + ON nation.n_regionkey = 
region.r_regionkey AND region.r_name = 'ASIA' +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +ORDER BY + 1 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_friday_one_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_friday_one_snowflake.sql new file mode 100644 index 000000000..b7abadaab --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_friday_one_snowflake.sql @@ -0,0 +1,39 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) + 2 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + ( + ( + DAYOFWEEK(sbtxdatetime) + 2 + ) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_friday_zero_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_friday_zero_snowflake.sql new file mode 100644 index 000000000..18d65f3ec --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_friday_zero_snowflake.sql @@ -0,0 +1,37 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) + 2 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + ( + DAYOFWEEK(sbtxdatetime) + 2 + ) % 7 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_monday_one_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_monday_one_snowflake.sql new file mode 100644 index 000000000..2507f64b0 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_monday_one_snowflake.sql @@ -0,0 +1,39 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) + 6 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + ( + ( + DAYOFWEEK(sbtxdatetime) + 6 + ) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git 
a/tests/test_sql_refsols/sql_transaction_week_sampler_monday_zero_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_monday_zero_snowflake.sql new file mode 100644 index 000000000..9cc943050 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_monday_zero_snowflake.sql @@ -0,0 +1,37 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) + 6 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + ( + DAYOFWEEK(sbtxdatetime) + 6 + ) % 7 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_saturday_one_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_saturday_one_snowflake.sql new file mode 100644 index 000000000..cb89fecc5 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_saturday_one_snowflake.sql @@ -0,0 +1,39 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) + 1 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + ( + ( + DAYOFWEEK(sbtxdatetime) + 1 + ) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_saturday_zero_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_saturday_zero_snowflake.sql new file mode 100644 index 000000000..e05df5b6c --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_saturday_zero_snowflake.sql @@ -0,0 +1,37 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) + 1 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + ( + DAYOFWEEK(sbtxdatetime) + 1 + ) % 7 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_sunday_one_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_sunday_one_snowflake.sql new file mode 100644 index 000000000..3cf42e00b --- 
/dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_sunday_one_snowflake.sql @@ -0,0 +1,31 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + DAYOFWEEK(sbtxdatetime) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_sunday_zero_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_sunday_zero_snowflake.sql new file mode 100644 index 000000000..cfd9286a4 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_sunday_zero_snowflake.sql @@ -0,0 +1,31 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + DAYOFWEEK(sbtxdatetime) AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_thursday_one_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_thursday_one_snowflake.sql new file mode 100644 index 000000000..f90847533 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_thursday_one_snowflake.sql @@ -0,0 +1,39 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) + 3 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + ( + ( + DAYOFWEEK(sbtxdatetime) + 3 + ) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_thursday_zero_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_thursday_zero_snowflake.sql new file mode 100644 index 000000000..49bcc08ed --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_thursday_zero_snowflake.sql @@ -0,0 +1,37 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) + 3 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS 
TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + ( + DAYOFWEEK(sbtxdatetime) + 3 + ) % 7 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_tuesday_one_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_tuesday_one_snowflake.sql new file mode 100644 index 000000000..c38c8adfe --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_tuesday_one_snowflake.sql @@ -0,0 +1,39 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) + 5 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + ( + ( + DAYOFWEEK(sbtxdatetime) + 5 + ) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_tuesday_zero_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_tuesday_zero_snowflake.sql new file mode 100644 index 000000000..288348ef7 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_tuesday_zero_snowflake.sql @@ -0,0 +1,37 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) + 5 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + ( + DAYOFWEEK(sbtxdatetime) + 5 + ) % 7 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_wednesday_one_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_wednesday_one_snowflake.sql new file mode 100644 index 000000000..1739fbb7c --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_wednesday_one_snowflake.sql @@ -0,0 +1,39 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) + 4 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN 
DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + ( + ( + DAYOFWEEK(sbtxdatetime) + 4 + ) % 7 + ) + 1 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/sql_transaction_week_sampler_wednesday_zero_snowflake.sql b/tests/test_sql_refsols/sql_transaction_week_sampler_wednesday_zero_snowflake.sql new file mode 100644 index 000000000..7359370e5 --- /dev/null +++ b/tests/test_sql_refsols/sql_transaction_week_sampler_wednesday_zero_snowflake.sql @@ -0,0 +1,37 @@ +SELECT + sbtxdatetime AS date_time, + DATE_TRUNC( + 'DAY', + DATEADD( + DAY, + ( + ( + DAYOFWEEK(CAST(sbtxdatetime AS TIMESTAMP)) + 4 + ) % 7 + ) * -1, + CAST(sbtxdatetime AS TIMESTAMP) + ) + ) AS sow, + CASE + WHEN DAYOFWEEK(sbtxdatetime) = 0 + THEN 'Sunday' + WHEN DAYOFWEEK(sbtxdatetime) = 1 + THEN 'Monday' + WHEN DAYOFWEEK(sbtxdatetime) = 2 + THEN 'Tuesday' + WHEN DAYOFWEEK(sbtxdatetime) = 3 + THEN 'Wednesday' + WHEN DAYOFWEEK(sbtxdatetime) = 4 + THEN 'Thursday' + WHEN DAYOFWEEK(sbtxdatetime) = 5 + THEN 'Friday' + WHEN DAYOFWEEK(sbtxdatetime) = 6 + THEN 'Saturday' + END AS dayname, + ( + DAYOFWEEK(sbtxdatetime) + 4 + ) % 7 AS dayofweek +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/string_functions_snowflake.sql b/tests/test_sql_refsols/string_functions_snowflake.sql new file mode 100644 index 000000000..613e229a7 --- /dev/null +++ b/tests/test_sql_refsols/string_functions_snowflake.sql @@ -0,0 +1,26 @@ +SELECT + LOWER(customer.c_name) AS lowercase_name, + UPPER(customer.c_name) AS uppercase_name, + LENGTH(customer.c_name) AS name_length, + STARTSWITH(customer.c_name, 'A') AS starts_with_A, + ENDSWITH(customer.c_name, 'z') AS ends_with_z, + CONTAINS(customer.c_name, 'sub') AS contains_sub, + customer.c_name LIKE '%test%' AS matches_like, + CONCAT_WS('::', customer.c_name, nation.n_name) AS joined_string, + LPAD(customer.c_name, 20, '*') AS lpad_name, + RPAD(customer.c_name, 20, '-') AS rpad_name, + TRIM(customer.c_name, '\n\t ') AS stripped, + TRIM(customer.c_name, 'aeiou') AS stripped_vowels, + REPLACE(customer.c_name, 'Corp', 'Inc') AS replaced_name, + REPLACE(customer.c_name, 'Ltd', '') AS removed_substr, + CASE + WHEN LENGTH('e') = 0 + THEN 0 + ELSE CAST(( + LENGTH(customer.c_name) - LENGTH(REPLACE(customer.c_name, 'e', '')) + ) / LENGTH('e') AS BIGINT) + END AS count_e, + CHARINDEX('Alex', customer.c_name) - 1 AS idx_Alex +FROM tpch.customer AS customer +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey diff --git a/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_snowflake.sql b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_snowflake.sql new file mode 100644 index 000000000..0b97ba986 --- /dev/null +++ b/tests/test_sql_refsols/technograph_battery_failure_rates_anomalies_snowflake.sql @@ -0,0 +1,29 @@ +WITH _s7 AS ( + SELECT + COUNT(*) AS n_rows, + incidents.in_device_id + FROM main.incidents AS incidents + JOIN main.errors AS errors + ON errors.er_id = incidents.in_error_id AND errors.er_name = 'Battery Failure' + GROUP BY + 2 +) +SELECT + countries.co_name AS country_name, + products.pr_name AS product_name, + ROUND(COALESCE(SUM(_s7.n_rows), 0) / COUNT(*), 2) 
AS ir +FROM main.countries AS countries +JOIN main.devices AS devices + ON countries.co_id = devices.de_production_country_id +JOIN main.products AS products + ON devices.de_product_id = products.pr_id +LEFT JOIN _s7 AS _s7 + ON _s7.in_device_id = devices.de_id +GROUP BY + 1, + 2 +ORDER BY + 3 DESC NULLS LAST, + 2 NULLS FIRST, + 1 NULLS FIRST +LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_cartesian_oddball_snowflake.sql b/tests/test_sql_refsols/technograph_country_cartesian_oddball_snowflake.sql new file mode 100644 index 000000000..6f371134e --- /dev/null +++ b/tests/test_sql_refsols/technograph_country_cartesian_oddball_snowflake.sql @@ -0,0 +1,12 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_other_countries + FROM main.countries +) +SELECT + countries.co_name AS name, + _s1.n_other_countries +FROM main.countries AS countries +CROSS JOIN _s1 AS _s1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql new file mode 100644 index 000000000..5835cd8c9 --- /dev/null +++ b/tests/test_sql_refsols/technograph_country_combination_analysis_snowflake.sql @@ -0,0 +1,46 @@ +WITH _s0 AS ( + SELECT + co_id, + co_name + FROM main.countries +), _s2 AS ( + SELECT + co_id + FROM main.countries +), _s7 AS ( + SELECT + COUNT(*) AS n_rows, + in_device_id + FROM main.incidents + GROUP BY + 2 +), _s9 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(_s7.n_rows) AS sum_n_rows, + _s3.co_id AS _id_3, + _s2.co_id + FROM _s2 AS _s2 + CROSS JOIN _s2 AS _s3 + JOIN main.devices AS devices + ON _s2.co_id = devices.de_production_country_id + AND _s3.co_id = devices.de_purchase_country_id + LEFT JOIN _s7 AS _s7 + ON _s7.in_device_id = devices.de_id + GROUP BY + 3, + 4 +) +SELECT + _s0.co_name AS factory_country, + _s1.co_name AS purchase_country, + ROUND(( + 1.0 * COALESCE(_s9.sum_n_rows, 0) + ) / COALESCE(_s9.n_rows, 0), 2) AS ir +FROM _s0 AS _s0 +CROSS JOIN _s0 AS _s1 +LEFT JOIN _s9 AS _s9 + ON _s0.co_id = _s9.co_id AND _s1.co_id = _s9._id_3 +ORDER BY + 3 DESC NULLS LAST +LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql new file mode 100644 index 000000000..13f73b452 --- /dev/null +++ b/tests/test_sql_refsols/technograph_country_incident_rate_analysis_snowflake.sql @@ -0,0 +1,72 @@ +WITH _t2 AS ( + SELECT + in_device_id + FROM main.incidents +), _s1 AS ( + SELECT + COUNT(*) AS n_rows, + in_device_id + FROM _t2 + GROUP BY + 2 +), _s3 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(_s1.n_rows) AS sum_n_rows, + devices.de_production_country_id + FROM main.devices AS devices + LEFT JOIN _s1 AS _s1 + ON _s1.in_device_id = devices.de_id + GROUP BY + 3 +), _s5 AS ( + SELECT + COUNT(*) AS n_rows, + in_device_id + FROM _t2 + GROUP BY + 2 +), _s7 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(_s5.n_rows) AS sum_n_rows, + devices.de_purchase_country_id + FROM main.devices AS devices + LEFT JOIN _s5 AS _s5 + ON _s5.in_device_id = devices.de_id + GROUP BY + 3 +), _s11 AS ( + SELECT + COUNT(*) AS n_rows, + in_device_id + FROM _t2 + GROUP BY + 2 +), _s13 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(_s11.n_rows) AS sum_n_rows, + users.us_country_id + FROM main.users AS users + JOIN main.devices AS devices + ON devices.de_owner_id = users.us_id + LEFT JOIN _s11 AS _s11 + ON _s11.in_device_id = devices.de_id + GROUP BY + 3 +) +SELECT + countries.co_name AS country_name, + 
ROUND(COALESCE(_s3.sum_n_rows, 0) / _s3.n_rows, 2) AS made_ir, + ROUND(COALESCE(_s7.sum_n_rows, 0) / _s7.n_rows, 2) AS sold_ir, + ROUND(COALESCE(_s13.sum_n_rows, 0) / COALESCE(_s13.n_rows, 0), 2) AS user_ir +FROM main.countries AS countries +JOIN _s3 AS _s3 + ON _s3.de_production_country_id = countries.co_id +JOIN _s7 AS _s7 + ON _s7.de_purchase_country_id = countries.co_id +LEFT JOIN _s13 AS _s13 + ON _s13.us_country_id = countries.co_id +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_snowflake.sql b/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_snowflake.sql new file mode 100644 index 000000000..3de27b18a --- /dev/null +++ b/tests/test_sql_refsols/technograph_error_percentages_sun_set_by_error_snowflake.sql @@ -0,0 +1,22 @@ +WITH _s5 AS ( + SELECT + COUNT(*) AS n_rows, + incidents.in_error_id + FROM main.incidents AS incidents + JOIN main.devices AS devices + ON devices.de_id = incidents.in_device_id + JOIN main.products AS products + ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' + GROUP BY + 2 +) +SELECT + errors.er_name AS error, + ROUND(( + 100.0 * COALESCE(_s5.n_rows, 0) + ) / SUM(COALESCE(_s5.n_rows, 0)) OVER (), 2) AS pct +FROM main.errors AS errors +LEFT JOIN _s5 AS _s5 + ON _s5.in_error_id = errors.er_id +ORDER BY + 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql new file mode 100644 index 000000000..9b395aef9 --- /dev/null +++ b/tests/test_sql_refsols/technograph_error_rate_sun_set_by_factory_country_snowflake.sql @@ -0,0 +1,28 @@ +WITH _s3 AS ( + SELECT + COUNT(*) AS n_rows, + in_device_id + FROM main.incidents + GROUP BY + 2 +), _s5 AS ( + SELECT + COALESCE(SUM(_s3.n_rows), 0) AS sum_n_incidents, + devices.de_production_country_id, + COUNT(*) AS n_rows + FROM main.devices AS devices + JOIN main.products AS products + ON devices.de_product_id = products.pr_id AND products.pr_name = 'Sun-Set' + LEFT JOIN _s3 AS _s3 + ON _s3.in_device_id = devices.de_id + GROUP BY + 2 +) +SELECT + countries.co_name AS country, + ROUND(COALESCE(_s5.sum_n_incidents, 0) / COALESCE(_s5.n_rows, 0), 2) AS ir +FROM main.countries AS countries +LEFT JOIN _s5 AS _s5 + ON _s5.de_production_country_id = countries.co_id +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_global_incident_rate_snowflake.sql b/tests/test_sql_refsols/technograph_global_incident_rate_snowflake.sql new file mode 100644 index 000000000..2d5a8ae63 --- /dev/null +++ b/tests/test_sql_refsols/technograph_global_incident_rate_snowflake.sql @@ -0,0 +1,13 @@ +WITH _s0 AS ( + SELECT + COUNT(*) AS n_rows + FROM main.incidents +), _s1 AS ( + SELECT + COUNT(*) AS n_rows + FROM main.devices +) +SELECT + ROUND(_s0.n_rows / _s1.n_rows, 2) AS ir +FROM _s0 AS _s0 +CROSS JOIN _s1 AS _s1 diff --git a/tests/test_sql_refsols/technograph_hot_purchase_window_snowflake.sql b/tests/test_sql_refsols/technograph_hot_purchase_window_snowflake.sql new file mode 100644 index 000000000..d7cbf77f4 --- /dev/null +++ b/tests/test_sql_refsols/technograph_hot_purchase_window_snowflake.sql @@ -0,0 +1,17 @@ +SELECT + calendar.ca_dt AS start_of_period, + COUNT(*) AS n_purchases +FROM main.calendar AS calendar +JOIN main.calendar AS calendar_2 + ON calendar.ca_dt <= calendar_2.ca_dt + AND calendar_2.ca_dt < DATEADD(DAY, 5, CAST(calendar.ca_dt AS TIMESTAMP)) +JOIN main.devices AS 
devices + ON calendar_2.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) +WHERE + YEAR(CAST(calendar.ca_dt AS TIMESTAMP)) = 2024 +GROUP BY + 1 +ORDER BY + 2 DESC NULLS LAST, + 1 NULLS FIRST +LIMIT 1 diff --git a/tests/test_sql_refsols/technograph_incident_rate_by_release_year_snowflake.sql b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_snowflake.sql new file mode 100644 index 000000000..316a5e3b1 --- /dev/null +++ b/tests/test_sql_refsols/technograph_incident_rate_by_release_year_snowflake.sql @@ -0,0 +1,41 @@ +WITH _s0 AS ( + SELECT + COUNT(*) AS n_rows, + de_product_id + FROM main.devices + GROUP BY + 2 +), _s1 AS ( + SELECT + pr_id, + pr_release + FROM main.products +), _s6 AS ( + SELECT + YEAR(CAST(_s1.pr_release AS TIMESTAMP)) AS release_year, + SUM(_s0.n_rows) AS sum_n_rows + FROM _s0 AS _s0 + JOIN _s1 AS _s1 + ON _s0.de_product_id = _s1.pr_id + GROUP BY + 1 +), _s7 AS ( + SELECT + COUNT(*) AS n_rows, + YEAR(CAST(_s3.pr_release AS TIMESTAMP)) AS release_year + FROM main.devices AS devices + JOIN _s1 AS _s3 + ON _s3.pr_id = devices.de_product_id + JOIN main.incidents AS incidents + ON devices.de_id = incidents.in_device_id + GROUP BY + 2 +) +SELECT + _s6.release_year AS year, + ROUND(COALESCE(_s7.n_rows, 0) / _s6.sum_n_rows, 2) AS ir +FROM _s6 AS _s6 +LEFT JOIN _s7 AS _s7 + ON _s6.release_year = _s7.release_year +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_incident_rate_per_brand_snowflake.sql b/tests/test_sql_refsols/technograph_incident_rate_per_brand_snowflake.sql new file mode 100644 index 000000000..075b007a3 --- /dev/null +++ b/tests/test_sql_refsols/technograph_incident_rate_per_brand_snowflake.sql @@ -0,0 +1,20 @@ +WITH _s3 AS ( + SELECT + COUNT(*) AS n_rows, + in_device_id + FROM main.incidents + GROUP BY + 2 +) +SELECT + products.pr_brand AS brand, + ROUND(COALESCE(SUM(_s3.n_rows), 0) / COUNT(*), 2) AS ir +FROM main.devices AS devices +JOIN main.products AS products + ON devices.de_product_id = products.pr_id +LEFT JOIN _s3 AS _s3 + ON _s3.in_device_id = devices.de_id +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_monthly_incident_rate_snowflake.sql b/tests/test_sql_refsols/technograph_monthly_incident_rate_snowflake.sql new file mode 100644 index 000000000..846bb0250 --- /dev/null +++ b/tests/test_sql_refsols/technograph_monthly_incident_rate_snowflake.sql @@ -0,0 +1,62 @@ +WITH _t2 AS ( + SELECT + ca_dt + FROM main.calendar + WHERE + YEAR(CAST(ca_dt AS TIMESTAMP)) IN (2020, 2021) +), _t5 AS ( + SELECT + co_id, + co_name + FROM main.countries + WHERE + co_name = 'CN' +), _s7 AS ( + SELECT + COUNT(*) AS n_rows, + _t4.ca_dt + FROM _t2 AS _t4 + JOIN main.calendar AS calendar + ON calendar.ca_dt >= DATEADD(MONTH, -6, CAST(_t4.ca_dt AS TIMESTAMP)) + JOIN main.devices AS devices + ON calendar.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) + JOIN _t5 AS _t5 + ON _t5.co_id = devices.de_production_country_id + GROUP BY + 2 +), _s15 AS ( + SELECT + COUNT(*) AS n_rows, + _t7.ca_dt + FROM _t2 AS _t7 + JOIN main.incidents AS incidents + ON _t7.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + JOIN main.devices AS devices + ON devices.de_id = incidents.in_device_id + JOIN _t5 AS _t8 + ON _t8.co_id = devices.de_production_country_id + GROUP BY + 2 +), _t0 AS ( + SELECT + MONTH(CAST(_t2.ca_dt AS TIMESTAMP)) AS month, + SUM(_s7.n_rows) AS sum_expr_3, + SUM(_s15.n_rows) AS sum_n_rows, + YEAR(CAST(_t2.ca_dt AS TIMESTAMP)) AS year + FROM 
_t2 AS _t2 + LEFT JOIN _s7 AS _s7 + ON _s7.ca_dt = _t2.ca_dt + LEFT JOIN _s15 AS _s15 + ON _s15.ca_dt = _t2.ca_dt + GROUP BY + 1, + 4 +) +SELECT + CONCAT_WS('-', year, LPAD(month, 2, '0')) AS month, + ROUND(( + 1000000.0 * COALESCE(sum_n_rows, 0) + ) / COALESCE(sum_expr_3, 0), 2) AS ir +FROM _t0 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_most_unreliable_products_snowflake.sql b/tests/test_sql_refsols/technograph_most_unreliable_products_snowflake.sql new file mode 100644 index 000000000..38be32ec0 --- /dev/null +++ b/tests/test_sql_refsols/technograph_most_unreliable_products_snowflake.sql @@ -0,0 +1,29 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + in_device_id + FROM main.incidents + GROUP BY + 2 +), _s3 AS ( + SELECT + COUNT(*) AS n_rows, + SUM(_s1.n_rows) AS sum_n_incidents, + devices.de_product_id + FROM main.devices AS devices + LEFT JOIN _s1 AS _s1 + ON _s1.in_device_id = devices.de_id + GROUP BY + 3 +) +SELECT + products.pr_name AS product, + products.pr_brand AS product_brand, + products.pr_type AS product_type, + ROUND(COALESCE(_s3.sum_n_incidents, 0) / _s3.n_rows, 2) AS ir +FROM main.products AS products +JOIN _s3 AS _s3 + ON _s3.de_product_id = products.pr_id +ORDER BY + 4 DESC NULLS LAST +LIMIT 5 diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_snowflake.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_snowflake.sql new file mode 100644 index 000000000..1a33ae4a2 --- /dev/null +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_goldcopperstar_snowflake.sql @@ -0,0 +1,83 @@ +WITH _s14 AS ( + SELECT + ANY_VALUE(pr_release) AS release_date + FROM main.products + WHERE + pr_name = 'GoldCopper-Star' +), _s6 AS ( + SELECT + ca_dt + FROM main.calendar +), _t5 AS ( + SELECT + pr_id, + pr_name + FROM main.products + WHERE + pr_name = 'GoldCopper-Star' +), _s7 AS ( + SELECT + COUNT(*) AS n_rows, + _s0.ca_dt + FROM _s6 AS _s0 + JOIN main.incidents AS incidents + ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + JOIN main.devices AS devices + ON devices.de_id = incidents.in_device_id + JOIN _t5 AS _t5 + ON _t5.pr_id = devices.de_product_id + GROUP BY + 2 +), _s13 AS ( + SELECT + COUNT(*) AS n_rows, + _s8.ca_dt + FROM _s6 AS _s8 + JOIN main.devices AS devices + ON _s8.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) + JOIN _t5 AS _t7 + ON _t7.pr_id = devices.de_product_id + GROUP BY + 2 +), _s15 AS ( + SELECT + SUM(_s7.n_rows) AS sum_expr_4, + SUM(_s13.n_rows) AS sum_n_rows, + YEAR(CAST(_s6.ca_dt AS TIMESTAMP)) AS year_1 + FROM _s6 AS _s6 + LEFT JOIN _s7 AS _s7 + ON _s6.ca_dt = _s7.ca_dt + LEFT JOIN _s13 AS _s13 + ON _s13.ca_dt = _s6.ca_dt + GROUP BY + 3 +) +SELECT + _s15.year_1 - YEAR(CAST(_s14.release_date AS TIMESTAMP)) AS years_since_release, + ROUND( + SUM(COALESCE(_s15.sum_expr_4, 0)) OVER (ORDER BY _s15.year_1 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) / SUM(COALESCE(_s15.sum_n_rows, 0)) OVER (ORDER BY _s15.year_1 ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS cum_ir, + ROUND( + ( + 100.0 * ( + COALESCE(_s15.sum_n_rows, 0) - LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year_1) + ) + ) / LAG(COALESCE(_s15.sum_n_rows, 0), 1) OVER (ORDER BY _s15.year_1), + 2 + ) AS pct_bought_change, + ROUND( + ( + 100.0 * ( + COALESCE(_s15.sum_expr_4, 0) - LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY _s15.year_1) + ) + ) / LAG(COALESCE(_s15.sum_expr_4, 0), 1) OVER (ORDER BY 
_s15.year_1), + 2 + ) AS pct_incident_change, + COALESCE(_s15.sum_n_rows, 0) AS bought, + COALESCE(_s15.sum_expr_4, 0) AS incidents +FROM _s14 AS _s14 +JOIN _s15 AS _s15 + ON _s15.year_1 >= YEAR(CAST(_s14.release_date AS TIMESTAMP)) +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_snowflake.sql b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_snowflake.sql new file mode 100644 index 000000000..fc1622d96 --- /dev/null +++ b/tests/test_sql_refsols/technograph_year_cumulative_incident_rate_overall_snowflake.sql @@ -0,0 +1,64 @@ +WITH _s2 AS ( + SELECT + ca_dt + FROM main.calendar +), _s3 AS ( + SELECT + COUNT(*) AS n_rows, + _s0.ca_dt + FROM _s2 AS _s0 + JOIN main.devices AS devices + ON _s0.ca_dt = DATE_TRUNC('DAY', CAST(devices.de_purchase_ts AS TIMESTAMP)) + GROUP BY + 2 +), _s7 AS ( + SELECT + COUNT(*) AS n_rows, + _s4.ca_dt + FROM _s2 AS _s4 + JOIN main.incidents AS incidents + ON _s4.ca_dt = DATE_TRUNC('DAY', CAST(incidents.in_error_report_ts AS TIMESTAMP)) + GROUP BY + 2 +), _t1 AS ( + SELECT + SUM(_s3.n_rows) AS sum_expr_3, + SUM(_s7.n_rows) AS sum_n_rows, + YEAR(CAST(_s2.ca_dt AS TIMESTAMP)) AS year + FROM _s2 AS _s2 + LEFT JOIN _s3 AS _s3 + ON _s2.ca_dt = _s3.ca_dt + LEFT JOIN _s7 AS _s7 + ON _s2.ca_dt = _s7.ca_dt + GROUP BY + 3 +) +SELECT + year AS yr, + ROUND( + SUM(COALESCE(sum_n_rows, 0)) OVER (ORDER BY year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) / SUM(COALESCE(sum_expr_3, 0)) OVER (ORDER BY year ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW), + 2 + ) AS cum_ir, + ROUND( + ( + 100.0 * ( + COALESCE(sum_expr_3, 0) - LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year) + ) + ) / LAG(COALESCE(sum_expr_3, 0), 1) OVER (ORDER BY year), + 2 + ) AS pct_bought_change, + ROUND( + ( + 100.0 * ( + COALESCE(sum_n_rows, 0) - LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year) + ) + ) / LAG(COALESCE(sum_n_rows, 0), 1) OVER (ORDER BY year), + 2 + ) AS pct_incident_change, + COALESCE(sum_expr_3, 0) AS bought, + COALESCE(sum_n_rows, 0) AS incidents +FROM _t1 +WHERE + NOT sum_expr_3 IS NULL AND sum_expr_3 > 0 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/time_threshold_reached_snowflake.sql b/tests/test_sql_refsols/time_threshold_reached_snowflake.sql new file mode 100644 index 000000000..4902f2306 --- /dev/null +++ b/tests/test_sql_refsols/time_threshold_reached_snowflake.sql @@ -0,0 +1,23 @@ +WITH _t3 AS ( + SELECT + ( + 100.0 * SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) ORDER BY sbtxdatetime ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) + ) / SUM(sbtxshares) OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP))) AS pct_of_day, + sbtxdatetime + FROM main.sbtransaction + WHERE + YEAR(CAST(sbtxdatetime AS TIMESTAMP)) = 2023 +), _t1 AS ( + SELECT + sbtxdatetime + FROM _t3 + WHERE + pct_of_day >= 50.0 + QUALIFY + ROW_NUMBER() OVER (PARTITION BY DATE_TRUNC('DAY', CAST(sbtxdatetime AS TIMESTAMP)) ORDER BY pct_of_day) = 1 +) +SELECT + sbtxdatetime AS date_time +FROM _t1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q10_snowflake.sql b/tests/test_sql_refsols/tpch_q10_snowflake.sql new file mode 100644 index 000000000..1ae176529 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q10_snowflake.sql @@ -0,0 +1,33 @@ +WITH _s3 AS ( + SELECT + SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )) AS sum_expr_1, + orders.o_custkey + FROM tpch.orders AS orders + JOIN tpch.lineitem AS lineitem + ON 
lineitem.l_orderkey = orders.o_orderkey AND lineitem.l_returnflag = 'R' + WHERE + QUARTER(CAST(orders.o_orderdate AS TIMESTAMP)) = 4 + AND YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1993 + GROUP BY + 2 +) +SELECT + customer.c_custkey AS C_CUSTKEY, + customer.c_name AS C_NAME, + COALESCE(_s3.sum_expr_1, 0) AS REVENUE, + customer.c_acctbal AS C_ACCTBAL, + nation.n_name AS N_NAME, + customer.c_address AS C_ADDRESS, + customer.c_phone AS C_PHONE, + customer.c_comment AS C_COMMENT +FROM tpch.customer AS customer +LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +ORDER BY + 3 DESC NULLS LAST, + 1 NULLS FIRST +LIMIT 20 diff --git a/tests/test_sql_refsols/tpch_q11_snowflake.sql b/tests/test_sql_refsols/tpch_q11_snowflake.sql new file mode 100644 index 000000000..677c62d95 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q11_snowflake.sql @@ -0,0 +1,43 @@ +WITH _s0 AS ( + SELECT + s_nationkey, + s_suppkey + FROM tpch.supplier +), _t2 AS ( + SELECT + n_name, + n_nationkey + FROM tpch.nation + WHERE + n_name = 'GERMANY' +), _s8 AS ( + SELECT + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_metric + FROM tpch.partsupp AS partsupp + JOIN _s0 AS _s0 + ON _s0.s_suppkey = partsupp.ps_suppkey + JOIN _t2 AS _t2 + ON _s0.s_nationkey = _t2.n_nationkey +), _s9 AS ( + SELECT + SUM(partsupp.ps_supplycost * partsupp.ps_availqty) AS sum_expr_2, + partsupp.ps_partkey + FROM tpch.partsupp AS partsupp + JOIN _s0 AS _s4 + ON _s4.s_suppkey = partsupp.ps_suppkey + JOIN _t2 AS _t4 + ON _s4.s_nationkey = _t4.n_nationkey + GROUP BY + 2 +) +SELECT + _s9.ps_partkey AS PS_PARTKEY, + COALESCE(_s9.sum_expr_2, 0) AS VALUE +FROM _s8 AS _s8 +JOIN _s9 AS _s9 + ON ( + COALESCE(_s8.sum_metric, 0) * 0.0001 + ) < COALESCE(_s9.sum_expr_2, 0) +ORDER BY + 2 DESC NULLS LAST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q12_snowflake.sql b/tests/test_sql_refsols/tpch_q12_snowflake.sql new file mode 100644 index 000000000..b02a4f3ff --- /dev/null +++ b/tests/test_sql_refsols/tpch_q12_snowflake.sql @@ -0,0 +1,18 @@ +SELECT + lineitem.l_shipmode AS L_SHIPMODE, + COALESCE(COUNT_IF(orders.o_orderpriority IN ('1-URGENT', '2-HIGH')), 0) AS HIGH_LINE_COUNT, + COALESCE(COUNT_IF(NOT orders.o_orderpriority IN ('1-URGENT', '2-HIGH')), 0) AS LOW_LINE_COUNT +FROM tpch.lineitem AS lineitem +JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey +WHERE + YEAR(CAST(lineitem.l_receiptdate AS TIMESTAMP)) = 1994 + AND lineitem.l_commitdate < lineitem.l_receiptdate + AND lineitem.l_commitdate > lineitem.l_shipdate + AND ( + lineitem.l_shipmode = 'MAIL' OR lineitem.l_shipmode = 'SHIP' + ) +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q13_snowflake.sql b/tests/test_sql_refsols/tpch_q13_snowflake.sql new file mode 100644 index 000000000..d3d53c838 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q13_snowflake.sql @@ -0,0 +1,22 @@ +WITH _s1 AS ( + SELECT + COUNT(*) AS n_rows, + o_custkey + FROM tpch.orders + WHERE + NOT o_comment LIKE '%special%requests%' + GROUP BY + 2 +) +SELECT + COALESCE(_s1.n_rows, 0) AS C_COUNT, + COUNT(*) AS CUSTDIST +FROM tpch.customer AS customer +LEFT JOIN _s1 AS _s1 + ON _s1.o_custkey = customer.c_custkey +GROUP BY + 1 +ORDER BY + 2 DESC NULLS LAST, + 1 DESC NULLS LAST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q14_snowflake.sql b/tests/test_sql_refsols/tpch_q14_snowflake.sql new file mode 100644 index 000000000..7c04495f8 --- /dev/null +++ 
b/tests/test_sql_refsols/tpch_q14_snowflake.sql @@ -0,0 +1,23 @@ +SELECT + ( + 100.0 * COALESCE( + SUM( + IFF( + STARTSWITH(part.p_type, 'PROMO'), + lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + ), + 0 + ) + ), + 0 + ) + ) / COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS PROMO_REVENUE +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey +WHERE + MONTH(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 9 + AND YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) = 1995 diff --git a/tests/test_sql_refsols/tpch_q15_snowflake.sql b/tests/test_sql_refsols/tpch_q15_snowflake.sql new file mode 100644 index 000000000..545e6f34c --- /dev/null +++ b/tests/test_sql_refsols/tpch_q15_snowflake.sql @@ -0,0 +1,48 @@ +WITH _t3 AS ( + SELECT + l_discount, + l_extendedprice, + l_shipdate, + l_suppkey + FROM tpch.lineitem + WHERE + l_shipdate < CAST('1996-04-01' AS DATE) + AND l_shipdate >= CAST('1996-01-01' AS DATE) +), _s1 AS ( + SELECT + SUM(l_extendedprice * ( + 1 - l_discount + )) AS sum_expr_2, + l_suppkey + FROM _t3 + GROUP BY + 2 +), _s2 AS ( + SELECT + MAX(COALESCE(_s1.sum_expr_2, 0)) AS max_revenue + FROM tpch.supplier AS supplier + JOIN _s1 AS _s1 + ON _s1.l_suppkey = supplier.s_suppkey +), _s5 AS ( + SELECT + SUM(l_extendedprice * ( + 1 - l_discount + )) AS sum_expr_3, + l_suppkey + FROM _t3 + GROUP BY + 2 +) +SELECT + supplier.s_suppkey AS S_SUPPKEY, + supplier.s_name AS S_NAME, + supplier.s_address AS S_ADDRESS, + supplier.s_phone AS S_PHONE, + COALESCE(_s5.sum_expr_3, 0) AS TOTAL_REVENUE +FROM _s2 AS _s2 +CROSS JOIN tpch.supplier AS supplier +JOIN _s5 AS _s5 + ON _s2.max_revenue = COALESCE(_s5.sum_expr_3, 0) + AND _s5.l_suppkey = supplier.s_suppkey +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q16_snowflake.sql b/tests/test_sql_refsols/tpch_q16_snowflake.sql new file mode 100644 index 000000000..142fca36a --- /dev/null +++ b/tests/test_sql_refsols/tpch_q16_snowflake.sql @@ -0,0 +1,24 @@ +SELECT + part.p_brand AS P_BRAND, + part.p_type AS P_TYPE, + part.p_size AS P_SIZE, + COUNT(DISTINCT partsupp.ps_suppkey) AS SUPPLIER_COUNT +FROM tpch.partsupp AS partsupp +JOIN tpch.supplier AS supplier + ON NOT supplier.s_comment LIKE '%Customer%Complaints%' + AND partsupp.ps_suppkey = supplier.s_suppkey +JOIN tpch.part AS part + ON NOT STARTSWITH(part.p_type, 'MEDIUM POLISHED%') + AND part.p_brand <> 'BRAND#45' + AND part.p_partkey = partsupp.ps_partkey + AND part.p_size IN (49, 14, 23, 45, 19, 3, 36, 9) +GROUP BY + 1, + 2, + 3 +ORDER BY + 4 DESC NULLS LAST, + 1 NULLS FIRST, + 2 NULLS FIRST, + 3 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q17_snowflake.sql b/tests/test_sql_refsols/tpch_q17_snowflake.sql new file mode 100644 index 000000000..5338e7f5c --- /dev/null +++ b/tests/test_sql_refsols/tpch_q17_snowflake.sql @@ -0,0 +1,16 @@ +WITH _t2 AS ( + SELECT + lineitem.l_extendedprice + FROM tpch.part AS part + JOIN tpch.lineitem AS lineitem + ON lineitem.l_partkey = part.p_partkey + WHERE + part.p_brand = 'Brand#23' AND part.p_container = 'MED BOX' + QUALIFY + lineitem.l_quantity < ( + 0.2 * AVG(lineitem.l_quantity) OVER (PARTITION BY lineitem.l_partkey) + ) +) +SELECT + COALESCE(SUM(l_extendedprice), 0) / 7.0 AS AVG_YEARLY +FROM _t2 diff --git a/tests/test_sql_refsols/tpch_q18_snowflake.sql b/tests/test_sql_refsols/tpch_q18_snowflake.sql new file mode 100644 index 000000000..917d29d0b --- /dev/null +++ b/tests/test_sql_refsols/tpch_q18_snowflake.sql @@ -0,0 +1,26 @@ +WITH _t1 AS ( + SELECT + 
SUM(l_quantity) AS sum_l_quantity, + l_orderkey + FROM tpch.lineitem + GROUP BY + 2 +) +SELECT + customer.c_name AS C_NAME, + customer.c_custkey AS C_CUSTKEY, + orders.o_orderkey AS O_ORDERKEY, + orders.o_orderdate AS O_ORDERDATE, + orders.o_totalprice AS O_TOTALPRICE, + COALESCE(_t1.sum_l_quantity, 0) AS TOTAL_QUANTITY +FROM tpch.orders AS orders +JOIN tpch.customer AS customer + ON customer.c_custkey = orders.o_custkey +JOIN _t1 AS _t1 + ON NOT _t1.sum_l_quantity IS NULL + AND _t1.l_orderkey = orders.o_orderkey + AND _t1.sum_l_quantity > 300 +ORDER BY + 5 DESC NULLS LAST, + 4 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q19_snowflake.sql b/tests/test_sql_refsols/tpch_q19_snowflake.sql new file mode 100644 index 000000000..3ea806f1f --- /dev/null +++ b/tests/test_sql_refsols/tpch_q19_snowflake.sql @@ -0,0 +1,36 @@ +SELECT + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS REVENUE +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS part + ON ( + ( + lineitem.l_quantity <= 11 + AND lineitem.l_quantity >= 1 + AND part.p_brand = 'Brand#12' + AND part.p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') + AND part.p_size <= 5 + AND part.p_size >= 1 + ) + OR ( + lineitem.l_quantity <= 20 + AND lineitem.l_quantity >= 10 + AND part.p_brand = 'Brand#23' + AND part.p_container IN ('MED BAG', 'MED BOX', 'MED PACK', 'MED PKG') + AND part.p_size <= 10 + AND part.p_size >= 1 + ) + OR ( + lineitem.l_quantity <= 30 + AND lineitem.l_quantity >= 20 + AND part.p_brand = 'Brand#34' + AND part.p_container IN ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG') + AND part.p_size <= 15 + AND part.p_size >= 1 + ) + ) + AND lineitem.l_partkey = part.p_partkey +WHERE + lineitem.l_shipinstruct = 'DELIVER IN PERSON' + AND lineitem.l_shipmode IN ('AIR', 'AIR REG') diff --git a/tests/test_sql_refsols/tpch_q1_snowflake.sql b/tests/test_sql_refsols/tpch_q1_snowflake.sql new file mode 100644 index 000000000..a060e085e --- /dev/null +++ b/tests/test_sql_refsols/tpch_q1_snowflake.sql @@ -0,0 +1,26 @@ +SELECT + l_returnflag AS L_RETURNFLAG, + l_linestatus AS L_LINESTATUS, + COALESCE(SUM(l_quantity), 0) AS SUM_QTY, + COALESCE(SUM(l_extendedprice), 0) AS SUM_BASE_PRICE, + COALESCE(SUM(l_extendedprice * ( + 1 - l_discount + )), 0) AS SUM_DISC_PRICE, + COALESCE(SUM(l_extendedprice * ( + 1 - l_discount + ) * ( + 1 + l_tax + )), 0) AS SUM_CHARGE, + AVG(l_quantity) AS AVG_QTY, + AVG(l_extendedprice) AS AVG_PRICE, + AVG(l_discount) AS AVG_DISC, + COUNT(*) AS COUNT_ORDER +FROM tpch.lineitem +WHERE + l_shipdate <= CAST('1998-12-01' AS DATE) +GROUP BY + 1, + 2 +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q20_snowflake.sql b/tests/test_sql_refsols/tpch_q20_snowflake.sql new file mode 100644 index 000000000..175c86131 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q20_snowflake.sql @@ -0,0 +1,39 @@ +WITH _s3 AS ( + SELECT + SUM(l_quantity) AS sum_l_quantity, + l_partkey + FROM tpch.lineitem + WHERE + YEAR(CAST(l_shipdate AS TIMESTAMP)) = 1994 + GROUP BY + 2 +), _s5 AS ( + SELECT + part.p_partkey, + _s3.sum_l_quantity + FROM tpch.part AS part + JOIN _s3 AS _s3 + ON _s3.l_partkey = part.p_partkey + WHERE + STARTSWITH(part.p_name, 'forest') +), _s7 AS ( + SELECT DISTINCT + partsupp.ps_suppkey + FROM tpch.partsupp AS partsupp + JOIN _s5 AS _s5 + ON _s5.p_partkey = partsupp.ps_partkey + AND partsupp.ps_availqty > ( + 0.5 * COALESCE(_s5.sum_l_quantity, 0) + ) +) +SELECT + supplier.s_name AS S_NAME, + supplier.s_address AS S_ADDRESS +FROM tpch.supplier AS 
supplier +JOIN tpch.nation AS nation + ON nation.n_name = 'CANADA' AND nation.n_nationkey = supplier.s_nationkey +JOIN _s7 AS _s7 + ON _s7.ps_suppkey = supplier.s_suppkey +ORDER BY + 1 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q21_snowflake.sql b/tests/test_sql_refsols/tpch_q21_snowflake.sql new file mode 100644 index 000000000..d2b7c0da2 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q21_snowflake.sql @@ -0,0 +1,68 @@ +WITH _t5 AS ( + SELECT + l_commitdate, + l_linenumber, + l_orderkey, + l_receiptdate, + l_suppkey + FROM tpch.lineitem + WHERE + l_commitdate < l_receiptdate +), _t3 AS ( + SELECT + ANY_VALUE(_t5.l_suppkey) AS anything_l_suppkey, + ANY_VALUE(orders.o_orderstatus) AS anything_o_orderstatus, + _t5.l_linenumber, + _t5.l_orderkey, + orders.o_orderkey + FROM _t5 AS _t5 + JOIN tpch.orders AS orders + ON _t5.l_orderkey = orders.o_orderkey + JOIN tpch.lineitem AS lineitem + ON _t5.l_suppkey <> lineitem.l_suppkey AND lineitem.l_orderkey = orders.o_orderkey + GROUP BY + 3, + 4, + 5 +), _u_0 AS ( + SELECT + _t6.l_linenumber AS _u_1, + _t6.l_orderkey AS _u_2, + orders.o_orderkey AS _u_3 + FROM _t5 AS _t6 + JOIN tpch.orders AS orders + ON _t6.l_orderkey = orders.o_orderkey + JOIN tpch.lineitem AS lineitem + ON _t6.l_suppkey <> lineitem.l_suppkey + AND lineitem.l_commitdate < lineitem.l_receiptdate + AND lineitem.l_orderkey = orders.o_orderkey + GROUP BY + 1, + 2, + 3 +), _s13 AS ( + SELECT + COUNT(*) AS n_rows, + _t3.anything_l_suppkey + FROM _t3 AS _t3 + LEFT JOIN _u_0 AS _u_0 + ON _t3.l_linenumber = _u_0._u_1 + AND _t3.l_orderkey = _u_0._u_2 + AND _t3.o_orderkey = _u_0._u_3 + WHERE + _t3.anything_o_orderstatus = 'F' AND _u_0._u_1 IS NULL + GROUP BY + 2 +) +SELECT + supplier.s_name AS S_NAME, + COALESCE(_s13.n_rows, 0) AS NUMWAIT +FROM tpch.supplier AS supplier +JOIN tpch.nation AS nation + ON nation.n_name = 'SAUDI ARABIA' AND nation.n_nationkey = supplier.s_nationkey +LEFT JOIN _s13 AS _s13 + ON _s13.anything_l_suppkey = supplier.s_suppkey +ORDER BY + 2 DESC NULLS LAST, + 1 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q22_snowflake.sql b/tests/test_sql_refsols/tpch_q22_snowflake.sql new file mode 100644 index 000000000..6b975db02 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q22_snowflake.sql @@ -0,0 +1,31 @@ +WITH _s0 AS ( + SELECT + AVG(c_acctbal) AS global_avg_balance + FROM tpch.customer + WHERE + SUBSTRING(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') + AND c_acctbal > 0.0 +), _s3 AS ( + SELECT + COUNT(*) AS n_rows, + o_custkey + FROM tpch.orders + GROUP BY + 2 +) +SELECT + SUBSTRING(customer.c_phone, 1, 2) AS CNTRY_CODE, + COUNT(*) AS NUM_CUSTS, + COALESCE(SUM(customer.c_acctbal), 0) AS TOTACCTBAL +FROM _s0 AS _s0 +JOIN tpch.customer AS customer + ON SUBSTRING(customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') + AND _s0.global_avg_balance < customer.c_acctbal +LEFT JOIN _s3 AS _s3 + ON _s3.o_custkey = customer.c_custkey +WHERE + _s3.n_rows = 0 OR _s3.n_rows IS NULL +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q2_snowflake.sql b/tests/test_sql_refsols/tpch_q2_snowflake.sql new file mode 100644 index 000000000..b256ac78c --- /dev/null +++ b/tests/test_sql_refsols/tpch_q2_snowflake.sql @@ -0,0 +1,40 @@ +WITH _t1 AS ( + SELECT + nation.n_name, + part.p_mfgr, + part.p_partkey, + supplier.s_acctbal, + supplier.s_address, + supplier.s_comment, + supplier.s_name, + supplier.s_phone + FROM tpch.part AS part + JOIN tpch.partsupp AS partsupp + ON part.p_partkey = 
partsupp.ps_partkey + JOIN tpch.supplier AS supplier + ON partsupp.ps_suppkey = supplier.s_suppkey + JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey + JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'EUROPE' + WHERE + ENDSWITH(part.p_type, 'BRASS') AND part.p_size = 15 + QUALIFY + RANK() OVER (PARTITION BY partsupp.ps_partkey ORDER BY partsupp.ps_supplycost) = 1 +) +SELECT + s_acctbal AS S_ACCTBAL, + s_name AS S_NAME, + n_name AS N_NAME, + p_partkey AS P_PARTKEY, + p_mfgr AS P_MFGR, + s_address AS S_ADDRESS, + s_phone AS S_PHONE, + s_comment AS S_COMMENT +FROM _t1 +ORDER BY + 1 DESC NULLS LAST, + 3 NULLS FIRST, + 2 NULLS FIRST, + 4 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q3_snowflake.sql b/tests/test_sql_refsols/tpch_q3_snowflake.sql new file mode 100644 index 000000000..6804cc526 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q3_snowflake.sql @@ -0,0 +1,24 @@ +SELECT + lineitem.l_orderkey AS L_ORDERKEY, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS REVENUE, + orders.o_orderdate AS O_ORDERDATE, + orders.o_shippriority AS O_SHIPPRIORITY +FROM tpch.orders AS orders +JOIN tpch.customer AS customer + ON customer.c_custkey = orders.o_custkey AND customer.c_mktsegment = 'BUILDING' +JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey + AND lineitem.l_shipdate > CAST('1995-03-15' AS DATE) +WHERE + orders.o_orderdate < CAST('1995-03-15' AS DATE) +GROUP BY + 1, + 3, + 4 +ORDER BY + 2 DESC NULLS LAST, + 3 NULLS FIRST, + 1 NULLS FIRST +LIMIT 10 diff --git a/tests/test_sql_refsols/tpch_q4_snowflake.sql b/tests/test_sql_refsols/tpch_q4_snowflake.sql new file mode 100644 index 000000000..95df76f42 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q4_snowflake.sql @@ -0,0 +1,23 @@ +WITH _u_0 AS ( + SELECT + l_orderkey AS _u_1 + FROM tpch.lineitem + WHERE + l_commitdate < l_receiptdate + GROUP BY + 1 +) +SELECT + orders.o_orderpriority AS O_ORDERPRIORITY, + COUNT(*) AS ORDER_COUNT +FROM tpch.orders AS orders +LEFT JOIN _u_0 AS _u_0 + ON _u_0._u_1 = orders.o_orderkey +WHERE + NOT _u_0._u_1 IS NULL + AND QUARTER(CAST(orders.o_orderdate AS TIMESTAMP)) = 3 + AND YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) = 1993 +GROUP BY + 1 +ORDER BY + 1 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q5_snowflake.sql b/tests/test_sql_refsols/tpch_q5_snowflake.sql new file mode 100644 index 000000000..443739351 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q5_snowflake.sql @@ -0,0 +1,30 @@ +WITH _s11 AS ( + SELECT + nation.n_name, + supplier.s_suppkey + FROM tpch.supplier AS supplier + JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +) +SELECT + ANY_VALUE(nation.n_name) AS N_NAME, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS REVENUE +FROM tpch.nation AS nation +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'ASIA' +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.orders AS orders + ON customer.c_custkey = orders.o_custkey + AND orders.o_orderdate < CAST('1995-01-01' AS DATE) + AND orders.o_orderdate >= CAST('1994-01-01' AS DATE) +JOIN tpch.lineitem AS lineitem + ON lineitem.l_orderkey = orders.o_orderkey +JOIN _s11 AS _s11 + ON _s11.n_name = nation.n_name AND _s11.s_suppkey = lineitem.l_suppkey +GROUP BY + nation.n_nationkey +ORDER BY + 2 DESC NULLS LAST diff --git a/tests/test_sql_refsols/tpch_q6_snowflake.sql 
b/tests/test_sql_refsols/tpch_q6_snowflake.sql new file mode 100644 index 000000000..93d3c085b --- /dev/null +++ b/tests/test_sql_refsols/tpch_q6_snowflake.sql @@ -0,0 +1,9 @@ +SELECT + COALESCE(SUM(l_extendedprice * l_discount), 0) AS REVENUE +FROM tpch.lineitem +WHERE + l_discount <= 0.07 + AND l_discount >= 0.05 + AND l_quantity < 24 + AND l_shipdate < CAST('1995-01-01' AS DATE) + AND l_shipdate >= CAST('1994-01-01' AS DATE) diff --git a/tests/test_sql_refsols/tpch_q7_snowflake.sql b/tests/test_sql_refsols/tpch_q7_snowflake.sql new file mode 100644 index 000000000..439889188 --- /dev/null +++ b/tests/test_sql_refsols/tpch_q7_snowflake.sql @@ -0,0 +1,46 @@ +WITH _s9 AS ( + SELECT + nation.n_name, + orders.o_orderkey + FROM tpch.orders AS orders + JOIN tpch.customer AS customer + ON customer.c_custkey = orders.o_custkey + JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey + AND ( + nation.n_name = 'FRANCE' OR nation.n_name = 'GERMANY' + ) +) +SELECT + nation.n_name AS SUPP_NATION, + _s9.n_name AS CUST_NATION, + YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) AS L_YEAR, + COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS REVENUE +FROM tpch.lineitem AS lineitem +JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN _s9 AS _s9 + ON ( + _s9.n_name = 'FRANCE' OR nation.n_name = 'FRANCE' + ) + AND ( + _s9.n_name = 'GERMANY' OR nation.n_name = 'GERMANY' + ) + AND _s9.o_orderkey = lineitem.l_orderkey + AND ( + nation.n_name = 'FRANCE' OR nation.n_name = 'GERMANY' + ) +WHERE + YEAR(CAST(lineitem.l_shipdate AS TIMESTAMP)) IN (1995, 1996) +GROUP BY + 1, + 2, + 3 +ORDER BY + 1 NULLS FIRST, + 2 NULLS FIRST, + 3 NULLS FIRST diff --git a/tests/test_sql_refsols/tpch_q8_snowflake.sql b/tests/test_sql_refsols/tpch_q8_snowflake.sql new file mode 100644 index 000000000..599805c5c --- /dev/null +++ b/tests/test_sql_refsols/tpch_q8_snowflake.sql @@ -0,0 +1,34 @@ +SELECT + YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) AS O_YEAR, + COALESCE( + SUM( + IFF( + nation_2.n_name = 'BRAZIL', + lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + ), + 0 + ) + ), + 0 + ) / COALESCE(SUM(lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + )), 0) AS MKT_SHARE +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS part + ON lineitem.l_partkey = part.p_partkey AND part.p_type = 'ECONOMY ANODIZED STEEL' +JOIN tpch.orders AS orders + ON YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) IN (1995, 1996) + AND lineitem.l_orderkey = orders.o_orderkey +JOIN tpch.customer AS customer + ON customer.c_custkey = orders.o_custkey +JOIN tpch.nation AS nation + ON customer.c_nationkey = nation.n_nationkey +JOIN tpch.region AS region + ON nation.n_regionkey = region.r_regionkey AND region.r_name = 'AMERICA' +JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey +JOIN tpch.nation AS nation_2 + ON nation_2.n_nationkey = supplier.s_nationkey +GROUP BY + 1 diff --git a/tests/test_sql_refsols/tpch_q9_snowflake.sql b/tests/test_sql_refsols/tpch_q9_snowflake.sql new file mode 100644 index 000000000..1268e47bc --- /dev/null +++ b/tests/test_sql_refsols/tpch_q9_snowflake.sql @@ -0,0 +1,30 @@ +SELECT + nation.n_name AS NATION, + YEAR(CAST(orders.o_orderdate AS TIMESTAMP)) AS O_YEAR, + COALESCE( + SUM( + lineitem.l_extendedprice * ( + 1 - lineitem.l_discount + ) - partsupp.ps_supplycost * lineitem.l_quantity + ), + 0 + ) AS AMOUNT +FROM tpch.lineitem AS lineitem +JOIN tpch.part AS 
part + ON CONTAINS(part.p_name, 'green') AND lineitem.l_partkey = part.p_partkey +JOIN tpch.supplier AS supplier + ON lineitem.l_suppkey = supplier.s_suppkey +JOIN tpch.nation AS nation + ON nation.n_nationkey = supplier.s_nationkey +JOIN tpch.orders AS orders + ON lineitem.l_orderkey = orders.o_orderkey +JOIN tpch.partsupp AS partsupp + ON lineitem.l_partkey = partsupp.ps_partkey + AND lineitem.l_suppkey = partsupp.ps_suppkey +GROUP BY + 1, + 2 +ORDER BY + 1 NULLS FIRST, + 2 DESC NULLS LAST +LIMIT 10 diff --git a/tests/test_sql_refsols/unary_and_slicing_operators_ansi.sql b/tests/test_sql_refsols/unary_and_slicing_operators_ansi.sql index 129baf923..b80ccca12 100644 --- a/tests/test_sql_refsols/unary_and_slicing_operators_ansi.sql +++ b/tests/test_sql_refsols/unary_and_slicing_operators_ansi.sql @@ -23,7 +23,7 @@ SELECT LENGTH(c_name) + 0 ) - 2 ) <= 0 - THEN '' + THEN 0 ELSE ( LENGTH(c_name) + 0 ) - 2 diff --git a/tests/test_sql_refsols/unary_and_slicing_operators_snowflake.sql b/tests/test_sql_refsols/unary_and_slicing_operators_snowflake.sql new file mode 100644 index 000000000..b80ccca12 --- /dev/null +++ b/tests/test_sql_refsols/unary_and_slicing_operators_snowflake.sql @@ -0,0 +1,64 @@ +SELECT + SUBSTRING(c_phone, 1, 3) AS country_code, + SUBSTRING(c_name, 2) AS name_without_first_char, + SUBSTRING( + c_phone, + CASE WHEN ( + LENGTH(c_phone) + 0 + ) < 1 THEN 1 ELSE ( + LENGTH(c_phone) + 0 + ) END + ) AS last_digit, + SUBSTRING( + c_name, + 2, + CASE + WHEN ( + LENGTH(c_name) + 0 + ) < 1 + THEN 0 + ELSE CASE + WHEN ( + ( + LENGTH(c_name) + 0 + ) - 2 + ) <= 0 + THEN 0 + ELSE ( + LENGTH(c_name) + 0 + ) - 2 + END + END + ) AS name_without_start_and_end_char, + SUBSTRING( + c_phone, + 1, + CASE WHEN ( + LENGTH(c_phone) + -5 + ) < 0 THEN 0 ELSE ( + LENGTH(c_phone) + -5 + ) END + ) AS phone_without_last_5_chars, + SUBSTRING( + c_name, + CASE WHEN ( + LENGTH(c_name) + -1 + ) < 1 THEN 1 ELSE ( + LENGTH(c_name) + -1 + ) END, + CASE + WHEN ( + LENGTH(c_name) + 0 + ) < 1 + THEN 0 + ELSE ( + LENGTH(c_name) + 0 + ) - CASE WHEN ( + LENGTH(c_name) + -1 + ) < 1 THEN 1 ELSE ( + LENGTH(c_name) + -1 + ) END + END + ) AS name_second_to_last_char, + c_acctbal >= 0 AS is_not_in_debt +FROM tpch.customer diff --git a/tests/test_sql_refsols/unary_and_slicing_operators_sqlite.sql b/tests/test_sql_refsols/unary_and_slicing_operators_sqlite.sql index 129baf923..b80ccca12 100644 --- a/tests/test_sql_refsols/unary_and_slicing_operators_sqlite.sql +++ b/tests/test_sql_refsols/unary_and_slicing_operators_sqlite.sql @@ -23,7 +23,7 @@ SELECT LENGTH(c_name) + 0 ) - 2 ) <= 0 - THEN '' + THEN 0 ELSE ( LENGTH(c_name) + 0 ) - 2 diff --git a/tests/test_sql_refsols/week_offset_snowflake.sql b/tests/test_sql_refsols/week_offset_snowflake.sql new file mode 100644 index 000000000..8fe3d278e --- /dev/null +++ b/tests/test_sql_refsols/week_offset_snowflake.sql @@ -0,0 +1,14 @@ +SELECT + sbtxdatetime AS date_time, + DATEADD(WEEK, 1, CAST(sbtxdatetime AS TIMESTAMP)) AS week_adj1, + DATEADD(WEEK, -1, CAST(sbtxdatetime AS TIMESTAMP)) AS week_adj2, + DATEADD(WEEK, 2, DATEADD(HOUR, 1, CAST(sbtxdatetime AS TIMESTAMP))) AS week_adj3, + DATEADD(WEEK, 2, DATEADD(SECOND, -1, CAST(sbtxdatetime AS TIMESTAMP))) AS week_adj4, + DATEADD(WEEK, 2, DATEADD(DAY, 1, CAST(sbtxdatetime AS TIMESTAMP))) AS week_adj5, + DATEADD(WEEK, 2, DATEADD(MINUTE, -1, CAST(sbtxdatetime AS TIMESTAMP))) AS week_adj6, + DATEADD(WEEK, 2, DATEADD(MONTH, 1, CAST(sbtxdatetime AS TIMESTAMP))) AS week_adj7, + DATEADD(WEEK, 2, DATEADD(YEAR, 1, CAST(sbtxdatetime AS TIMESTAMP))) 
AS week_adj8 +FROM main.sbtransaction +WHERE + DAY(CAST(sbtxdatetime AS TIMESTAMP)) > 1 + AND YEAR(CAST(sbtxdatetime AS TIMESTAMP)) < 2025 diff --git a/tests/test_sql_refsols/window_functions_snowflake.sql b/tests/test_sql_refsols/window_functions_snowflake.sql new file mode 100644 index 000000000..63535e1b4 --- /dev/null +++ b/tests/test_sql_refsols/window_functions_snowflake.sql @@ -0,0 +1,15 @@ +SELECT + DENSE_RANK() OVER (ORDER BY customer.c_acctbal DESC) AS rank_value, + NTILE(10) OVER (ORDER BY customer.c_acctbal) AS precentile_value, + LAG(customer.c_acctbal, 2, 0.0) OVER (PARTITION BY nation.n_regionkey ORDER BY customer.c_acctbal) AS two_prev_value, + LEAD(customer.c_acctbal, 2) OVER (PARTITION BY customer.c_nationkey ORDER BY customer.c_acctbal) AS two_next_value, + SUM(customer.c_acctbal) OVER (PARTITION BY nation.n_regionkey ORDER BY customer.c_acctbal ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS relsum_value, + SUM(customer.c_acctbal) OVER (ORDER BY customer.c_acctbal ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS relsum_value2, + customer.c_acctbal / AVG(customer.c_acctbal) OVER (ORDER BY customer.c_acctbal ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS relavg_value, + customer.c_acctbal / COUNT(CASE WHEN customer.c_acctbal > 0.0 THEN customer.c_acctbal ELSE NULL END) OVER () AS relcount_value, + customer.c_acctbal / COUNT(*) OVER () AS relsize_value +FROM tpch.region AS region +JOIN tpch.nation AS nation + ON nation.n_regionkey = region.r_regionkey +JOIN tpch.customer AS customer + ON customer.c_nationkey = nation.n_nationkey diff --git a/tests/test_sql_refsols/window_sliding_frame_relsize_snowflake.sql b/tests/test_sql_refsols/window_sliding_frame_relsize_snowflake.sql new file mode 100644 index 000000000..842523261 --- /dev/null +++ b/tests/test_sql_refsols/window_sliding_frame_relsize_snowflake.sql @@ -0,0 +1,16 @@ +SELECT + sbtransaction.sbtxid AS transaction_id, + COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w1, + COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) AS w2, + COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w5, + COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS w6, + COUNT(*) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w7, + COUNT(*) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 3 PRECEDING AND 5 FOLLOWING) AS w8 +FROM main.sbcustomer AS sbcustomer +JOIN main.sbtransaction AS sbtransaction + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid +ORDER BY + sbtransaction.sbtxdatetime NULLS FIRST +LIMIT 8 diff --git a/tests/test_sql_refsols/window_sliding_frame_relsum_snowflake.sql b/tests/test_sql_refsols/window_sliding_frame_relsum_snowflake.sql new file mode 100644 index 000000000..449e8361d --- /dev/null +++ 
b/tests/test_sql_refsols/window_sliding_frame_relsum_snowflake.sql @@ -0,0 +1,16 @@ +SELECT + sbtransaction.sbtxid AS transaction_id, + SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w1, + SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND 4 FOLLOWING) AS w2, + SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w3, + SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) AS w4, + SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w5, + SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) AS w6, + SUM(sbtransaction.sbtxshares) OVER (ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w7, + SUM(sbtransaction.sbtxshares) OVER (PARTITION BY sbtransaction.sbtxcustid ORDER BY sbtransaction.sbtxdatetime, sbtransaction.sbtxid ROWS BETWEEN 5 PRECEDING AND 1 PRECEDING) AS w8 +FROM main.sbcustomer AS sbcustomer +JOIN main.sbtransaction AS sbtransaction + ON sbcustomer.sbcustid = sbtransaction.sbtxcustid +ORDER BY + sbtransaction.sbtxdatetime NULLS FIRST +LIMIT 8 diff --git a/tests/testing_utilities.py b/tests/testing_utilities.py index 3a178cc2d..dcbd068fb 100644 --- a/tests/testing_utilities.py +++ b/tests/testing_utilities.py @@ -1043,6 +1043,12 @@ def run_e2e_test( result = result.sort_values(by=list(result.columns)).reset_index(drop=True) refsol = refsol.sort_values(by=list(refsol.columns)).reset_index(drop=True) + # Harmonize types between result and reference solution + if coerce_types: + for col_name in result.columns: + result[col_name], refsol[col_name] = harmonize_types( + result[col_name], refsol[col_name] + ) # Perform the comparison between the result and the reference solution if coerce_types: for col_name in result.columns: @@ -1380,6 +1386,12 @@ def harmonize_types(column_a, column_b): return column_a.apply(lambda x: "" if pd.isna(x) else str(x)), column_b.apply( lambda x: "" if pd.isna(x) else str(x) ) + # float vs None. Convert to nullable floats + if any(isinstance(elem, (float, NoneType)) for elem in column_a) and any( + isinstance(elem, (float, NoneType)) for elem in column_b + ): + return column_a.astype("Float64"), column_b.astype("Float64") + if any(isinstance(elem, Decimal) for elem in column_a) and any( isinstance(elem, int) for elem in column_b ):