diff --git a/.gitignore b/.gitignore index b6e47617d..8c8200842 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,4 @@ dmypy.json # Pyre type checker .pyre/ +scratch/ diff --git a/Project1/README.md b/Project1/README.md new file mode 100644 index 000000000..9601bab0d --- /dev/null +++ b/Project1/README.md @@ -0,0 +1,102 @@ +# Project 1 + +### How many users do we have? +130 + +```sql +SELECT COUNT(user_guid) FROM dev_db.dbt_danieloutschoolcom.stg_postgres__users; +``` + +### On average, how many orders do we receive per hour? +5.416668 (averaged over hours that received at least one order) + +```sql +WITH orders_received_hourly as ( +SELECT date_trunc('hour', created_at) as hour_received +, COUNT(*) as num_received_this_hour +FROM dev_db.dbt_danieloutschoolcom.stg_postgres__orders +GROUP BY hour_received +) + +SELECT AVG(num_received_this_hour) as average_num_orders_received_hourly +FROM orders_received_hourly; +``` + +### On average, how long does an order take from being placed to being delivered? + +93.4 hours (delivered orders only) + +```sql +with delivery_hours as +( + SELECT created_at + , delivered_at + , datediff(hour, created_at, delivered_at) as hours_to_deliver + FROM dev_db.dbt_danieloutschoolcom.stg_postgres__orders + WHERE status = 'delivered' +) + +SELECT round(AVG(hours_to_deliver), 2) +FROM delivery_hours +; +``` + +### How many users have only made one purchase? Two purchases? Three+ purchases? 
+- 1 purchase = 25 users +- 2 purchases = 28 users +- 3 or more purchases = 71 users + +```sql +WITH orders_per_user_table as ( + SELECT user_guid + , COUNT(distinct order_guid) as orders_per_user + FROM dev_db.dbt_danieloutschoolcom.stg_postgres__orders + GROUP BY user_guid +) + +SELECT orders_per_user +, COUNT(distinct user_guid) as users_with_this_many_orders +FROM orders_per_user_table +GROUP BY orders_per_user +; +``` + +```sql +WITH orders_per_user_table as ( + SELECT user_guid + , COUNT(distinct order_guid) as orders_per_user + FROM dev_db.dbt_danieloutschoolcom.stg_postgres__orders + GROUP BY user_guid +), + +user_order_counts as ( + SELECT orders_per_user + , COUNT(distinct user_guid) as num_users_with_this_many_orders + FROM orders_per_user_table + GROUP BY orders_per_user +) + +SELECT SUM(num_users_with_this_many_orders) as num_users_with_three_or_more_orders +FROM user_order_counts +WHERE orders_per_user >= 3 +; +``` + +### Note: you should consider a purchase to be a single order. In other words, if a user places one order for 3 products, they are considered to have made 1 purchase. + +### On average, how many unique sessions do we have per hour? + +16.33 (averaged over hours that had at least one session) + +```sql +WITH unique_sessions_per_hour as ( + SELECT date_trunc('hour', created_at) as created_hour + , COUNT(distinct session_guid) as sessions_per_hour + FROM dev_db.dbt_danieloutschoolcom.stg_postgres__events + GROUP BY created_hour +) + +SELECT round(AVG(sessions_per_hour), 2) +FROM unique_sessions_per_hour +; +``` diff --git a/greenery/.gitignore b/greenery/.gitignore new file mode 100644 index 000000000..49f147cb9 --- /dev/null +++ b/greenery/.gitignore @@ -0,0 +1,4 @@ + +target/ +dbt_packages/ +logs/ diff --git a/greenery/README.md b/greenery/README.md new file mode 100644 index 000000000..7874ac842 --- /dev/null +++ b/greenery/README.md @@ -0,0 +1,15 @@ +Welcome to your new dbt project! 
+ +### Using the starter project + +Try running the following commands: +- dbt run +- dbt test + + +### Resources: +- Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) +- Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers +- Join the [chat](https://community.getdbt.com/) on Slack for live discussions and support +- Find [dbt events](https://events.getdbt.com) near you +- Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices diff --git a/greenery/analyses/.gitkeep b/greenery/analyses/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/dbt_project.yml b/greenery/dbt_project.yml new file mode 100644 index 000000000..1439d0bc9 --- /dev/null +++ b/greenery/dbt_project.yml @@ -0,0 +1,38 @@ + +# Name your project! Project names should contain only lowercase characters +# and underscores. A good package name should reflect your organization's +# name or the intended use of these models +name: 'greenery' +version: '1.0.0' +config-version: 2 + +# This setting configures which "profile" dbt uses for this project. +profile: 'greenery' + +# These configurations specify where dbt should look for different types of files. +# The `model-paths` config, for example, states that models in this project can be +# found in the "models/" directory. You probably won't need to change these! +model-paths: ["models"] +analysis-paths: ["analyses"] +test-paths: ["tests"] +seed-paths: ["seeds"] +macro-paths: ["macros"] +snapshot-paths: ["snapshots"] + +target-path: "target" # directory which will store compiled SQL files +clean-targets: # directories to be removed by `dbt clean` + - "target" + - "dbt_packages" + + +# Configuring models +# Full documentation: https://docs.getdbt.com/docs/configuring-models + +# In this example config, we tell dbt to build all models in the example/ +# directory as views. 
These settings can be overridden in the individual model +# files using the `{{ config(...) }}` macro. +models: + greenery: + # Configs prefixed with + apply to all models under models/example/ + example: + +materialized: view diff --git a/greenery/macros/.gitkeep b/greenery/macros/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/models/example/my_first_dbt_model.sql b/greenery/models/example/my_first_dbt_model.sql new file mode 100644 index 000000000..f31a12d94 --- /dev/null +++ b/greenery/models/example/my_first_dbt_model.sql @@ -0,0 +1,27 @@ + +/* + Welcome to your first dbt model! + Did you know that you can also configure models directly within SQL files? + This will override configurations stated in dbt_project.yml + + Try changing "table" to "view" below +*/ + +{{ config(materialized='table') }} + +with source_data as ( + + select 1 as id + union all + select null as id + +) + +select * +from source_data + +/* + Uncomment the line below to remove records with null `id` values +*/ + +-- where id is not null diff --git a/greenery/models/example/my_second_dbt_model.sql b/greenery/models/example/my_second_dbt_model.sql new file mode 100644 index 000000000..c91f8793a --- /dev/null +++ b/greenery/models/example/my_second_dbt_model.sql @@ -0,0 +1,6 @@ + +-- Use the `ref` function to select from other models; this keeps only the id = 1 row of my_first_dbt_model + +select * +from {{ ref('my_first_dbt_model') }} +where id = 1 diff --git a/greenery/models/example/schema.yml b/greenery/models/example/schema.yml new file mode 100644 index 000000000..e2aef43ca --- /dev/null +++ b/greenery/models/example/schema.yml @@ -0,0 +1,20 @@ + +version: 2 + +models: + - name: my_first_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique # no not_null test here: my_first_dbt_model emits a row with a null id + + - name: my_second_dbt_model + description: "A starter dbt model" + columns: + - name: id + description: "The primary key for this table" + tests: + - unique + - not_null diff --git 
a/greenery/models/staging/postgres/_postgres__models.yml b/greenery/models/staging/postgres/_postgres__models.yml new file mode 100644 index 000000000..18e3029a8 --- /dev/null +++ b/greenery/models/staging/postgres/_postgres__models.yml @@ -0,0 +1,245 @@ + +version: 2 + +models: + - name: stg_postgres__addresses + description: "Addresses (dbt stage model)" + columns: + - name: address_guid + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: address + description: "Street address" + tests: + - not_null + - name: zip_code + description: "Zip / Postal Code (left-padded with zeros to 5 characters)" + - name: state + description: "State / Province / Region" + - name: country + description: "Country" + tests: + - not_null + config: + column_types: + address_guid: varchar(256) + address: varchar(256) + zip_code: varchar(5) + state: varchar(256) + country: varchar(256) + + + + - name: stg_postgres__events + description: "Events (Analytics) (dbt stage model)" + columns: + - name: event_guid + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: session_guid + description: "Session this event belongs to (FK)" + tests: + - not_null + - name: user_guid + description: "User who triggered event (FK)" + - name: event_type + description: "Type of analytics event" + - name: page_url + description: "URL of page this event occurred on" + - name: created_at + description: "Datetime this event was triggered" + tests: + - not_null + - name: order_guid + description: "Order attached to this event (FK)" + - name: product_guid + description: "Product attached to this event (FK)" + config: + column_types: + event_guid: varchar(256) + session_guid: varchar(256) + user_guid: varchar(256) + event_type: varchar(256) + page_url: varchar(256) + created_at: timestamp + order_guid: varchar(256) + product_guid: varchar(256) + + + + - name: stg_postgres__order_items + description: "Line items as part of an order (dbt stage model)" + columns: + # NOTE: this model has no `order_item_id` column - it selects only order_guid, 
+ # product_guid and quantity - so no single-column primary key is tested here. + # The grain is presumably one row per (order_guid, product_guid) pair; + # TODO confirm and add a composite uniqueness test. + - name: order_guid + description: "The order this order item is attached to (FK)" + tests: + - not_null + - name: product_guid + description: "The product this order item is attached to (FK)" + tests: + - not_null + - name: quantity + description: "The quantity of this order item within the order it is attached to" + tests: + - not_null + config: + column_types: + order_guid: varchar(256) + product_guid: varchar(256) + quantity: int + + + + - name: stg_postgres__orders + description: "Orders (dbt stage model)" + columns: + - name: order_guid + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: promo_guid + description: "The promotional campaign this order is attached to (FK)" + - name: user_guid + description: "The user this order is attached to (FK)" + tests: + - not_null + - name: address_guid + description: "The address this order should be delivered to (FK)" + - name: created_at + description: "Datetime this order was created" + tests: + - not_null + - name: order_cost + description: "Cost of the order" + tests: + - not_null + - name: shipping_cost + description: "Shipping Cost of the order" + tests: + - not_null + - name: order_total + description: "Total Cost of the order" + tests: + - not_null + - name: tracking_guid + description: "The tracking number attached to the shipment of this order" + - name: shipping_service + description: "The shipping service used for this order" + - name: estimated_delivery_at + description: "Datetime that the order is estimated to be delivered at" + - name: delivered_at + description: "Datetime that the order was delivered" + - name: status + description: "The status of the order" + config: + column_types: + order_guid: varchar(256) + promo_guid: varchar(256) + user_guid: varchar(256) + address_guid: varchar(256) + created_at: timestamp + order_cost: float + shipping_cost: float + order_total: float + tracking_guid: varchar(256) + shipping_service: varchar(256) + estimated_delivery_at: timestamp + 
delivered_at: timestamp + status: varchar(256) + + - name: stg_postgres__products + description: "Products (dbt stage model)" + columns: + - name: product_guid + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: name + description: "Name of the product" + tests: + - not_null + - name: price + description: "The price of the product" + tests: + - not_null + - name: inventory + description: "The current inventory of the product" + tests: + - not_null + config: + column_types: + product_guid: varchar(256) + name: varchar(256) + price: float + inventory: int + + + + - name: stg_postgres__promos + description: "Promotional campaigns (dbt stage model)" + columns: + - name: promo_guid + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: discount + description: "The discount amount attached to this promotion" + tests: + - not_null + - name: status + description: "The current status of the promotional campaign" + tests: + - not_null + config: + column_types: + promo_guid: varchar(256) + discount: int + status: varchar(256) + + + - name: stg_postgres__users + description: "Users (dbt stage model)" + columns: + - name: user_guid + description: "The primary key for this table (PK)" + tests: + - unique + - not_null + - name: first_name + description: "The user's first name" + tests: + - not_null + - name: last_name + description: "The user's last name" + - name: email + description: "The user's email" + - name: phone_number + description: "The user's phone number" + - name: created_at + description: "Datetime this user signed up" + - name: updated_at + description: "Datetime this user record most recently was updated" + - name: address_guid + description: "The address attached to this user (FK)" + config: + column_types: + user_guid: varchar(256) + first_name: varchar(256) + last_name: varchar(256) + email: varchar(256) + phone_number: varchar(256) + created_at: timestamp + updated_at: timestamp + 
address_guid: varchar(256) + diff --git a/greenery/models/staging/postgres/_postgres__sources.yml b/greenery/models/staging/postgres/_postgres__sources.yml new file mode 100644 index 000000000..54b381f69 --- /dev/null +++ b/greenery/models/staging/postgres/_postgres__sources.yml @@ -0,0 +1,19 @@ +version: 2 + +sources: + - name: postgres + database: raw + schema: public + tables: + - name: addresses + - name: events + - name: order_items + - name: orders + - name: products + - name: promos + - name: users + + + + + diff --git a/greenery/models/staging/postgres/stg_postgres__addresses.sql b/greenery/models/staging/postgres/stg_postgres__addresses.sql new file mode 100644 index 000000000..ce5641547 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__addresses.sql @@ -0,0 +1,23 @@ +{{ + config(materialized = 'table') +}} +-- Staging model for the raw `addresses` source: renames address_id to address_guid and zero-pads zipcode +with source as ( + select address_id + , address + , state + , zipcode + , country from {{ source('postgres', 'addresses') }} +) + +, renamed_recast as ( + select + address_id as address_guid + , address + , state + , lpad(cast(zipcode as varchar), 5, '0') as zip_code -- explicit cast and string pad; zero-pads to 5 characters + , country + from source +) + +select * from renamed_recast diff --git a/greenery/models/staging/postgres/stg_postgres__events.sql b/greenery/models/staging/postgres/stg_postgres__events.sql new file mode 100644 index 000000000..5c90ab60f --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__events.sql @@ -0,0 +1,29 @@ +{{ + config(materialized = 'table') +}} +-- Staging model for the raw `events` source: renames the *_id keys to *_guid; other columns pass through unchanged +with source as ( + select event_id + , session_id + , user_id + , page_url + , created_at + , event_type + , order_id + , product_id from {{ source('postgres', 'events') }} +) + +, renamed_recast as ( + select + event_id as event_guid + , session_id as session_guid + , user_id as user_guid + , page_url + , created_at + , event_type + , order_id as order_guid + , product_id as product_guid + from source +) + +select * from renamed_recast diff --git 
a/greenery/models/staging/postgres/stg_postgres__order_items.sql b/greenery/models/staging/postgres/stg_postgres__order_items.sql new file mode 100644 index 000000000..8ee3838ff --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__order_items.sql @@ -0,0 +1,19 @@ +{{ + config(materialized = 'table') +}} +-- Staging model for the raw `order_items` source: renames order_id / product_id to *_guid; quantity passes through +with source as ( + select order_id + , product_id + , quantity from {{ source('postgres', 'order_items') }} +) + +, renamed_recast as ( + select + order_id as order_guid + , product_id as product_guid + , quantity + from source +) + +select * from renamed_recast diff --git a/greenery/models/staging/postgres/stg_postgres__orders.sql b/greenery/models/staging/postgres/stg_postgres__orders.sql new file mode 100644 index 000000000..e47493752 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__orders.sql @@ -0,0 +1,39 @@ +{{ + config(materialized = 'table') +}} +-- Staging model for the raw `orders` source: renames the *_id keys (including tracking_id) to *_guid; other columns pass through +with source as ( + select order_id + , user_id + , promo_id + , address_id + , created_at + , order_cost + , shipping_cost + , order_total + , tracking_id + , shipping_service + , estimated_delivery_at + , delivered_at + , status from {{ source('postgres', 'orders') }} +) + +, renamed_recast as ( + select + order_id as order_guid + , user_id as user_guid + , promo_id as promo_guid + , address_id as address_guid + , created_at + , order_cost + , shipping_cost + , order_total + , tracking_id as tracking_guid + , shipping_service + , estimated_delivery_at + , delivered_at + , status + from source +) + +select * from renamed_recast diff --git a/greenery/models/staging/postgres/stg_postgres__products.sql b/greenery/models/staging/postgres/stg_postgres__products.sql new file mode 100644 index 000000000..5931bf439 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__products.sql @@ -0,0 +1,21 @@ +{{ + config(materialized = 'table') +}} +-- Staging model for the raw `products` source: renames product_id to product_guid; other columns pass through +with source as ( + select product_id + , name + , price + , inventory from {{ 
source('postgres', 'products') }} +) + +, renamed_recast as ( + select + product_id as product_guid + , name + , price + , inventory + from source +) + +select * from renamed_recast diff --git a/greenery/models/staging/postgres/stg_postgres__promos.sql b/greenery/models/staging/postgres/stg_postgres__promos.sql new file mode 100644 index 000000000..f020b2eea --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__promos.sql @@ -0,0 +1,19 @@ +{{ + config(materialized = 'table') +}} +-- Staging model for the raw `promos` source: renames promo_id to promo_guid; other columns pass through +with source as ( + select promo_id + , discount + , status from {{ source('postgres', 'promos') }} +) + +, renamed_recast as ( + select + promo_id as promo_guid + , discount + , status + from source +) + +select * from renamed_recast diff --git a/greenery/models/staging/postgres/stg_postgres__users.sql b/greenery/models/staging/postgres/stg_postgres__users.sql new file mode 100644 index 000000000..f2b82e734 --- /dev/null +++ b/greenery/models/staging/postgres/stg_postgres__users.sql @@ -0,0 +1,29 @@ +{{ + config(materialized = 'table') +}} +-- Staging model for the raw `users` source: renames user_id / address_id to *_guid; other columns pass through +with source as ( + select user_id + , first_name + , last_name + , email + , phone_number + , created_at + , updated_at + , address_id from {{ source('postgres', 'users') }} +) + +, renamed_recast as ( + select + user_id as user_guid + , first_name + , last_name + , email + , phone_number + , created_at + , updated_at + , address_id as address_guid + from source +) + +select * from renamed_recast diff --git a/greenery/seeds/.gitkeep b/greenery/seeds/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/snapshots/.gitkeep b/greenery/snapshots/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/greenery/snapshots/snap_postgres__products.sql b/greenery/snapshots/snap_postgres__products.sql new file mode 100644 index 000000000..cff186397 --- /dev/null +++ b/greenery/snapshots/snap_postgres__products.sql @@ -0,0 
+1,16 @@ +{% snapshot inventory_snapshot %} +{# Snapshot of the raw products source: the `check` strategy compares only the `inventory` column, keyed by `product_id`. #} +{{ + config( + target_database = target.database, + target_schema = target.schema, + strategy = 'check', + unique_key = 'product_id', + check_cols = ['inventory'], + ) +}} + + +select * from {{ source('postgres', 'products') }} + +{% endsnapshot %} diff --git a/greenery/tests/.gitkeep b/greenery/tests/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/scratch/scratch b/scratch/scratch new file mode 100644 index 000000000..a33387d27 --- /dev/null +++ b/scratch/scratch @@ -0,0 +1 @@ +SELECT * FROM dev_db.dbt_danieloutschoolcom.stg_postgres__orders;