Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion 02_activities/assignments/Assignment2.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,17 +44,29 @@ Additionally, include a date table.
There are several tools online you can use, I'd recommend [Draw.io](https://www.drawio.com/) or [LucidChart](https://www.lucidchart.com/pages/).

**HINT:** You do not need to create any data for this prompt. This is a conceptual model only.
# This is the pdf for this prompt:
[text](Bookstore1.drawio.pdf)

#### Prompt 2
We want to create employee shifts, splitting up the day into morning and evening. Add this to the ERD.
# This is the pdf for this prompt:
[text](Bookstore2.drawio.pdf)

#### Prompt 3
The store wants to keep customer addresses. Propose two architectures for the CUSTOMER_ADDRESS table, one that will retain changes, and another that will overwrite. Which is type 1, which is type 2?

**HINT:** search type 1 vs type 2 slowly changing dimensions.

```
Your answer...
The two different types of architectures related to the concept of Slowly Changing Dimensions (SCD) can be used to store a CUSTOMER_ADDRESS table are:

Overwrite (Type 1 SCD): simply overwrites an existing record with the new data.
Only the latest address is stored. This is simple to implement and maintain, and requires less storage space as no historical data is kept. That no historical data is kept is also the downside since the system can't track changes over time.

Add New Row (Type 2 SCD): a new row is added for each change, while the old one is retained, which allows you to maintain a complete history of address changes for each customer. If knowing about address changes is important then this is obviously useful; however, it is more complex and does require more storage space.



```

***
Expand Down
Binary file added 02_activities/assignments/Bookstore1.drawio.pdf
Binary file not shown.
Binary file added 02_activities/assignments/Bookstore2.drawio.pdf
Binary file not shown.
196 changes: 189 additions & 7 deletions 02_activities/assignments/assignment2.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ HINT: keep the syntax the same, but edited the correct components with the strin
The `||` values concatenate the columns into strings.
Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed.
All the other rows will remain the same.) */

SELECT
product_name || ', ' ||
COALESCE(product_size, '') || ' (' ||
COALESCE(product_qty_type, 'unit') || ')'
FROM product;


--Windowed Functions
Expand All @@ -32,17 +36,63 @@ each new market date for each customer, or select only the unique market dates p
(without purchase details) and number those visits.
HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */

--Using Row_number:
SELECT
customer_id,
market_date,
product_id,
quantity,
cost_to_customer_per_qty,
ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number
FROM customer_purchases;

--Using Dense_rank:
SELECT
customer_id,
market_date,
DENSE_RANK() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number
FROM
(SELECT DISTINCT customer_id, market_date FROM customer_purchases) AS unique_visits;


/* 2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1,
then write another query that uses this one as a subquery (or temp table) and filters the results to
only the customer’s most recent visit. */

--Reverse the numbering
SELECT
customer_id,
market_date,
ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS visit_number
FROM customer_purchases;

--Using a sub-query:
SELECT
customer_id,
market_date,
visit_number
FROM
(
SELECT
customer_id,
market_date,
ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS visit_number
FROM
customer_purchases
) AS subquery
WHERE
visit_number = 1;


/* 3. Using a COUNT() window function, include a value along with each row of the
customer_purchases table that indicates how many different times that customer has purchased that product_id. */

SELECT
customer_id,
product_id,
market_date,
quantity,
cost_to_customer_per_qty,
COUNT(*) OVER (PARTITION BY customer_id, product_id) AS purchase_count
FROM customer_purchases;


-- String manipulations
Expand All @@ -57,10 +107,23 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for

Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */

SELECT
product_name,
CASE
WHEN INSTR(product_name, '-') > 0 THEN
TRIM(SUBSTR(product_name, INSTR(product_name, '-') + 1))
ELSE
NULL
END AS description
FROM product;


/* 2. Filter the query to show any product_size value that contain a number with REGEXP. */

SELECT
product_name,
product_size
FROM product
WHERE product_size REGEXP '[0-9]';


-- UNION
Expand All @@ -73,7 +136,34 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling
3) Query the second temp table twice, once for the best day, once for the worst day,
with a UNION binding them. */


WITH SalesPerDate AS (
SELECT
market_date,
SUM(quantity * cost_to_customer_per_qty) AS total_sales
FROM customer_purchases
GROUP BY market_date
),
RankedSales AS (
SELECT
market_date,
total_sales,
RANK() OVER (ORDER BY total_sales DESC) AS rank_desc,
RANK() OVER (ORDER BY total_sales ASC) AS rank_asc
FROM SalesPerDate
)
SELECT
market_date,
total_sales,
'Best Day' AS sales_category
FROM RankedSales
WHERE rank_desc = 1
UNION
SELECT
market_date,
total_sales,
'Worst Day' AS sales_category
FROM RankedSales
WHERE rank_asc = 1;


/* SECTION 3 */
Expand All @@ -89,26 +179,94 @@ Think a bit about the row counts: how many distinct vendors, product names are t
How many customers are there (y).
Before your final group by you should have the product of those two queries (x*y). */



WITH VendorProduct AS (
SELECT
v.vendor_name,
p.product_name,
vi.original_price AS price
FROM
vendor_inventory vi
JOIN
vendor v ON vi.vendor_id = v.vendor_id
JOIN
product p ON vi.product_id = p.product_id
),
CustomerCount AS (
SELECT
COUNT(*) AS customer_count
FROM
customer
)
SELECT
vp.vendor_name,
vp.product_name,
SUM(5 * vp.price * cc.customer_count) AS total_revenue
FROM
VendorProduct vp,
CustomerCount cc
GROUP BY
vp.vendor_name, vp.product_name;


-- INSERT
/*1. Create a new table "product_units".
This table will contain only products where the `product_qty_type = 'unit'`.
It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`.
Name the timestamp column `snapshot_timestamp`. */

CREATE TABLE product_units AS
SELECT
*,
CURRENT_TIMESTAMP AS snapshot_timestamp
FROM
product
WHERE
product_qty_type = 'unit';


/*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp).
This can be any product you desire (e.g. add another record for Apple Pie). */

INSERT INTO product_units (
product_id,
product_name,
product_size,
product_category_id,
product_qty_type,
snapshot_timestamp
)
VALUES (
1001,
'Apple Pie',
'10"',
3,
'unit',
CURRENT_TIMESTAMP
);


-- DELETE
/* 1. Delete the older record for the whatever product you added.

HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/
--Identify the product_id of the older record:

SELECT
product_id,
product_name,
snapshot_timestamp
FROM
product_units
WHERE
product_name = 'Apple Pie'
ORDER BY
snapshot_timestamp ASC;

--Delete older record:
DELETE FROM
product_units
WHERE
product_id = 1001;


-- UPDATE
Expand All @@ -128,6 +286,30 @@ Finally, make sure you have a WHERE statement to update the right row,
you'll need to use product_units.product_id to refer to the correct row within the product_units table.
When you have all of these components, you can run the update statement. */

-- 1: Add the column
ALTER TABLE product_units
ADD current_quantity INT;

-- 2: Get the last quantity per product using ROW_NUMBER()
WITH LastQuantity AS (
SELECT
vi.product_id,
COALESCE(vi.quantity, 0) AS last_quantity,
ROW_NUMBER() OVER (PARTITION BY vi.product_id ORDER BY vi.market_date DESC) AS row_num
FROM
vendor_inventory vi
)
-- 3: Update the current_quantity in product_units
UPDATE product_units
SET current_quantity = (
SELECT
lq.last_quantity
FROM
LastQuantity lq
WHERE
lq.product_id = product_units.product_id
AND lq.row_num = 1
);



47 changes: 47 additions & 0 deletions 04_this_cohort/live_code/module_4/FULL_OUTER_JOIN_UNION.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
--FULL OUTER JOIN with a UNION

DROP TABLE IF EXISTS temp.store1;
CREATE TEMP TABLE IF NOT EXISTS temp.store1
(
costume TEXT,
quantity INT
);

INSERT INTO temp.store1
VALUES("tiger",6),
("elephant",2),
("princess", 4);


DROP TABLE IF EXISTS temp.store2;
CREATE TEMP TABLE IF NOT EXISTS temp.store2
(
costume TEXT,
quantity INT
);

INSERT INTO temp.store2
VALUES("tiger",2),
("dancer",7),
("superhero", 5);

SELECT
s1.costume
,s1.quantity as store1_quantity
,s2.quantity as store2_quantity

FROM store1 s1
LEFT JOIN store2 s2
ON s1.costume = s2.costume

UNION ALL

SELECT
s2.costume
,s1.quantity
,s2.quantity

FROM store2 s2
LEFT JOIN store1 s1
ON s1.costume = s2.costume
WHERE s1.costume IS NULL
23 changes: 23 additions & 0 deletions 04_this_cohort/live_code/module_4/IFNULL_NULLIF.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
--IFNULL and coalesce + NULLIF

SELECT *
,IFNULL(product_size, 'Unknown') as new_product_size

--less meaningful "conceptual"
,IFNULL(product_size, product_qty_type) -- both null, results with null
,coalesce(product_size,product_qty_type,'missing') -- if first the value is null, the second is null, then do the third value
,IFNULL(IFNULL(product_size, product_qty_type),'missing') -- same but have to wrap within


FROM product;

SELECT *
,IFNULL(product_size, 'Unknown') as new_product_size
,NULLIF(product_size, '') -- finding the values that product_size column is "blank" and setting them to null
,coalesce(NULLIF(product_size, ''), 'unknown') as good_product_size
,IFNULL(NULLIF(product_size, ''), 'unknown') as good_product_size

FROM product

WHERE NULLIF(product_size, '') IS NULL -- both blanks and NULLs
--WHERE product_size IS NULL -- only NULLs
Loading