Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 24 additions & 1 deletion 02_activities/assignments/Assignment2.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,30 @@ The store wants to keep customer addresses. Propose two architectures for the CU
**HINT:** search type 1 vs type 2 slowly changing dimensions.

```
Your answer...
CUSTOMER_ADDRESS table
Architecture 1: Overwriting Changes (Type 1)
customer_id (Primary Key)
address_line1
address_line2
city
province
postal_code
country
updated_date

Architecture 2: Retaining Changes (Type2)
address_id (Primary Key)
customer_id
address_line1
address_line2
city
province
postal_code
country
effective_date
end_date
is_current

```

***
Expand Down
214 changes: 213 additions & 1 deletion 02_activities/assignments/assignment2.sql
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,20 @@ The `||` values concatenate the columns into strings.
Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed.
All the other rows will remain the same.) */

-- Baseline (the problem): if product_size or product_qty_type is NULL,
-- the whole || concatenation evaluates to NULL.
SELECT
product_name || ', ' || product_size|| ' (' || product_qty_type || ')'
FROM product;

-- Step 1: replace NULLs with an empty string so the concatenation is never NULL.
-- The ', ' separator is kept identical to the baseline so rows that had no NULLs are unchanged.
SELECT
    product_name || ', ' || COALESCE(product_size, '') || ' (' || COALESCE(product_qty_type, '') || ')' AS Product_name_size_qty_type
FROM product;

-- Step 2: same as step 1, but a missing product_qty_type defaults to 'unit'.
SELECT
    product_name || ', ' || COALESCE(product_size, '') || ' (' || COALESCE(product_qty_type, 'unit') || ')' AS Product_name_size_qty_type
FROM product;


--Windowed Functions
Expand All @@ -32,17 +46,86 @@ each new market date for each customer, or select only the unique market dates p
(without purchase details) and number those visits.
HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */

-- ROW_NUMBER approach: collapse purchases to one row per customer per market date,
-- then number those visits chronologically (first visit = 1).
-- Only the grouped columns are selected: any other column from customer_purchases
-- would hold a value from an arbitrary purchase within the group.
SELECT
    customer_id,
    market_date,
    ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date) AS customer_visit_number
FROM customer_purchases
GROUP BY customer_id, market_date
ORDER BY customer_id, market_date;


-- DENSE_RANK approach: keep every purchase row; rows that share a market date
-- for the same customer receive the same visit number, with no gaps between visits.
SELECT
    cp.customer_id,
    cp.market_date,
    DENSE_RANK() OVER (
        PARTITION BY cp.customer_id
        ORDER BY cp.market_date
    ) AS customer_visit_number
FROM customer_purchases AS cp
ORDER BY
    cp.customer_id,
    cp.market_date;

/* 2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1,
then write another query that uses this one as a subquery (or temp table) and filters the results to
only the customer’s most recent visit. */

-- ROW_NUMBER approach: one row per customer per market date, numbered newest-first,
-- then filtered to visit number 1 (each customer's most recent visit).
-- The inner query is grouped by (customer_id, market_date), so only those columns are
-- exposed; purchase-level columns would come from an arbitrary row of each group.
SELECT
    x.customer_id,
    x.market_date AS recent_visit_market_date,
    x.customer_visit_number
FROM (
    SELECT
        customer_id,
        market_date,
        ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS customer_visit_number
    FROM customer_purchases
    GROUP BY customer_id, market_date
) AS x
WHERE x.customer_visit_number = 1
ORDER BY x.customer_id;


-- DENSE_RANK approach: rank every purchase row newest-first per customer,
-- then keep all purchase rows that belong to the most recent visit (rank 1).
WITH ranked_purchases AS (
    SELECT
        *,
        DENSE_RANK() OVER (
            PARTITION BY customer_id
            ORDER BY market_date DESC
        ) AS customer_visit_number
    FROM customer_purchases
    ORDER BY customer_id, market_date DESC
)
SELECT
    product_id,
    vendor_id,
    customer_id,
    market_date AS recent_visit_market_date,
    quantity,
    cost_to_customer_per_qty,
    transaction_time,
    customer_visit_number
FROM ranked_purchases
WHERE customer_visit_number = 1;



/* 3. Using a COUNT() window function, include a value along with each row of the
customer_purchases table that indicates how many different times that customer has purchased that product_id. */

-- Every customer_purchases row, plus the number of purchase rows that share
-- the same customer_id and product_id.
SELECT
    *,
    COUNT(product_id) OVER (PARTITION BY customer_id, product_id) AS number_of_purchase
FROM customer_purchases
ORDER BY customer_id, product_id, market_date;



-- String manipulations
Expand All @@ -57,8 +140,31 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for

Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */

-- Extract the text after the first hyphen in product_name as "description".
-- Names without a hyphen yield a real SQL NULL (not the string 'NULL').
-- TRIM removes leading/trailing whitespace, so the result does not depend on
-- how many spaces follow the hyphen.
SELECT
    *,
    CASE
        WHEN INSTR(product_name, '-') > 0
            THEN TRIM(SUBSTR(product_name, INSTR(product_name, '-') + 1))
        ELSE NULL
    END AS description
FROM product;



/* 2. Filter the query to show any product_size value that contain a number with REGEXP. */
-- Same description extraction as the previous query, restricted to products whose
-- product_size contains a digit anywhere in the value.
-- The pattern is unanchored ('[0-9]'), so a size such as 'about 8 oz' also matches.
-- NOTE(review): REGEXP needs a regexp() function supplied by the client/extension — confirm it is available.
SELECT
    *,
    CASE
        WHEN INSTR(product_name, '-') > 0
            THEN TRIM(SUBSTR(product_name, INSTR(product_name, '-') + 1))
        ELSE NULL
    END AS description
FROM product
WHERE product_size REGEXP '[0-9]';


-- UNION
/* 1. Using a UNION, write a query that displays the market dates with the highest and lowest total sales.
Expand All @@ -71,6 +177,48 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling
with a UNION binding them. */


-- Step 1: temp table holding total sales (quantity * cost_to_customer_per_qty,
-- rounded to 2 decimals) for each market date.
-- Only market_date and the aggregate are kept; other customer_purchases columns
-- would be arbitrary values within each market_date group.
DROP TABLE IF EXISTS temp.total_sales;

CREATE TABLE temp.total_sales AS
SELECT
    market_date,
    ROUND(SUM(quantity * cost_to_customer_per_qty), 2) AS total_sales
FROM customer_purchases
GROUP BY market_date;

-- Step 2: temp table ranking each market date by total sales in both directions,
-- so that rank 1 identifies the lowest day (low_to_high_sale) and the highest day
-- (high_to_low_sale).
DROP TABLE IF EXISTS temp.rank_sales;

CREATE TABLE temp.rank_sales AS
SELECT
    market_date,
    total_sales,
    RANK() OVER (ORDER BY total_sales) AS low_to_high_sale,
    RANK() OVER (ORDER BY total_sales DESC) AS high_to_low_sale
FROM temp.total_sales;


-- Step 3: UNION of the lowest-sales and highest-sales market dates.
-- sale_rank is the high-to-low rank in both branches: 1 marks the best day and
-- the largest value marks the worst day.
SELECT
    market_date,
    total_sales,
    high_to_low_sale AS sale_rank
FROM temp.rank_sales
WHERE low_to_high_sale = 1

UNION

SELECT
    market_date,
    total_sales,
    high_to_low_sale AS sale_rank
FROM temp.rank_sales
WHERE high_to_low_sale = 1

-- Deterministic output order: highest-sales date first.
ORDER BY sale_rank;


/* SECTION 3 */
Expand All @@ -86,25 +234,65 @@ Think a bit about the row counts: how many distinct vendors, product names are t
How many customers are there (y).
Before your final group by you should have the product of those two queries (x*y). */


-- Revenue per vendor and product if every customer on record bought 5 units.
-- Each CTE is one logical step; the CROSS JOIN pairs every vendor/product row
-- with every customer before the final aggregation.
WITH vendor_product_price AS (
    -- One row per distinct (vendor, product, price of 5 units).
    SELECT DISTINCT
        vendor_name,
        product_name,
        5 * original_price AS price
    FROM vendor_inventory AS vi
    INNER JOIN product AS p
        ON vi.product_id = p.product_id
    INNER JOIN vendor AS v
        ON v.vendor_id = vi.vendor_id
),
vendor_product_cost AS (
    -- Collapse to one row per vendor/product.
    SELECT
        vendor_name,
        product_name,
        SUM(price) AS product_cost
    FROM vendor_product_price
    GROUP BY vendor_name, product_name
),
customers AS (
    SELECT DISTINCT
        customer_id
    FROM customer
)
SELECT
    vpc.vendor_name,
    vpc.product_name,
    SUM(vpc.product_cost) AS earning_per_product
FROM vendor_product_cost AS vpc
CROSS JOIN customers AS c
GROUP BY vpc.vendor_name, vpc.product_name;

-- INSERT
/*1. Create a new table "product_units".
This table will contain only products where the `product_qty_type = 'unit'`.
It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`.
Name the timestamp column `snapshot_timestamp`. */

-- Snapshot of all 'unit' products with the time the snapshot was taken.
-- The DROP targets the same (unqualified) table the CREATE builds, so the
-- script can be re-run without a "table already exists" error.
DROP TABLE IF EXISTS product_units;

CREATE TABLE product_units AS
SELECT
    *,
    CURRENT_TIMESTAMP AS snapshot_timestamp
FROM product
WHERE product_qty_type = 'unit';



/*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp).
This can be any product you desire (e.g. add another record for Apple Pie). */


-- Add a new 'Cut Zinnias Bouquet' record stamped with the current time.
-- Values are positional: the five columns copied from product, followed by snapshot_timestamp.
-- NOTE(review): no column list is given, so this depends on product's column order — confirm against the schema.
INSERT INTO product_units
VALUES (24, 'Cut Zinnias Bouquet', 'large', 4, 'unit', CURRENT_TIMESTAMP);


-- DELETE
/* 1. Delete the older record for the whatever product you added.

HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/
-- Remove the older record, identified by its product_id.
-- NOTE(review): presumably product_id 6 is the original 'Cut Zinnias Bouquet' row — confirm before running.
DELETE FROM product_units
WHERE product_id = 6;



Expand All @@ -125,6 +313,30 @@ Finally, make sure you have a WHERE statement to update the right row,
you'll need to use product_units.product_id to refer to the correct row within the product_units table.
When you have all of these components, you can run the update statement. */

-- Add an integer column that will hold each product's most recent inventory quantity.
ALTER TABLE product_units ADD COLUMN current_quantity INT;

-- Set current_quantity to the quantity of the most recent vendor_inventory row
-- for each product, or 0 when the product has no inventory rows (or the latest
-- quantity is NULL). All rows are updated intentionally; the correlated subquery
-- ties each row to its own product via product_units.product_id.
UPDATE product_units
SET current_quantity = COALESCE(
    (
        SELECT vi.quantity
        FROM vendor_inventory AS vi
        WHERE vi.product_id = product_units.product_id
        -- vendor_id breaks ties when several rows share the latest market_date,
        -- so the chosen row is deterministic.
        ORDER BY vi.market_date DESC, vi.vendor_id
        LIMIT 1
    ),
    0
);



Binary file not shown.