diff --git a/02_activities/assignments/Assignment2.md b/02_activities/assignments/Assignment2.md
index 05244623b..d90a0944c 100644
--- a/02_activities/assignments/Assignment2.md
+++ b/02_activities/assignments/Assignment2.md
@@ -44,9 +44,13 @@ Additionally, include a date table.
There are several tools online you can use, I'd recommend [Draw.io](https://www.drawio.com/) or [LucidChart](https://www.lucidchart.com/pages/).
**HINT:** You do not need to create any data for this prompt. This is a conceptual model only.
+The ERD for this prompt is attached as a PDF: [Bookstore1.drawio.pdf](Bookstore1.drawio.pdf)
#### Prompt 2
We want to create employee shifts, splitting up the day into morning and evening. Add this to the ERD.
+The ERD for this prompt is attached as a PDF: [Bookstore2.drawio.pdf](Bookstore2.drawio.pdf)
#### Prompt 3
The store wants to keep customer addresses. Propose two architectures for the CUSTOMER_ADDRESS table, one that will retain changes, and another that will overwrite. Which is type 1, which is type 2?
@@ -54,7 +58,15 @@ The store wants to keep customer addresses. Propose two architectures for the CU
**HINT:** search type 1 vs type 2 slowly changing dimensions.
```
-Your answer...
+Two architectures, based on the concept of Slowly Changing Dimensions (SCD), can be used for the CUSTOMER_ADDRESS table:
+
+Overwrite (Type 1 SCD): the existing record is simply overwritten with the new data, so only the latest address is stored. This is simple to implement and maintain, and requires less storage since no historical data is kept. The lack of history is also the downside: the system cannot track address changes over time.
+
+Add New Row (Type 2 SCD): a new row is added for each change and the old row is retained, which maintains a complete history of address changes for each customer. This is useful when knowing about address changes matters; however, it is more complex and requires more storage.
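+
+A minimal DDL sketch of the two designs (table and column names are illustrative assumptions, not prescribed by the assignment):
+
+-- Type 1: one row per customer; an address change overwrites the row in place.
+CREATE TABLE customer_address_type1 (
+    customer_id  INT PRIMARY KEY,
+    address      TEXT,
+    last_updated TIMESTAMP
+);
+
+-- Type 2: one row per address version; a change closes the current row and inserts a new one.
+CREATE TABLE customer_address_type2 (
+    customer_address_id INT PRIMARY KEY,
+    customer_id         INT,
+    address             TEXT,
+    valid_from          TIMESTAMP,
+    valid_to            TIMESTAMP, -- NULL while the address is current
+    is_current          INT        -- 1 = current, 0 = historical
+);
+
+-- Applying an address change under Type 2 would then be two statements (sketch):
+-- UPDATE customer_address_type2 SET valid_to = CURRENT_TIMESTAMP, is_current = 0
+--   WHERE customer_id = 1 AND is_current = 1;
+-- INSERT INTO customer_address_type2 VALUES (..., 1, 'new address', CURRENT_TIMESTAMP, NULL, 1);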
```
***
diff --git a/02_activities/assignments/Bookstore1.drawio.pdf b/02_activities/assignments/Bookstore1.drawio.pdf
new file mode 100644
index 000000000..fdb44b1fa
Binary files /dev/null and b/02_activities/assignments/Bookstore1.drawio.pdf differ
diff --git a/02_activities/assignments/Bookstore2.drawio.pdf b/02_activities/assignments/Bookstore2.drawio.pdf
new file mode 100644
index 000000000..788f010e9
Binary files /dev/null and b/02_activities/assignments/Bookstore2.drawio.pdf differ
diff --git a/02_activities/assignments/assignment2.sql b/02_activities/assignments/assignment2.sql
index 5ad40748a..726c96017 100644
--- a/02_activities/assignments/assignment2.sql
+++ b/02_activities/assignments/assignment2.sql
@@ -19,7 +19,11 @@ HINT: keep the syntax the same, but edited the correct components with the strin
The `||` values concatenate the columns into strings.
Edit the appropriate columns -- you're making two edits -- and the NULL rows will be fixed.
All the other rows will remain the same.) */
-
+SELECT
+ product_name || ', ' ||
+ COALESCE(product_size, '') || ' (' ||
+ COALESCE(product_qty_type, 'unit') || ')'
+FROM product;
--Windowed Functions
@@ -32,17 +36,63 @@ each new market date for each customer, or select only the unique market dates p
(without purchase details) and number those visits.
HINT: One of these approaches uses ROW_NUMBER() and one uses DENSE_RANK(). */
+--Using ROW_NUMBER():
+SELECT
+ customer_id,
+ market_date,
+ product_id,
+ quantity,
+ cost_to_customer_per_qty,
+ ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number
+FROM customer_purchases;
+
+--Using DENSE_RANK():
+SELECT
+ customer_id,
+ market_date,
+ DENSE_RANK() OVER (PARTITION BY customer_id ORDER BY market_date) AS visit_number
+FROM
+ (SELECT DISTINCT customer_id, market_date FROM customer_purchases) AS unique_visits;
/* 2. Reverse the numbering of the query from a part so each customer’s most recent visit is labeled 1,
then write another query that uses this one as a subquery (or temp table) and filters the results to
only the customer’s most recent visit. */
-
+--Reverse the numbering
+SELECT
+ customer_id,
+ market_date,
+ ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS visit_number
+FROM customer_purchases;
+
+--Using a sub-query:
+SELECT
+ customer_id,
+ market_date,
+ visit_number
+FROM
+ (
+ SELECT
+ customer_id,
+ market_date,
+ ROW_NUMBER() OVER (PARTITION BY customer_id ORDER BY market_date DESC) AS visit_number
+ FROM
+ customer_purchases
+ ) AS subquery
+WHERE
+ visit_number = 1;
/* 3. Using a COUNT() window function, include a value along with each row of the
customer_purchases table that indicates how many different times that customer has purchased that product_id. */
-
+SELECT
+ customer_id,
+ product_id,
+ market_date,
+ quantity,
+ cost_to_customer_per_qty,
+ COUNT(*) OVER (PARTITION BY customer_id, product_id) AS purchase_count
+FROM customer_purchases;
-- String manipulations
@@ -57,10 +107,23 @@ Remove any trailing or leading whitespaces. Don't just use a case statement for
Hint: you might need to use INSTR(product_name,'-') to find the hyphens. INSTR will help split the column. */
+SELECT
+ product_name,
+ CASE
+ WHEN INSTR(product_name, '-') > 0 THEN
+ TRIM(SUBSTR(product_name, INSTR(product_name, '-') + 1))
+ ELSE
+ NULL
+ END AS description
+FROM product;
/* 2. Filter the query to show any product_size value that contain a number with REGEXP. */
-
+SELECT
+ product_name,
+ product_size
+FROM product
+WHERE product_size REGEXP '[0-9]';
-- UNION
@@ -73,7 +136,34 @@ HINT: There are a possibly a few ways to do this query, but if you're struggling
3) Query the second temp table twice, once for the best day, once for the worst day,
with a UNION binding them. */
-
+WITH SalesPerDate AS (
+ SELECT
+ market_date,
+ SUM(quantity * cost_to_customer_per_qty) AS total_sales
+ FROM customer_purchases
+ GROUP BY market_date
+),
+RankedSales AS (
+ SELECT
+ market_date,
+ total_sales,
+ RANK() OVER (ORDER BY total_sales DESC) AS rank_desc,
+ RANK() OVER (ORDER BY total_sales ASC) AS rank_asc
+ FROM SalesPerDate
+)
+SELECT
+ market_date,
+ total_sales,
+ 'Best Day' AS sales_category
+FROM RankedSales
+WHERE rank_desc = 1
+UNION
+SELECT
+ market_date,
+ total_sales,
+ 'Worst Day' AS sales_category
+FROM RankedSales
+WHERE rank_asc = 1;
/* SECTION 3 */
@@ -89,26 +179,104 @@ Think a bit about the row counts: how many distinct vendors, product names are t
How many customers are there (y).
Before your final group by you should have the product of those two queries (x*y). */
+-- Get vendor names, product names, and their prices
+WITH VendorProduct AS (
+ SELECT
+ v.vendor_name,
+ p.product_name,
+ vi.original_price AS price
+ FROM vendor_inventory vi
+ JOIN vendor v ON vi.vendor_id = v.vendor_id
+ JOIN product p ON vi.product_id = p.product_id
+),
+
+-- Get the count of customers
+CustomerCount AS (
+ SELECT
+ COUNT(*) AS customer_count
+ FROM customer
+),
+
+-- Cross join to simulate each product being sold to every customer
+VendorProductCustomer AS (
+ SELECT
+ vp.vendor_name,
+ vp.product_name,
+ vp.price,
+ cc.customer_count
+ FROM VendorProduct vp
+ CROSS JOIN CustomerCount cc
+)
+
+-- Calculate total revenue for each vendor and product
+SELECT
+ vendor_name,
+ product_name,
+ SUM(5 * price * customer_count) AS total_revenue
+FROM VendorProductCustomer
+GROUP BY vendor_name, product_name;
-
+
-- INSERT
/*1. Create a new table "product_units".
This table will contain only products where the `product_qty_type = 'unit'`.
It should use all of the columns from the product table, as well as a new column for the `CURRENT_TIMESTAMP`.
Name the timestamp column `snapshot_timestamp`. */
+CREATE TABLE product_units AS
+SELECT
+ *,
+ CURRENT_TIMESTAMP AS snapshot_timestamp
+FROM product
+WHERE product_qty_type = 'unit';
/*2. Using `INSERT`, add a new row to the product_units table (with an updated timestamp).
This can be any product you desire (e.g. add another record for Apple Pie). */
-
+INSERT INTO product_units (
+ product_id,
+ product_name,
+ product_size,
+ product_category_id,
+ product_qty_type,
+ snapshot_timestamp
+)
+VALUES (
+ 1001,
+ 'Apple Pie',
+ '10"',
+ 3,
+ 'unit',
+ CURRENT_TIMESTAMP
+);
+--Note: I gave the new Apple Pie record a different product_id so I could verify
+--that the older record was deleted in the next part.
-- DELETE
/* 1. Delete the older record for the whatever product you added.
HINT: If you don't specify a WHERE clause, you are going to have a bad time.*/
+--Identify the product_id of the older record:
+SELECT
+ product_id,
+ product_name,
+ snapshot_timestamp
+FROM product_units
+WHERE product_name = 'Apple Pie'
+ORDER BY snapshot_timestamp ASC
+LIMIT 1;
+
+--Delete older record:
+DELETE FROM product_units
+WHERE product_id = (
+ SELECT product_id
+ FROM product_units
+ WHERE product_name = 'Apple Pie'
+ ORDER BY snapshot_timestamp ASC
+ LIMIT 1
+ );
-- UPDATE
@@ -129,5 +297,38 @@ Finally, make sure you have a WHERE statement to update the right row,
When you have all of these components, you can run the update statement. */
+-- Add the column:
+ALTER TABLE product_units
+ADD current_quantity INT;
+
+-- Get the last quantity per product:
+WITH last_quantity AS (
+ SELECT
+ pu.product_id,
+ first_value(quantity) OVER (PARTITION BY pu.product_id ORDER BY market_date DESC) AS quantity
+ FROM
+ vendor_inventory vi
+ RIGHT JOIN
+ product_units pu
+ ON vi.product_id = pu.product_id
+)
+-- Update the current_quantity in product_units:
+UPDATE product_units
+SET current_quantity = COALESCE(last_quantity.quantity, 0)
+FROM last_quantity
+WHERE product_units.product_id = last_quantity.product_id;
+
+--Check
+SELECT
+ product_id,
+ product_name,
+ product_size,
+ product_category_id,
+ product_qty_type,
+ current_quantity
+FROM
+ product_units;
diff --git a/04_this_cohort/live_code/module_4/FULL_OUTER_JOIN_UNION.sql b/04_this_cohort/live_code/module_4/FULL_OUTER_JOIN_UNION.sql
new file mode 100644
index 000000000..7bc45397d
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/FULL_OUTER_JOIN_UNION.sql
@@ -0,0 +1,47 @@
+--FULL OUTER JOIN with a UNION
+
+DROP TABLE IF EXISTS temp.store1;
+CREATE TEMP TABLE IF NOT EXISTS temp.store1
+(
+costume TEXT,
+quantity INT
+);
+
+INSERT INTO temp.store1
+VALUES("tiger",6),
+ ("elephant",2),
+ ("princess", 4);
+
+
+DROP TABLE IF EXISTS temp.store2;
+CREATE TEMP TABLE IF NOT EXISTS temp.store2
+(
+costume TEXT,
+quantity INT
+);
+
+INSERT INTO temp.store2
+VALUES("tiger",2),
+ ("dancer",7),
+ ("superhero", 5);
+
+SELECT
+s1.costume
+,s1.quantity as store1_quantity
+,s2.quantity as store2_quantity
+
+FROM store1 s1
+LEFT JOIN store2 s2
+ ON s1.costume = s2.costume
+
+UNION ALL
+
+SELECT
+s2.costume
+,s1.quantity
+,s2.quantity
+
+FROM store2 s2
+LEFT JOIN store1 s1
+ ON s1.costume = s2.costume
+WHERE s1.costume IS NULL
diff --git a/04_this_cohort/live_code/module_4/IFNULL_NULLIF.sql b/04_this_cohort/live_code/module_4/IFNULL_NULLIF.sql
new file mode 100644
index 000000000..f41eb06a0
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/IFNULL_NULLIF.sql
@@ -0,0 +1,23 @@
+--IFNULL and coalesce + NULLIF
+
+SELECT *
+,IFNULL(product_size, 'Unknown') as new_product_size
+
+--less meaningful, "conceptual" examples
+,IFNULL(product_size, product_qty_type) -- if both are null, the result is null
+,coalesce(product_size,product_qty_type,'missing') -- if the first value is null try the second; if that is also null, use the third
+,IFNULL(IFNULL(product_size, product_qty_type),'missing') -- same result, but IFNULL takes only two arguments, so it has to be nested
+
+
+FROM product;
+
+SELECT *
+,IFNULL(product_size, 'Unknown') as new_product_size
+,NULLIF(product_size, '') -- finds rows where product_size is blank ('') and sets them to NULL
+,coalesce(NULLIF(product_size, ''), 'unknown') as good_product_size
+,IFNULL(NULLIF(product_size, ''), 'unknown') as good_product_size2 -- same result as the coalesce version above
+
+FROM product
+
+WHERE NULLIF(product_size, '') IS NULL -- both blanks and NULLs
+--WHERE product_size IS NULL -- only NULLs
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_4/INTERSECT_EXCEPT.sql b/04_this_cohort/live_code/module_4/INTERSECT_EXCEPT.sql
new file mode 100644
index 000000000..01915c5bd
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/INTERSECT_EXCEPT.sql
@@ -0,0 +1,31 @@
+--INTERSECT / EXCEPT
+
+--products that have been sold (i.e. are in both customer_purchases and product)
+SELECT product_id
+FROM customer_purchases
+INTERSECT -- similar to the inner join in this case
+SELECT product_id
+FROM product;
+
+--products that have NOT been sold (i.e. are NOT in customer_purchases even though they are in product)
+-- add the name
+SELECT x.product_id, product_name
+FROM (
+ SELECT product_id
+ FROM product -- what products are NOT in customer_purchases
+ EXCEPT
+ SELECT product_id
+ FROM customer_purchases
+) x
+JOIN product p on x.product_id = p.product_id;
+
+--NOTHING
+--direction matters a lot!
+SELECT product_id
+FROM customer_purchases -- what products are NOT in product -- NONE!
+EXCEPT
+SELECT product_id
+FROM product;
+
+
+
diff --git a/04_this_cohort/live_code/module_4/INTERSECT_VS_INNER_JOIN.sql b/04_this_cohort/live_code/module_4/INTERSECT_VS_INNER_JOIN.sql
new file mode 100644
index 000000000..456adcca8
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/INTERSECT_VS_INNER_JOIN.sql
@@ -0,0 +1,36 @@
+DROP TABLE IF EXISTS temp.temp1;
+CREATE TEMP TABLE IF NOT EXISTS temp.temp1
+(id int);
+
+INSERT INTO temp.temp1
+VALUES(null)
+,(1)
+,(3)
+,(3);
+
+DROP TABLE IF EXISTS temp.temp2;
+CREATE TEMP TABLE IF NOT EXISTS temp.temp2
+(id int);
+INSERT INTO temp.temp2
+VALUES(null)
+,(2)
+,(3)
+,(3);
+
+-- distinct and NULL
+select * from temp1
+INTERSECT
+select * from temp2;
+
+select * from temp1
+EXCEPT
+select * from temp2;
+
+select * from temp2
+EXCEPT
+select * from temp1;
+
+-- not distinct and NOT NULL
+select * from temp1 t
+INNER JOIN temp2 x
+ on t.id = x.id
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_4/NTILE.sql b/04_this_cohort/live_code/module_4/NTILE.sql
new file mode 100644
index 000000000..1831f3e53
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/NTILE.sql
@@ -0,0 +1,24 @@
+--NTILE (4,5,100?)
+
+--daily sales
+SELECT *
+,NTILE(4) OVER(PARTITION by vendor_name ORDER by sales asc) as quartile
+,NTILE(5) OVER(PARTITION by vendor_name ORDER by sales asc) as quintile
+,NTILE(100) OVER(PARTITION by vendor_name ORDER by sales asc) as percentile
+
+FROM (
+ SELECT
+ md.market_date,
+ market_week,
+ market_year,
+ vendor_name,
+ sum(quantity*cost_to_customer_per_qty) as sales
+
+ FROM market_date_info md
+ JOIN customer_purchases cp
+ on md.market_date = cp.market_date
+ JOIN vendor v
+ on cp.vendor_id = v.vendor_id
+
+ GROUP BY md.market_date, v.vendor_id
+) x
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_4/ROW_NUMBER.sql b/04_this_cohort/live_code/module_4/ROW_NUMBER.sql
new file mode 100644
index 000000000..73c714cba
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/ROW_NUMBER.sql
@@ -0,0 +1,19 @@
+--windowed function
+-- what was the highest price seen per product for each vendor
+
+SELECT *
+
+FROM (
+
+ SELECT DISTINCT
+ vendor_id
+ --,market_date
+ ,product_id
+ ,original_price
+ ,ROW_NUMBER() OVER(PARTITION BY vendor_id,product_id ORDER BY original_price DESC) as price_rank
+
+ FROM vendor_inventory
+ WHERE vendor_id = 7
+) x
+
+WHERE x.price_rank = 1
diff --git a/04_this_cohort/live_code/module_4/UNION_UNION_ALL.sql b/04_this_cohort/live_code/module_4/UNION_UNION_ALL.sql
new file mode 100644
index 000000000..652a15256
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/UNION_UNION_ALL.sql
@@ -0,0 +1,23 @@
+--UNION/UNION ALL
+--most and least expensive product by vendor with a union
+
+
+SELECT vendor_id, product_id,original_price, rn_max AS [row_number] --renaming because of the union
+FROM (
+ SELECT DISTINCT
+ vendor_id, product_id, original_price
+ ,ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price DESC) as rn_max
+ FROM vendor_inventory
+) x
+WHERE rn_max = 1
+
+UNION -- UNION returned 5 rows (duplicates removed)
+--UNION ALL -- UNION ALL returned 6 rows, including vendor #4, who was a duplicate
+
+SELECT * FROM (
+ SELECT DISTINCT
+ vendor_id, product_id, original_price
+ ,ROW_NUMBER() OVER(PARTITION BY vendor_id ORDER BY original_price ASC) as rn_min
+ FROM vendor_inventory
+) x
+WHERE rn_min = 1
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_4/budget_coalesce_NULLIF.sql b/04_this_cohort/live_code/module_4/budget_coalesce_NULLIF.sql
new file mode 100644
index 000000000..667a83fb8
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/budget_coalesce_NULLIF.sql
@@ -0,0 +1,24 @@
+
+-- create a budget temp table
+DROP TABLE IF EXISTS temp.budgets;
+
+-- specifying the column types, as was asked: budget is a string, current_year is an integer, previous_year is also an integer
+CREATE TEMP TABLE IF NOT EXISTS temp.budgets (budget STRING, current_year INT, previous_year INT);
+
+
+--nothing is yet in budget
+INSERT INTO temp.budgets
+
+-- so put as row 1
+VALUES ('software',1000,1000)
+--and row 2
+, ('candles',300,500);
+
+--show me the average difference in years
+--NULLIF, if the numbers are the same, then NULL
+--COALESCE, if the result is NULL then 0.00
+--average across the values = change in years
+SELECT AVG(COALESCE(NULLIF(current_year, previous_year), 0.00))
+FROM budgets
+
+--result: (300 [current_year for candles] + 0) / 2 [two rows] = 150.0
diff --git a/04_this_cohort/live_code/module_4/row_rank_dense_comparison.sql b/04_this_cohort/live_code/module_4/row_rank_dense_comparison.sql
new file mode 100644
index 000000000..caa4a5994
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/row_rank_dense_comparison.sql
@@ -0,0 +1,29 @@
+--dense_rank vs rank vs row_number
+
+DROP TABLE IF EXISTS temp.row_rank_dense;
+
+CREATE TEMP TABLE IF NOT EXISTS temp.row_rank_dense
+(
+emp_id INT,
+salary INT
+);
+
+INSERT INTO temp.row_rank_dense
+VALUES(1,200000),
+(2,200000),
+(3, 160000),
+(4, 120000),
+(5, 125000),
+(6, 165000),
+(7, 230000),
+(8, 100000),
+(9, 165000),
+(10, 100000);
+
+SELECT *
+,ROW_NUMBER() OVER(ORDER BY salary DESC) as [ROW_NUMBER]
+,RANK() OVER(ORDER BY salary DESC) as [RANK]
+,DENSE_RANK() OVER(ORDER BY salary DESC) as [DENSE_RANK]
+
+
+FROM row_rank_dense
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_4/string_manipulations.sql b/04_this_cohort/live_code/module_4/string_manipulations.sql
new file mode 100644
index 000000000..c2e440ba4
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/string_manipulations.sql
@@ -0,0 +1,43 @@
+--string manipulations
+
+SELECT DISTINCT
+LTRIM(' THOMAS ROSENTHAL ') as [ltrim]
+,RTRIM(' THOMAS ROSENTHAL ') as [rtrim]
+,RTRIM(LTRIM(' THOMAS ROSENTHAL ')) as [both]
+,TRIM(' THOMAS ROSENTHAL ') as [also_both]
+
+,product_name
+,REPLACE(product_name, 'a','e')
+,REPLACE(product_name,'h','1') -- case sensitivity
+,REPLACE(product_name,' ','_') -- replace spaces with underscore (good way to get pot_hole_case)
+
+,UPPER(product_name) as upper_case
+,LOWER(product_name) as lower_case
+
+,product_name || product_size
+
+FROM product;
+
+--concat
+SELECT *
+,customer_first_name || ' ' || customer_last_name as customer_name
+,UPPER(customer_first_name) || ' ' || UPPER(customer_last_name) as upper_last_name
+
+,SUBSTR(customer_last_name,4) -- everything from the 4th character onward
+,SUBSTR(customer_last_name,4,2) -- 2 characters long, starting at the 4th character
+--,SUBSTR(customer_last_name, -5,4)
+--,INSTR(customer_last_name,'a')
+
+,length(customer_first_name || ' ' || customer_last_name)
+
+,'THOMAS
+
+ROSENTHAL' -- added a linebreak
+,replace('THOMAS
+
+ROSENTHAL', char(10), ' ') -- removing all linebreaks from this string
+
+FROM customer
+
+WHERE customer_first_name REGEXP '(a)$'
+
diff --git a/04_this_cohort/live_code/module_4/substring_instring_together.sql b/04_this_cohort/live_code/module_4/substring_instring_together.sql
new file mode 100644
index 000000000..3bc04adb2
--- /dev/null
+++ b/04_this_cohort/live_code/module_4/substring_instring_together.sql
@@ -0,0 +1,16 @@
+--substring & instring together
+
+SELECT
+'FirstWord, SecondWord, ThirdWord',
+ SUBSTR('FirstWord, SecondWord, ThirdWord',0, INSTR('FirstWord, SecondWord, ThirdWord',',')) as FirstDelim
+ --,SUBSTR('FirstWord, SecondWord, ThirdWord',0, 10) as FirstDelim -- same thing but not dynamic
+ ,SUBSTR('FirstWord, SecondWord, ThirdWord',
+ INSTR('FirstWord, SecondWord, ThirdWord',',')+1,
+ INSTR('FirstWord, SecondWord, ThirdWord',',')+1) as SecondDelim -- the length argument only happens to be long enough for these words
+
+ ,SUBSTR('FirstWord, SecondWord, ThirdWord',
+ INSTR(
+ (SUBSTR('FirstWord, SecondWord, ThirdWord',
+ INSTR('FirstWord, SecondWord, ThirdWord',',')+1))
+ ,',') +
+ INSTR('FirstWord, SecondWord, ThirdWord',',')+1) AS ThirdDelim
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_5/CROSS_JOINS.sql b/04_this_cohort/live_code/module_5/CROSS_JOINS.sql
new file mode 100644
index 000000000..37e5b859e
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/CROSS_JOINS.sql
@@ -0,0 +1,15 @@
+--CROSS JOIN
+
+DROP TABLE IF EXISTS temp.sizes;
+CREATE TEMP TABLE IF NOT EXISTS temp.sizes (size TEXT);
+
+INSERT INTO temp.sizes
+VALUES('small'),
+('medium'),
+('large');
+
+SELECT * from temp.sizes;
+
+SELECT product_name, size
+FROM product
+CROSS JOIN temp.sizes
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_5/DYNAMIC_VIEW_AFTER_IMPORT.sql b/04_this_cohort/live_code/module_5/DYNAMIC_VIEW_AFTER_IMPORT.sql
new file mode 100644
index 000000000..bb18ac1e2
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/DYNAMIC_VIEW_AFTER_IMPORT.sql
@@ -0,0 +1,29 @@
+--DYNAMIC VIEW
+-- THIS ONLY WORKS IF YOU HAVE DONE THE PROPER STEPS FOR IMPORTING! INSERTING A NEW DATE! AND UPDATING THE NEW DATA TO "TODAY"
+DROP VIEW IF EXISTS todays_vendor_daily_sales;
+CREATE VIEW IF NOT EXISTS todays_vendor_daily_sales AS
+
+ SELECT
+ md.market_date
+ ,market_day
+ ,market_week
+ ,market_year
+ ,vendor_name
+ ,SUM(quantity*cost_to_customer_per_qty) as sales
+
+ FROM market_date_info md
+ INNER JOIN (
+ SELECT * FROM customer_purchases
+ UNION
+ SELECT * FROM new_customer_purchases) cp
+
+ ON md.market_date = cp.market_date
+ INNER JOIN vendor v
+ ON cp.vendor_id = v.vendor_id
+
+ /*PICK ONE OF THE WHERE STATEMENTS!*/
+ --WHERE md.market_date = strftime('%Y-%m-%d',DATE('now'),'-1 day') -- yesterday
+ WHERE md.market_date = DATE('now','localtime')
+ --WHERE md.market_date = '2024-12-18'
+
+ GROUP BY cp.market_date, v.vendor_id
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_5/INSERT_UPDATE_DELETE.sql b/04_this_cohort/live_code/module_5/INSERT_UPDATE_DELETE.sql
new file mode 100644
index 000000000..d0508e742
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/INSERT_UPDATE_DELETE.sql
@@ -0,0 +1,24 @@
+-- INSERT UPDATE DELETE
+
+--1)add a product to the table
+--2) change the product_size for that product
+--3) delete our product
+
+DROP TABLE IF EXISTS temp.product_expanded;
+CREATE TEMP TABLE product_expanded AS
+ SELECT * FROM product;
+
+--INSERT
+INSERT INTO product_expanded
+VALUES(26,'Almonds','1 lb',1,'lbs');
+
+--UPDATE
+-- change the product_size for almonds to 1/2 kg
+UPDATE product_expanded
+SET product_size = '1/2 kg', product_qty_type = 'kg'
+WHERE product_id = 26;
+
+--DELETE our almonds
+DELETE FROM product_expanded
+--SELECT * FROM product_expanded
+WHERE product_id = 26
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_5/INSERT_UPDATE_FOR_DYNAMIC_VIEW.sql b/04_this_cohort/live_code/module_5/INSERT_UPDATE_FOR_DYNAMIC_VIEW.sql
new file mode 100644
index 000000000..d63f8d565
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/INSERT_UPDATE_FOR_DYNAMIC_VIEW.sql
@@ -0,0 +1,9 @@
+UPDATE new_customer_purchases
+SET market_date = DATE('now','localtime');
+
+INSERT INTO market_date_info
+VALUES('2024-12-18','Wednesday','51','2024','8:00 AM','2:00 PM','nothing interesting','Winter','0','4',1,0);
+
+
+SELECT * FROM todays_vendor_daily_sales
+
diff --git a/04_this_cohort/live_code/module_5/JSON_TO_TABLE.sql b/04_this_cohort/live_code/module_5/JSON_TO_TABLE.sql
new file mode 100644
index 000000000..b15f47c7d
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/JSON_TO_TABLE.sql
@@ -0,0 +1,35 @@
+--JSON to a TABLE
+
+--create a temp TABLE
+--insert the json as a long string
+--write a json_each statement
+--use the json_each statement as a subquery to extract our column values
+-- now we have a table!
+
+DROP TABLE IF EXISTS temp.[new_json];
+CREATE TEMP TABLE IF NOT EXISTS temp.new_json
+(
+the_json BLOB -- the column and the column type
+);
+
+INSERT INTO temp.new_json
+VALUES(
+'[
+ {
+ "country": "Afghanistan",
+ "city": "Kabul"
+ },
+ {
+ "country": "Albania",
+ "city": "Tirana"
+ }]'
+ );
+
+SELECT key
+,JSON_EXTRACT(value,'$.country') as country
+,JSON_EXTRACT(value,'$.city') as city
+
+FROM (
+ SELECT *
+ FROM new_json,JSON_EACH(new_json.the_json, '$') -- the JSON column is named the_json
+ ) x
diff --git a/04_this_cohort/live_code/module_5/NEW_VIEW.sql b/04_this_cohort/live_code/module_5/NEW_VIEW.sql
new file mode 100644
index 000000000..ffab90141
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/NEW_VIEW.sql
@@ -0,0 +1,19 @@
+--VIEW
+DROP VIEW IF EXISTS vendor_daily_sales;
+CREATE VIEW IF NOT EXISTS vendor_daily_sales AS
+
+ SELECT
+ md.market_date
+ ,market_day
+ ,market_week
+ ,market_year
+ ,vendor_name
+ ,SUM(quantity*cost_to_customer_per_qty) as sales
+
+ FROM market_date_info md
+ INNER JOIN customer_purchases cp
+ ON md.market_date = cp.market_date
+ INNER JOIN vendor v
+ ON cp.vendor_id = v.vendor_id
+
+ GROUP BY cp.market_date, v.vendor_id
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_5/SELF_JOIN.sql b/04_this_cohort/live_code/module_5/SELF_JOIN.sql
new file mode 100644
index 000000000..8fec61b59
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/SELF_JOIN.sql
@@ -0,0 +1,22 @@
+-- SELF JOIN
+drop table if exists temp.employees;
+create temp table temp.employees
+(
+emp_id int,
+emp_name text,
+mgr_id int
+);
+
+insert into temp.employees
+Values(1,'Thomas',3)
+,(2,'Ernani',4)
+,(3,'Rohan',null)
+,(4,'Jennie',3);
+
+
+SELECT * FROM temp.employees;
+
+select a.emp_name,b.emp_name as mgr_name
+from temp.employees a
+left join temp.employees b
+ on a.mgr_id = b.emp_id
diff --git a/04_this_cohort/live_code/module_5/VIEW_IN_ANOTHER_QUERY.sql b/04_this_cohort/live_code/module_5/VIEW_IN_ANOTHER_QUERY.sql
new file mode 100644
index 000000000..eb79d08b9
--- /dev/null
+++ b/04_this_cohort/live_code/module_5/VIEW_IN_ANOTHER_QUERY.sql
@@ -0,0 +1,12 @@
+-- using a view in another query
+SELECT
+market_year
+,market_week
+,vendor_name
+,SUM(sales) as weekly_sales
+
+FROM vendor_daily_sales
+
+GROUP BY market_year
+,market_week
+,vendor_name
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_6/1nf.sql b/04_this_cohort/live_code/module_6/1nf.sql
new file mode 100644
index 000000000..f57239761
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/1nf.sql
@@ -0,0 +1,18 @@
+--1nf
+drop table if exists temp.hold;
+CREATE TABLE temp.hold AS
+SELECT DISTINCT
+name,
+OS,
+SUBSTR(software, 1, INSTR(software,',')-1) AS s1,
+SUBSTR(software,INSTR(software,',')+1, INSTR(SUBSTR(software, INSTR(software, ',')+1),',')-1) as s2,
+SUBSTR(software,INSTR(SUBSTR(software,INSTR(software,',')+1),',')+INSTR(software,',')+1) as s3,
+supervisor
+
+FROM skills;
+
+SELECT name,OS,s1 as software, supervisor FROM hold
+UNION
+SELECT name,OS,s2 as software, supervisor FROM hold
+UNION
+SELECT name,OS,s3 as software, supervisor FROM hold
\ No newline at end of file
diff --git a/04_this_cohort/live_code/module_6/2nf.sql b/04_this_cohort/live_code/module_6/2nf.sql
new file mode 100644
index 000000000..234d98d9a
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/2nf.sql
@@ -0,0 +1,52 @@
+-- 2nf
+drop table if exists temp.student;
+drop table if exists temp.supervisor;
+drop table if exists temp.student_software;
+
+create temp table if not exists temp.supervisor
+(
+id INTEGER PRIMARY KEY AUTOINCREMENT,
+name TEXT
+);
+
+INSERT INTO temp.supervisor(name)
+select distinct supervisor
+from skills;
+
+create temp table if not exists temp.student
+(
+id INTEGER PRIMARY KEY AUTOINCREMENT,
+name TEXT,
+OS TEXT,
+supervisor_id INTEGER,
+CONSTRAINT "fk_supervisor_id" FOREIGN KEY ("supervisor_id") REFERENCES "supervisor" ("id")
+);
+
+INSERT INTO student(name, OS, supervisor_id)
+SELECT DISTINCT
+h.name
+,OS
+,s.id AS supervisor_id
+
+FROM hold h
+JOIN supervisor s
+ on h.supervisor = s.name;
+
+CREATE TABLE temp.student_software AS
+SELECT id, software
+
+FROM student s
+JOIN (
+ SELECT name,OS,s1 as software, supervisor FROM hold
+ UNION
+ SELECT name,OS,s2 as software, supervisor FROM hold
+ UNION
+ SELECT name,OS,s3 as software, supervisor FROM hold
+) u
+ON s.name = u.name;
+
+--select * from student
+--select * from supervisor
+select * from student_software
+
+
diff --git a/04_this_cohort/live_code/module_6/3nf.sql b/04_this_cohort/live_code/module_6/3nf.sql
new file mode 100644
index 000000000..d5f037013
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/3nf.sql
@@ -0,0 +1,37 @@
+--3nf
+drop table if exists temp.OS;
+drop table if exists temp.software;
+create temp table if not exists temp.OS
+(
+OS_id INTEGER,
+OS TEXT,
+win_only TEXT
+);
+
+insert into temp.OS
+values(1,"win","TRUE"),
+ (2,"mac","FALSE");
+
+
+create temp table if not exists temp.software
+(
+software_id INTEGER PRIMARY KEY AUTOINCREMENT,
+software TEXT,
+win_only TEXT
+);
+
+INSERT INTO temp.software(software, win_only)
+SELECT DISTINCT software, win_only
+FROM student_software s
+CROSS JOIN (
+ SELECT * FROM OS WHERE OS = 'mac'
+);
+
+UPDATE software
+SET win_only = 'TRUE'
+WHERE software.software = ' MSSQL'; -- note the leading space: the 1NF split left values untrimmed
+
+SELECT * FROM OS
+--SELECT * FROM software
+
+
diff --git a/04_this_cohort/live_code/module_6/SQLite_and_python.ipynb b/04_this_cohort/live_code/module_6/SQLite_and_python.ipynb
new file mode 100644
index 000000000..9fddb799a
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/SQLite_and_python.ipynb
@@ -0,0 +1,717 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "05e1dbf0",
+ "metadata": {},
+ "source": [
+ "# Connect to FarmersMarket.db"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "f1d8cb62",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import sqlite3\n",
+ "#set your location, slash direction will change for windows and mac\n",
+ "DB = '/Users/thomas/Documents/GitHub/02-intro_sql/05_src/sql/farmersmarket.db' \n",
+ "#establish your connection\n",
+ "conn = sqlite3.connect(DB, isolation_level=None,\n",
+ " detect_types=sqlite3.PARSE_COLNAMES)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "1204e343",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#run your query, use \"\\\" to allow line breaks\n",
+ "db_df = pd.read_sql_query(\"SELECT p.*,pc.product_category_name \\\n",
+ " FROM product p \\\n",
+ " JOIN product_category pc \\\n",
+ " ON p.product_category_id = pc.product_category_id\"\n",
+ " ,conn)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "5c7863ee-08cd-4095-b80a-61f82425bd2e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " product_id product_name product_size \\\n",
+ "0 1 Habanero Peppers - Organic medium \n",
+ "1 2 Jalapeno Peppers - Organic small \n",
+ "2 3 Poblano Peppers - Organic large \n",
+ "3 4 Banana Peppers - Jar 8 oz \n",
+ "4 5 Whole Wheat Bread 1.5 lbs \n",
+ "5 6 Cut Zinnias Bouquet medium \n",
+ "6 7 Apple Pie 10\" \n",
+ "7 9 Sweet Potatoes medium \n",
+ "8 10 Eggs 1 dozen \n",
+ "9 11 Pork Chops 1 lb \n",
+ "10 12 Baby Salad Lettuce Mix - Bag 1/2 lb \n",
+ "11 13 Baby Salad Lettuce Mix 1 lb \n",
+ "12 14 Red Potatoes None \n",
+ "13 15 Red Potatoes - Small \n",
+ "14 16 Sweet Corn Ear \n",
+ "15 17 Carrots sold by weight \n",
+ "16 18 Carrots - Organic bunch \n",
+ "17 19 Farmer's Market Resuable Shopping Bag medium \n",
+ "18 20 Homemade Beeswax Candles 6\" \n",
+ "19 21 Organic Cherry Tomatoes pint \n",
+ "20 22 Roma Tomatoes medium \n",
+ "21 23 Maple Syrup - Jar 8 oz \n",
+ "22 8 Cherry Pie 10\" \n",
+ "\n",
+ " product_category_id product_qty_type product_category_name \n",
+ "0 1 lbs Fresh Fruits & Vegetables \n",
+ "1 1 lbs Fresh Fruits & Vegetables \n",
+ "2 1 unit Fresh Fruits & Vegetables \n",
+ "3 3 unit Packaged Prepared Food \n",
+ "4 3 unit Packaged Prepared Food \n",
+ "5 5 unit Plants & Flowers \n",
+ "6 3 unit Packaged Prepared Food \n",
+ "7 1 lbs Fresh Fruits & Vegetables \n",
+ "8 6 unit Eggs & Meat (Fresh or Frozen) \n",
+ "9 6 lbs Eggs & Meat (Fresh or Frozen) \n",
+ "10 1 unit Fresh Fruits & Vegetables \n",
+ "11 1 lbs Fresh Fruits & Vegetables \n",
+ "12 1 None Fresh Fruits & Vegetables \n",
+ "13 1 None Fresh Fruits & Vegetables \n",
+ "14 1 unit Fresh Fruits & Vegetables \n",
+ "15 1 lbs Fresh Fruits & Vegetables \n",
+ "16 1 unit Fresh Fruits & Vegetables \n",
+ "17 7 unit Non-Edible Products \n",
+ "18 7 unit Non-Edible Products \n",
+ "19 1 unit Fresh Fruits & Vegetables \n",
+ "20 1 lbs Fresh Fruits & Vegetables \n",
+ "21 2 unit Packaged Pantry Goods \n",
+ "22 3 unit Packaged Prepared Food "
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "db_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8b7c36c0",
+ "metadata": {},
+ "source": [
+ "Export the query:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "ee17555e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#save\n",
+ "db_df.to_csv('database-py.CSV', index=False)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ed14b573",
+ "metadata": {},
+ "source": [
+ "# Run a SQL query with pandasql"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "ac82fb05",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#!pip install pandasql"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "4f783bd4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import pandas as pd\n",
+ "import pandasql as sql #this allows us to run SQLite queries!\n",
+ "p = \"https://raw.githubusercontent.com/allisonhorst/palmerpenguins/master/inst/extdata/penguins.csv\"\n",
+ "penguins = pd.read_csv(p) #create a dataframe\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "7892f454",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " species island bill_length_mm bill_depth_mm flipper_length_mm \\\n",
+ "0 Adelie Torgersen 39.1 18.7 181.0 \n",
+ "1 Adelie Torgersen 39.5 17.4 186.0 \n",
+ "2 Adelie Torgersen 40.3 18.0 195.0 \n",
+ "3 Adelie Torgersen NaN NaN NaN \n",
+ "4 Adelie Torgersen 36.7 19.3 193.0 \n",
+ ".. ... ... ... ... ... \n",
+ "339 Chinstrap Dream 55.8 19.8 207.0 \n",
+ "340 Chinstrap Dream 43.5 18.1 202.0 \n",
+ "341 Chinstrap Dream 49.6 18.2 193.0 \n",
+ "342 Chinstrap Dream 50.8 19.0 210.0 \n",
+ "343 Chinstrap Dream 50.2 18.7 198.0 \n",
+ "\n",
+ " body_mass_g sex year \n",
+ "0 3750.0 male 2007 \n",
+ "1 3800.0 female 2007 \n",
+ "2 3250.0 female 2007 \n",
+ "3 NaN NaN 2007 \n",
+ "4 3450.0 female 2007 \n",
+ ".. ... ... ... \n",
+ "339 4000.0 male 2009 \n",
+ "340 3400.0 female 2009 \n",
+ "341 3775.0 male 2009 \n",
+ "342 4100.0 male 2009 \n",
+ "343 3775.0 female 2009 \n",
+ "\n",
+ "[344 rows x 8 columns]"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "penguins"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "8036d336",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "yrly_penguins = sql.sqldf('''SELECT DISTINCT year, COUNT(*) AS count, \n",
+ " SUM(COUNT(*)) OVER (ORDER BY year) AS running_total\n",
+ " FROM penguins\n",
+ " GROUP BY year''') #run a SQLite query with sqldf()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "80fd4dd6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ " year count running_total\n",
+ "0 2007 110 110\n",
+ "1 2008 114 224\n",
+ "2 2009 120 344"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "yrly_penguins"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0cd3de3f-fb4f-46ac-ad42-23971226e5d0",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/04_this_cohort/live_code/module_6/denormalized.sql b/04_this_cohort/live_code/module_6/denormalized.sql
new file mode 100644
index 000000000..da4208587
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/denormalized.sql
@@ -0,0 +1,14 @@
+-- normal forms creation
+
+drop table if exists temp.skills;
+create temp table if not exists temp.skills
+(
+name TEXT,
+OS TEXT,
+software TEXT,
+supervisor TEXT
+);
+
+insert into temp.skills
+values("A","win","VSCode, MSSQL, RStudio", "Eric Yu"),
+ ("Thomas","mac", "Spyder, SQLite, RStudio", "Rohan Alexander");
diff --git a/04_this_cohort/live_code/module_6/penguins_in_python_sql.sql b/04_this_cohort/live_code/module_6/penguins_in_python_sql.sql
new file mode 100644
index 000000000..2326c1d29
--- /dev/null
+++ b/04_this_cohort/live_code/module_6/penguins_in_python_sql.sql
@@ -0,0 +1,9 @@
+select * from penguins;
+
+-- how many penguins were identified each year
+SELECT DISTINCT year
+,COUNT(*) AS count
+,SUM(COUNT(*)) OVER (ORDER BY year) AS running_total
+
+ FROM penguins
+GROUP BY year
\ No newline at end of file