From 1c57927853463cf3da16d742daf4072e0e2043a1 Mon Sep 17 00:00:00 2001
From: Phone Thiri Yadana
Date: Tue, 19 Jan 2021 15:41:54 +0800
Subject: [PATCH] Windows Function - Lead() and Lag()

---
 .../01.Lead.sql                               | 50 +++++++++++++++++++
 .../02.Lag.sql                                | 36 +++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 Advanced SQL for Data Science - Time Series/02.Commonly used Functions for Time Series/01.Lead.sql
 create mode 100644 Advanced SQL for Data Science - Time Series/02.Commonly used Functions for Time Series/02.Lag.sql

diff --git a/Advanced SQL for Data Science - Time Series/02.Commonly used Functions for Time Series/01.Lead.sql b/Advanced SQL for Data Science - Time Series/02.Commonly used Functions for Time Series/01.Lead.sql
new file mode 100644
index 0000000..350cd6e
--- /dev/null
+++ b/Advanced SQL for Data Science - Time Series/02.Commonly used Functions for Time Series/01.Lead.sql
@@ -0,0 +1,50 @@
+/******** Commonly Used Functions for Time Series ***********/
+
+/*
+ * For this exercise we assume the last digit of server_id is the department id.
+ * CREATE OR REPLACE keeps the script re-runnable once the view already exists.
+ */
+CREATE OR REPLACE VIEW time_series.vw_utilization AS (
+    SELECT *, server_id % 10 AS dept_id
+    FROM time_series.utilization
+);
+
+-- Quick look at the view; with no ORDER BY, which 5 rows come back is arbitrary.
+SELECT * FROM time_series.vw_utilization
+LIMIT 5;
+
+
+
+--------------------- LEAD() function ---------------------------
+-- LEAD() looks forward: it returns a value from the nth row after the current row
+-- (within the window's partition and ordering) so the two rows can be compared.
+-- The optional second argument is the offset n; it defaults to 1, and the result
+-- is NULL when the partition has no row that far ahead.
+-- The event_time filters are half-open (>= start, < end) so a row stamped exactly
+-- at the end boundary is left out; this assumes event_time is a timestamp.
+
+-- value from the next row
+SELECT
+    dept_id,
+    server_id,
+    cpu_utilization,
+    LEAD(cpu_utilization) OVER (
+        PARTITION BY dept_id
+        ORDER BY cpu_utilization DESC
+    ) AS next_cpu_utilization
+FROM time_series.vw_utilization
+WHERE event_time >= '2019-03-05'
+    AND event_time < '2019-03-06';
+
+-- value from the 3rd row after the current one
+SELECT
+    dept_id,
+    server_id,
+    cpu_utilization,
+    LEAD(cpu_utilization, 3) OVER (
+        PARTITION BY dept_id
+        ORDER BY cpu_utilization DESC
+    ) AS third_next_cpu_utilization
+FROM time_series.vw_utilization
+WHERE event_time >= '2019-03-05'
+    AND event_time < '2019-03-06';
diff --git a/Advanced SQL for Data Science - Time Series/02.Commonly used Functions for Time Series/02.Lag.sql b/Advanced SQL for Data Science - Time Series/02.Commonly used Functions for Time Series/02.Lag.sql
new file mode 100644
index 0000000..6d163b1
--- /dev/null
+++ b/Advanced SQL for Data Science - Time Series/02.Commonly used Functions for Time Series/02.Lag.sql
@@ -0,0 +1,36 @@
+/******** Commonly Used Functions for Time Series ***********/
+
+--------------------- LAG() function ---------------------------
+-- LAG() references rows relative to the row currently being processed.
+-- It looks backward: it returns a value from the nth row before the current row
+-- (within the window's partition and ordering) so the two rows can be compared.
+-- These queries read time_series.vw_utilization, which 01.Lead.sql creates.
+-- The event_time filters are half-open (>= start, < end) so a row stamped exactly
+-- at the end boundary is left out; this assumes event_time is a timestamp.
+
+-- value from the previous row
+SELECT
+    dept_id,
+    server_id,
+    cpu_utilization,
+    LAG(cpu_utilization) OVER (
+        PARTITION BY dept_id
+        ORDER BY cpu_utilization DESC
+    ) AS previous_cpu_utilization
+FROM time_series.vw_utilization
+WHERE event_time >= '2019-03-05'
+    AND event_time < '2019-03-06';
+
+
+-- with an offset of 10: value from the 10th row before the current one
+SELECT
+    dept_id,
+    server_id,
+    cpu_utilization,
+    LAG(cpu_utilization, 10) OVER (
+        PARTITION BY dept_id
+        ORDER BY cpu_utilization DESC
+    ) AS tenth_previous_cpu_utilization
+FROM time_series.vw_utilization
+WHERE event_time >= '2019-03-05'
+    AND event_time < '2019-03-06';
\ No newline at end of file