From 5ecc760c4dc8223b67975121d1f04a305284d0bf Mon Sep 17 00:00:00 2001
From: Phone Thiri Yadana
Date: Tue, 19 Jan 2021 20:22:41 +0800
Subject: [PATCH] forecasting with linear regression

---
 .../06.Forecasting with Linear Regression.sql | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 Advanced SQL for Data Science - Time Series/03.Time Series Analysis/06.Forecasting with Linear Regression.sql

diff --git a/Advanced SQL for Data Science - Time Series/03.Time Series Analysis/06.Forecasting with Linear Regression.sql b/Advanced SQL for Data Science - Time Series/03.Time Series Analysis/06.Forecasting with Linear Regression.sql
new file mode 100644
index 0000000..f6d4d9a
--- /dev/null
+++ b/Advanced SQL for Data Science - Time Series/03.Time Series Analysis/06.Forecasting with Linear Regression.sql
@@ -0,0 +1,36 @@
+/************ Forecasting with Linear Regression **************/
+
+/*
+
+So far, we have been working with past data.
+Now we want to make future predictions based on that past data using linear regression.
+
+y = mx + b
+m: slope
+b: y-intercept
+y: predicted value (free_memory)
+x: input value (cpu_utilization)
+
+Let's try to predict how much free memory will be available at a given CPU utilization.
+*/
+
+-- First, find m and b. Recorded result: m = -0.46684018640161745, b = 0.6664934543856621
+-- REGR_SLOPE / REGR_INTERCEPT take (dependent Y, independent X), so free_memory comes first.
+-- NOTE(review): BETWEEN is inclusive on both ends; if event_time is a timestamp, a row at exactly 2019-03-06 00:00 is included.
+SELECT
+    REGR_SLOPE(free_memory, cpu_utilization) AS m,
+    REGR_INTERCEPT(free_memory, cpu_utilization) AS b
+FROM time_series.utilization
+WHERE event_time BETWEEN '2019-03-05' AND '2019-03-06';
+
+
+-- Predict free memory at 65% CPU utilization (x = 0.65): y = m * 0.65 + b.
+-- Recorded result: 0.36304733322461075 (about 36% free memory).
+-- The result is the prediction y, so it is aliased predicted_free_memory rather than b (the intercept).
+SELECT
+    REGR_SLOPE(free_memory, cpu_utilization) * 0.65
+    + REGR_INTERCEPT(free_memory, cpu_utilization) AS predicted_free_memory
+FROM time_series.utilization
+WHERE event_time BETWEEN '2019-03-05' AND '2019-03-06';
+
+