-
Notifications
You must be signed in to change notification settings - Fork 518
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
1 changed file
with
51 additions
and
0 deletions.
There are no files selected for viewing
51 changes: 51 additions & 0 deletions
51
...cience - Time Series/02.03.Modeling Time Series Data - Indexing - Utilization dataset.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/*********** Indexing Data (using Utilization data set) ****************/ | ||
|
||
-- Bulk-load the utilization data from a comma-delimited text file into | ||
-- time_series.utilization; fields map positionally onto the listed columns. | ||
-- NOTE(review): COPY ... FROM '<path>' looks like PostgreSQL syntax, where the | ||
-- file is read by the database server, not the client - adjust the path to match. | ||
COPY time_series.utilization( | ||
event_time,server_id, cpu_utilization, free_memory, session_cnt | ||
) FROM 'C:\temp-sql\utilization.txt' DELIMITER ','; | ||
|
||
|
||
-- Sanity check: peek at five rows to confirm the load worked. | ||
-- No ORDER BY is given, so which five rows come back is not guaranteed. | ||
SELECT * FROM time_series.utilization | ||
LIMIT 5; | ||
|
||
|
||
|
||
/* | ||
Baseline: average CPU utilization per server id over a fixed date window, | ||
measured before the indexes below are created. Recorded planner cost: 3687.71. | ||
EXPLAIN reports the chosen plan and its estimated cost. | ||
Note: BETWEEN is inclusive on both ends, so rows whose event_time equals | ||
'2019-03-06' exactly are part of the window. | ||
*/ | ||
EXPLAIN SELECT server_id, AVG(cpu_utilization) | ||
FROM time_series.utilization | ||
WHERE event_time BETWEEN '2019-03-05' AND '2019-03-06' | ||
GROUP BY server_id; | ||
|
||
|
||
------------ we will create an index to speed things up --------------- | ||
|
||
-- 1) Composite index with event_time as the leading column, then server_id. | ||
--    The test query filters on event_time, which matches the leading column. | ||
CREATE INDEX idx_event_time_utilization | ||
ON time_series.utilization(event_time, server_id); | ||
|
||
/* | ||
Same query, re-planned with the (event_time, server_id) index in place. | ||
Recorded planner cost after indexing: 3651.71 (was 3687.71 without it). | ||
*/ | ||
EXPLAIN SELECT server_id, AVG(cpu_utilization) | ||
FROM time_series.utilization | ||
WHERE event_time BETWEEN '2019-03-05' AND '2019-03-06' | ||
GROUP BY server_id; | ||
|
||
-- Drop the first index so the next experiment is not influenced by it. | ||
DROP INDEX time_series.idx_event_time_utilization; | ||
|
||
|
||
-- 2) Composite index with the column order switched: server_id is now the | ||
--    leading column and event_time comes second. | ||
CREATE INDEX idx_server_event_utilization | ||
ON time_series.utilization(server_id, event_time); | ||
|
||
|
||
/* | ||
Now the index is not used at all: the recorded cost is 3687.71. | ||
Notice that event_time is the second column of this index. | ||
With server_id as the leading column and event_time second, the planner | ||
does not use the index for this query, which filters only on event_time. | ||
*/ | ||
EXPLAIN SELECT server_id, AVG(cpu_utilization) | ||
FROM time_series.utilization | ||
WHERE event_time BETWEEN '2019-03-05' AND '2019-03-06' | ||
GROUP BY server_id; | ||