From 21c57e99556f7870854685f5dc5f2dd56614c245 Mon Sep 17 00:00:00 2001 From: Chen Dai Date: Wed, 5 Jul 2023 13:46:01 -0700 Subject: [PATCH] Update doc Signed-off-by: Chen Dai --- flint/docs/index.md | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/flint/docs/index.md b/flint/docs/index.md index 81761d63d4..51a792f00c 100644 --- a/flint/docs/index.md +++ b/flint/docs/index.md @@ -17,11 +17,11 @@ A Flint index is ... Please see the following example in which Index Building Logic and Query Rewrite Logic column shows the basic idea behind each skipping index implementation. -| Skipping Index | Create Index Statement (TBD) | Index Building Logic | Query Rewrite Logic | -|----------------|-------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Skipping Index | Create Index Statement | Index Building Logic | Query Rewrite Logic | +|----------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Partition | CREATE SKIPPING INDEX
ON alb_logs
FOR COLUMNS (
  year PARTITION,
  month PARTITION,
  day PARTITION,
  hour PARTITION
) | INSERT INTO flint_alb_logs_skipping_index
SELECT
  FIRST(year) AS year,
  FIRST(month) AS month,
  FIRST(day) AS day,
  FIRST(hour) AS hour,
  input_file_name() AS file_path
FROM alb_logs
GROUP BY
  input_file_name() | SELECT *
FROM alb_logs
WHERE year = 2023 AND month = 4
=>
SELECT *
FROM alb_logs (input_files =
  SELECT file_path
  FROM flint_alb_logs_skipping_index
  WHERE year = 2023 AND month = 4
)
WHERE year = 2023 AND month = 4 | -| ValueSet | CREATE SKIPPING INDEX
ON alb_logs
FOR COLUMNS (
  elb_status_code VALUE_LIST
) | INSERT INTO flint_alb_logs_skipping_index
SELECT
  COLLECT_SET(elb_status_code) AS elb_status_code,
  input_file_name() AS file_path
FROM alb_logs
GROUP BY
  input_file_name() | SELECT *
FROM alb_logs
WHERE elb_status_code = 404
=>
SELECT *
FROM alb_logs (input_files =
  SELECT file_path
  FROM flint_alb_logs_skipping_index
  WHERE ARRAY_CONTAINS(elb_status_code, 404)
)
WHERE elb_status_code = 404 | -| Min-Max | CREATE SKIPPING INDEX
ON alb_logs
FOR COLUMNS (
  request_processing_time MIN_MAX
) | INSERT INTO flint_alb_logs_skipping_index
SELECT
  MIN(request_processing_time) AS request_processing_time_min,
  MAX(request_processing_time) AS request_processing_time_max,
  input_file_name() AS file_path
FROM alb_logs
GROUP BY
  input_file_name() | SELECT *
FROM alb_logs
WHERE request_processing_time = 100
=>
SELECT *
FROM alb_logs (input_files =
SELECT file_path
  FROM flint_alb_logs_skipping_index
  WHERE request_processing_time_min <= 100
    AND 100 <= request_processing_time_max
)
WHERE request_processing_time = 100 +| ValueSet | CREATE SKIPPING INDEX
ON alb_logs
FOR COLUMNS (
  elb_status_code VALUE_SET
) | INSERT INTO flint_alb_logs_skipping_index
SELECT
  COLLECT_SET(elb_status_code) AS elb_status_code,
  input_file_name() AS file_path
FROM alb_logs
GROUP BY
  input_file_name() | SELECT *
FROM alb_logs
WHERE elb_status_code = 404
=>
SELECT *
FROM alb_logs (input_files =
  SELECT file_path
  FROM flint_alb_logs_skipping_index
  WHERE ARRAY_CONTAINS(elb_status_code, 404)
)
WHERE elb_status_code = 404 | +| MinMax | CREATE SKIPPING INDEX
ON alb_logs
FOR COLUMNS (
  request_processing_time MIN_MAX
) | INSERT INTO flint_alb_logs_skipping_index
SELECT
  MIN(request_processing_time) AS request_processing_time_min,
  MAX(request_processing_time) AS request_processing_time_max,
  input_file_name() AS file_path
FROM alb_logs
GROUP BY
  input_file_name() | SELECT *
FROM alb_logs
WHERE request_processing_time = 100
=>
SELECT *
FROM alb_logs (input_files =
SELECT file_path
  FROM flint_alb_logs_skipping_index
  WHERE request_processing_time_min <= 100
    AND 100 <= request_processing_time_max
)
WHERE request_processing_time = 100 ### Flint Index Specification @@ -122,8 +122,9 @@ DDL statement: ```sql CREATE SKIPPING INDEX ON -FOR COLUMNS ( column [, ...] ) +( column [, ...] ) WHERE +WITH (auto_refresh = (true|false)) DESCRIBE SKIPPING INDEX ON @@ -135,19 +136,14 @@ DROP SKIPPING INDEX ON Skipping index type: ```sql - ::= { , , } - - ::= BLOOM_FILTER( bitCount, numOfHashFunctions ) #TBD - ::= MIN_MAX - ::= VALUE_SET + ::= { PARTITION, VALUE_SET, MIN_MAX } ``` Example: ```sql CREATE SKIPPING INDEX ON alb_logs -FOR COLUMNS ( - client_ip BLOOM_FILTER, +( elb_status_code VALUE_SET ) WHERE time > '2023-04-01 00:00:00' @@ -257,7 +253,6 @@ flint.skippingIndex() .addPartitions("year", "month", "day") .addValueSet("elb_status_code") .addMinMax("request_processing_time") - .addBloomFilter("client_ip") .create() flint.refresh("flint_alb_logs_skipping_index", FULL)