Skip to content

Commit 6cbbbf7

Browse files
authored
chore(query): constant folder support exclusive check (#18822)
* chore(query): constant folder support exclusive check * chore(query): constant folder support exclusive check * chore(query): constant folder support exclusive check * chore(query): constant folder support exclusive check * chore(query): update * chore(query): update
1 parent 213ccf8 commit 6cbbbf7

File tree

8 files changed

+1260
-719
lines changed

8 files changed

+1260
-719
lines changed

src/query/expression/src/constant_folder.rs

Lines changed: 976 additions & 0 deletions
Large diffs are not rendered by default.

src/query/expression/src/evaluator.rs

Lines changed: 0 additions & 719 deletions
Large diffs are not rendered by default.

src/query/expression/src/lib.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
mod block;
4949

5050
pub mod aggregate;
51+
mod constant_folder;
5152
pub mod converts;
5253
mod evaluator;
5354
mod expression;
@@ -74,6 +75,7 @@ pub use crate::aggregate::*;
7475
pub use crate::block::BlockMetaInfo;
7576
pub use crate::block::BlockMetaInfoPtr;
7677
pub use crate::block::*;
78+
pub use crate::constant_folder::*;
7779
pub use crate::evaluator::*;
7880
pub use crate::expression::*;
7981
pub use crate::filter::*;

src/query/storages/common/index/tests/it/range_pruner.rs

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ use std::sync::Arc;
1717

1818
use databend_common_exception::Result;
1919
use databend_common_expression::types::ArgType;
20+
use databend_common_expression::types::DataType;
2021
use databend_common_expression::types::Int32Type;
2122
use databend_common_expression::types::NumberDataType;
2223
use databend_common_expression::FunctionContext;
@@ -44,6 +45,37 @@ fn test_range_index() -> Result<()> {
4445

4546
run_text(file, "a = 2 and b = 41", domains);
4647
run_text(file, "a = 2 and b < 7", domains);
48+
49+
// test not keep - mutually exclusive predicates on the same column
50+
run_text(file, "a > 2 and a < 1", domains);
51+
run_text(file, "a = 2 and a < 1", domains);
52+
run_text(file, "a < 2 and a > 2", domains);
53+
54+
run_text(file, "a >= 3 and a < 2", domains);
55+
run_text(file, "a <= 1 and a > 3", domains);
56+
run_text(file, "a = 5 and a = 7", domains);
57+
run_text(file, "a > 10 and a < 5", domains);
58+
run_text(file, "a >= 5 and a <= 2", domains);
59+
60+
// test keep - non-mutual exclusion cases (note: `a > 2 and a = 2` below is mutually exclusive and is expected not to keep)
61+
run_text(file, "a > 1 and a < 3", domains);
62+
run_text(file, "a >= 2 and a <= 2", domains);
63+
run_text(file, "a >= 1 and a < 3", domains);
64+
run_text(file, "a > 1 and a <= 2", domains);
65+
run_text(file, "a > 2 and a = 2", domains);
66+
run_text(file, "a < 5 and a = 2", domains);
67+
run_text(file, "a = 2 and a < 5", domains);
68+
run_text(file, "a > 1 and a = 2", domains);
69+
70+
// test complex expressions with multiple columns
71+
run_text(file, "a > 2 and a < 1 and b = 1", domains);
72+
run_text(file, "a = 2 and b > 5 and b < 3", domains);
73+
74+
// test edge cases
75+
run_text(file, "a > 2 and a <= 2", domains);
76+
run_text(file, "a < 2 and a >= 2", domains);
77+
run_text(file, "a = 5 and a != 5", domains);
78+
4779
run_text(file, "to_string(a) = '4'", domains);
4880
run_text(file, "to_string(a) = 'a'", domains);
4981
run_text(file, "to_int8(a) = 3", domains);
@@ -56,6 +88,135 @@ fn test_range_index() -> Result<()> {
5688
Ok(())
5789
}
5890

91+
/// Golden-file test for range-index pruning over `Date` and `Timestamp` columns.
///
/// Each case is passed to `run_text_with_schema`, which appends the predicate text,
/// the parsed expression and the keep/err outcome to `test_range_index_dates.txt`.
#[test]
92+
fn test_range_index_dates() -> Result<()> {
93+
let mut mint = Mint::new("tests/it/testdata");
94+
let file = &mut mint.new_goldenfile("test_range_index_dates.txt").unwrap();
95+
96+
// Date scalar helper: wraps a day count in `Scalar::Date`
97+
fn date_scalar(days: i32) -> Scalar {
98+
Scalar::Date(days)
99+
}
100+
101+
// Timestamp scalar helper: wraps a microsecond count in `Scalar::Timestamp`
102+
fn timestamp_scalar(micros: i64) -> Scalar {
103+
Scalar::Timestamp(micros)
104+
}
105+
106+
// Test date ranges
107+
let date_domains = &[
108+
("dt", date_scalar(18000), date_scalar(18010)), // 2019-04-14 to 2019-04-24, assuming days since 1970-01-01 — TODO confirm
109+
];
110+
111+
run_text_with_schema(
112+
file,
113+
"dt > '2019-05-01' and dt < '2019-04-30'",
114+
date_domains,
115+
vec![TableField::new("dt", TableDataType::Date)],
116+
);
117+
118+
run_text_with_schema(
119+
file,
120+
"dt = '2019-05-01' and dt < '2019-04-30'",
121+
date_domains,
122+
vec![TableField::new("dt", TableDataType::Date)],
123+
);
124+
125+
run_text_with_schema(
126+
file,
127+
"dt >= '2019-05-02' and dt <= '2019-05-01'",
128+
date_domains,
129+
vec![TableField::new("dt", TableDataType::Date)],
130+
);
131+
132+
// Non-mutual exclusion cases
133+
// NOTE(review): the two predicates below are jointly satisfiable, yet the golden
134+
// file records `keep : false` — presumably because 2019-04-28..2019-05-03 lies
135+
// outside the `dt` domain above; confirm this is the intended coverage.
run_text_with_schema(
134+
file,
135+
"dt >= '2019-04-28' and dt <= '2019-05-03'",
136+
date_domains,
137+
vec![TableField::new("dt", TableDataType::Date)],
138+
);
139+
140+
// Test timestamp ranges
141+
let ts_domains = &[
142+
(
143+
"ts",
144+
timestamp_scalar(1556668800000000),
145+
timestamp_scalar(1556755200000000),
146+
), // 2019-05-01 to 2019-05-02
147+
];
148+
149+
run_text_with_schema(
150+
file,
151+
"ts > '2019-05-01 12:00:00' and ts < '2019-05-01 06:00:00'",
152+
ts_domains,
153+
vec![TableField::new("ts", TableDataType::Timestamp)],
154+
);
155+
156+
run_text_with_schema(
157+
file,
158+
"ts = '2019-05-01 12:00:00' and ts != '2019-05-01 12:00:00'",
159+
ts_domains,
160+
vec![TableField::new("ts", TableDataType::Timestamp)],
161+
);
162+
163+
Ok(())
164+
}
165+
166+
/// Golden-file test for range-index pruning over a `String` column.
///
/// Each case is passed to `run_text_with_schema`, which appends the predicate text,
/// the parsed expression and the keep/err outcome to `test_range_index_strings.txt`.
#[test]
167+
fn test_range_index_strings() -> Result<()> {
168+
let mut mint = Mint::new("tests/it/testdata");
169+
let file = &mut mint.new_goldenfile("test_range_index_strings.txt").unwrap();
170+
171+
// String scalar helper: copies `s` into an owned `Scalar::String`
172+
fn string_scalar(s: &str) -> Scalar {
173+
Scalar::String(s.to_string())
174+
}
175+
176+
// Test string ranges
177+
let string_domains = &[("s", string_scalar("aaefg"), string_scalar("zzefg"))];
178+
run_text_with_schema(file, "s > 'efg' and s = 'efg'", string_domains, vec![
179+
TableField::new("s", TableDataType::String),
180+
]);
181+
182+
run_text_with_schema(file, "s > 'aaefg' and s < 'zzefg'", string_domains, vec![
183+
TableField::new("s", TableDataType::String),
184+
]);
185+
Ok(())
186+
}
187+
188+
/// Parses the filter `text` against a schema built from `fields`, applies the
/// resulting `RangeIndex` to statistics created from `domains`, and appends the
/// outcome to `file`.
///
/// Output written per call: a `text : ...` line, an `expr : ...` line, then either
/// `keep : ...` (on success) or `err : ...` (on failure), followed by a blank line.
///
/// * `file` - writer receiving the golden output.
/// * `text` - filter expression to parse.
/// * `domains` - `(column name, scalar, scalar)` tuples handed to `create_stats`;
///   presumably the column's min and max values — confirm against `create_stats`.
/// * `fields` - table fields; used both for the schema and for the column
///   name/type list given to `parse_expr`.
///
/// Panics if the index cannot be created or a write to `file` fails (`unwrap`).
fn run_text_with_schema(
189+
file: &mut impl Write,
190+
text: &str,
191+
domains: &[(&str, Scalar, Scalar)],
192+
fields: Vec<TableField>,
193+
) {
194+
let func_ctx = FunctionContext::default();
195+
let schema = Arc::new(TableSchema::new(fields.clone()));
196+
let stats = create_stats(domains, &schema);
197+
198+
// Column (name, data type) pairs derived from the table fields for the expression parser.
let columns: Vec<(&str, DataType)> = fields
199+
.iter()
200+
.map(|f| (f.name().as_str(), f.data_type().into()))
201+
.collect();
202+
203+
let expr = parse_expr(text, &columns);
204+
let index = RangeIndex::try_create(func_ctx, &expr, schema, Default::default()).unwrap();
205+
206+
writeln!(file, "text : {text}").unwrap();
207+
writeln!(file, "expr : {expr}").unwrap();
208+
209+
// NOTE(review): the meaning of the `|_| false` callback is not visible here — confirm against `RangeIndex::apply`.
match index.apply(&stats, |_| false) {
210+
Err(err) => {
211+
writeln!(file, "err : {err}").unwrap();
212+
}
213+
Ok(keep) => {
214+
writeln!(file, "keep : {keep}").unwrap();
215+
}
216+
};
217+
writeln!(file).unwrap();
218+
}
219+
59220
fn create_stats(domains: &[(&str, Scalar, Scalar)], schema: &TableSchema) -> StatisticsOfColumns {
60221
domains
61222
.iter()
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
text : dt > '2019-05-01' and dt < '2019-04-30'
2+
expr : and<Boolean, Boolean>(gt<Date, Date>(dt, CAST<String>("2019-05-01" AS Date)), lt<Date, Date>(dt, CAST<String>("2019-04-30" AS Date)))
3+
keep : false
4+
5+
text : dt = '2019-05-01' and dt < '2019-04-30'
6+
expr : and<Boolean, Boolean>(eq<Date, Date>(dt, CAST<String>("2019-05-01" AS Date)), lt<Date, Date>(dt, CAST<String>("2019-04-30" AS Date)))
7+
keep : false
8+
9+
text : dt >= '2019-05-02' and dt <= '2019-05-01'
10+
expr : and<Boolean, Boolean>(gte<Date, Date>(dt, CAST<String>("2019-05-02" AS Date)), lte<Date, Date>(dt, CAST<String>("2019-05-01" AS Date)))
11+
keep : false
12+
13+
text : dt >= '2019-04-28' and dt <= '2019-05-03'
14+
expr : and<Boolean, Boolean>(gte<Date, Date>(dt, CAST<String>("2019-04-28" AS Date)), lte<Date, Date>(dt, CAST<String>("2019-05-03" AS Date)))
15+
keep : false
16+
17+
text : ts > '2019-05-01 12:00:00' and ts < '2019-05-01 06:00:00'
18+
expr : and<Boolean, Boolean>(gt<Timestamp, Timestamp>(ts, CAST<String>("2019-05-01 12:00:00" AS Timestamp)), lt<Timestamp, Timestamp>(ts, CAST<String>("2019-05-01 06:00:00" AS Timestamp)))
19+
keep : false
20+
21+
text : ts = '2019-05-01 12:00:00' and ts != '2019-05-01 12:00:00'
22+
expr : and<Boolean, Boolean>(eq<Timestamp, Timestamp>(ts, CAST<String>("2019-05-01 12:00:00" AS Timestamp)), noteq<Timestamp, Timestamp>(ts, CAST<String>("2019-05-01 12:00:00" AS Timestamp)))
23+
keep : false
24+
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
text : s > 'efg' and s = 'efg'
2+
expr : and<Boolean, Boolean>(gt<String, String>(s, "efg"), eq<String, String>(s, "efg"))
3+
keep : false
4+
5+
text : s > 'aaefg' and s < 'zzefg'
6+
expr : and<Boolean, Boolean>(gt<String, String>(s, "aaefg"), lt<String, String>(s, "zzefg"))
7+
keep : true
8+

src/query/storages/common/index/tests/it/testdata/test_range_indexs.txt

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,90 @@ text : a = 2 and b < 7
66
expr : and<Boolean, Boolean>(eq<Int32, Int32>(a, 2_i32), lt<Int32, Int32>(b, CAST<UInt8>(7_u8 AS Int32)))
77
keep : true
88

9+
text : a > 2 and a < 1
10+
expr : and<Boolean, Boolean>(gt<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)), lt<Int32, Int32>(a, CAST<UInt8>(1_u8 AS Int32)))
11+
keep : false
12+
13+
text : a = 2 and a < 1
14+
expr : and<Boolean, Boolean>(eq<Int32, Int32>(a, 2_i32), lt<Int32, Int32>(a, CAST<UInt8>(1_u8 AS Int32)))
15+
keep : false
16+
17+
text : a < 2 and a > 2
18+
expr : and<Boolean, Boolean>(lt<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)), gt<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)))
19+
keep : false
20+
21+
text : a >= 3 and a < 2
22+
expr : and<Boolean, Boolean>(gte<Int32, Int32>(a, CAST<UInt8>(3_u8 AS Int32)), lt<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)))
23+
keep : false
24+
25+
text : a <= 1 and a > 3
26+
expr : and<Boolean, Boolean>(lte<Int32, Int32>(a, CAST<UInt8>(1_u8 AS Int32)), gt<Int32, Int32>(a, CAST<UInt8>(3_u8 AS Int32)))
27+
keep : false
28+
29+
text : a = 5 and a = 7
30+
expr : and<Boolean, Boolean>(eq<Int32, Int32>(a, 5_i32), eq<Int32, Int32>(a, 7_i32))
31+
keep : false
32+
33+
text : a > 10 and a < 5
34+
expr : and<Boolean, Boolean>(gt<Int32, Int32>(a, CAST<UInt8>(10_u8 AS Int32)), lt<Int32, Int32>(a, CAST<UInt8>(5_u8 AS Int32)))
35+
keep : false
36+
37+
text : a >= 5 and a <= 2
38+
expr : and<Boolean, Boolean>(gte<Int32, Int32>(a, CAST<UInt8>(5_u8 AS Int32)), lte<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)))
39+
keep : false
40+
41+
text : a > 1 and a < 3
42+
expr : and<Boolean, Boolean>(gt<Int32, Int32>(a, CAST<UInt8>(1_u8 AS Int32)), lt<Int32, Int32>(a, CAST<UInt8>(3_u8 AS Int32)))
43+
keep : true
44+
45+
text : a >= 2 and a <= 2
46+
expr : and<Boolean, Boolean>(gte<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)), lte<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)))
47+
keep : true
48+
49+
text : a >= 1 and a < 3
50+
expr : and<Boolean, Boolean>(gte<Int32, Int32>(a, CAST<UInt8>(1_u8 AS Int32)), lt<Int32, Int32>(a, CAST<UInt8>(3_u8 AS Int32)))
51+
keep : true
52+
53+
text : a > 1 and a <= 2
54+
expr : and<Boolean, Boolean>(gt<Int32, Int32>(a, CAST<UInt8>(1_u8 AS Int32)), lte<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)))
55+
keep : true
56+
57+
text : a > 2 and a = 2
58+
expr : and<Boolean, Boolean>(gt<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)), eq<Int32, Int32>(a, 2_i32))
59+
keep : false
60+
61+
text : a < 5 and a = 2
62+
expr : and<Boolean, Boolean>(lt<Int32, Int32>(a, CAST<UInt8>(5_u8 AS Int32)), eq<Int32, Int32>(a, 2_i32))
63+
keep : true
64+
65+
text : a = 2 and a < 5
66+
expr : and<Boolean, Boolean>(eq<Int32, Int32>(a, 2_i32), lt<Int32, Int32>(a, CAST<UInt8>(5_u8 AS Int32)))
67+
keep : true
68+
69+
text : a > 1 and a = 2
70+
expr : and<Boolean, Boolean>(gt<Int32, Int32>(a, CAST<UInt8>(1_u8 AS Int32)), eq<Int32, Int32>(a, 2_i32))
71+
keep : true
72+
73+
text : a > 2 and a < 1 and b = 1
74+
expr : and<Boolean, Boolean>(and<Boolean, Boolean>(gt<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)), lt<Int32, Int32>(a, CAST<UInt8>(1_u8 AS Int32))), eq<Int32, Int32>(b, 1_i32))
75+
keep : false
76+
77+
text : a = 2 and b > 5 and b < 3
78+
expr : and<Boolean, Boolean>(and<Boolean, Boolean>(eq<Int32, Int32>(a, 2_i32), gt<Int32, Int32>(b, CAST<UInt8>(5_u8 AS Int32))), lt<Int32, Int32>(b, CAST<UInt8>(3_u8 AS Int32)))
79+
keep : false
80+
81+
text : a > 2 and a <= 2
82+
expr : and<Boolean, Boolean>(gt<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)), lte<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)))
83+
keep : false
84+
85+
text : a < 2 and a >= 2
86+
expr : and<Boolean, Boolean>(lt<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)), gte<Int32, Int32>(a, CAST<UInt8>(2_u8 AS Int32)))
87+
keep : false
88+
89+
text : a = 5 and a != 5
90+
expr : and<Boolean, Boolean>(eq<Int32, Int32>(a, 5_i32), noteq<Int32, Int32>(a, CAST<UInt8>(5_u8 AS Int32)))
91+
keep : false
92+
993
text : to_string(a) = '4'
1094
expr : eq<String, String>(CAST<Int32>(a AS String), "4")
1195
keep : false

tests/sqllogictests/suites/mode/standalone/explain/range_pruner.test

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,11 @@ EvalScalar
2727
└── estimated rows: 2.00
2828

2929

30+
query T
31+
explain select 1 from range_t where i > 3 and i = 2
32+
----
33+
EmptyResultScan
34+
3035
query T
3136
explain select 1 from range_t where i > 20
3237
----

0 commit comments

Comments
 (0)