diff --git a/Cargo.lock b/Cargo.lock index af799bb9abce5..7501983b0fd13 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4598,6 +4598,7 @@ dependencies = [ "enum-as-inner", "fastrace", "globiter", + "goldenfile", "indexmap 2.12.1", "itertools 0.13.0", "jsonb", diff --git a/src/common/storage/src/statistics.rs b/src/common/storage/src/statistics.rs index e367529783d54..9a69165ef1560 100644 --- a/src/common/storage/src/statistics.rs +++ b/src/common/storage/src/statistics.rs @@ -136,20 +136,11 @@ impl Display for Datum { impl Datum { pub fn type_comparable(&self, other: &Datum) -> bool { - matches!( - (self, other), - (Datum::Bool(_), Datum::Bool(_)) - | (Datum::Bytes(_), Datum::Bytes(_)) - | (Datum::Int(_), Datum::UInt(_)) - | (Datum::Int(_), Datum::Int(_)) - | (Datum::Int(_), Datum::Float(_)) - | (Datum::UInt(_), Datum::Int(_)) - | (Datum::UInt(_), Datum::UInt(_)) - | (Datum::UInt(_), Datum::Float(_)) - | (Datum::Float(_), Datum::Float(_)) - | (Datum::Float(_), Datum::Int(_)) - | (Datum::Float(_), Datum::UInt(_)) - ) + self.is_numeric() && other.is_numeric() + || matches!( + (self, other), + (Datum::Bool(_), Datum::Bool(_)) | (Datum::Bytes(_), Datum::Bytes(_)) + ) } pub fn is_numeric(&self) -> bool { @@ -192,21 +183,4 @@ impl Datum { ))), } } - - pub fn can_compare(&self, other: &Self) -> bool { - matches!( - (self, other), - (Datum::Bool(_), Datum::Bool(_)) - | (Datum::Int(_), Datum::Int(_)) - | (Datum::Int(_), Datum::UInt(_)) - | (Datum::Int(_), Datum::Float(_)) - | (Datum::UInt(_), Datum::UInt(_)) - | (Datum::UInt(_), Datum::Int(_)) - | (Datum::UInt(_), Datum::Float(_)) - | (Datum::Float(_), Datum::Float(_)) - | (Datum::Float(_), Datum::Int(_)) - | (Datum::Float(_), Datum::UInt(_)) - | (Datum::Bytes(_), Datum::Bytes(_)) - ) - } } diff --git a/src/query/expression/src/expression.rs b/src/query/expression/src/expression.rs index 464057f7a4d94..3fe7b21c613d1 100644 --- a/src/query/expression/src/expression.rs +++ b/src/query/expression/src/expression.rs @@ -649,6 +649,20 @@ impl Expr { } } + pub fn data_type_remove_generics(&self) -> DataType { + match self { + Expr::Constant(Constant { data_type, .. }) => data_type.clone(), + Expr::ColumnRef(ColumnRef { data_type, .. }) => data_type.clone(), + Expr::Cast(Cast { dest_type, .. }) => dest_type.clone(), + Expr::FunctionCall(FunctionCall { + return_type, + generics, + .. + }) => return_type.remove_generics(generics), + Expr::LambdaFunctionCall(LambdaFunctionCall { return_type, .. }) => return_type.clone(), + } + } + pub fn column_refs(&self) -> HashMap { struct ColumnRefs(HashMap); impl ExprVisitor for ColumnRefs { diff --git a/src/query/expression/src/filter/filter_executor.rs b/src/query/expression/src/filter/filter_executor.rs index 31f879bd77709..aa1e6a6a6544c 100644 --- a/src/query/expression/src/filter/filter_executor.rs +++ b/src/query/expression/src/filter/filter_executor.rs @@ -54,7 +54,7 @@ impl FilterExecutor { fn_registry: &'static FunctionRegistry, keep_order: bool, ) -> Self { - let (select_expr, has_or) = SelectExprBuilder::new().build(&expr).into(); + let (select_expr, has_or) = SelectExprBuilder::new(fn_registry).build(&expr).into(); let true_selection = vec![0; max_block_size]; let false_selection = if has_or { diff --git a/src/query/expression/src/filter/select_expr.rs b/src/query/expression/src/filter/select_expr.rs index 4a7edf4cb3d57..3d8567943dc9d 100644 --- a/src/query/expression/src/filter/select_expr.rs +++ b/src/query/expression/src/filter/select_expr.rs @@ -20,6 +20,7 @@ use itertools::Itertools; use crate::Expr; use crate::Function; use crate::FunctionID; +use crate::FunctionRegistry; use crate::LikePattern; use crate::Scalar; use crate::expr::*; @@ -48,14 +49,32 @@ pub enum SelectExpr { BooleanScalar((Scalar, DataType)), } -#[derive(Default)] pub struct SelectExprBuilder { - not_function: Option<(FunctionID, Arc)>, + not_function: (FunctionID, Arc), + nullable_not_function: (FunctionID, Arc), } impl SelectExprBuilder { - pub fn new() -> Self { - Self::default() + pub fn new(fn_registry: &'static FunctionRegistry) -> Self { + let funcs = + fn_registry.search_candidates("not", &[], &[Expr::::Constant(Constant { + span: None, + scalar: Scalar::Boolean(true), + data_type: DataType::Boolean, + })]); + + SelectExprBuilder { + not_function: funcs + .iter() + .find(|(id, func)| func.signature.return_type.is_boolean()) + .unwrap() + .clone(), + nullable_not_function: funcs + .iter() + .find(|(id, func)| func.signature.return_type.is_nullable()) + .unwrap() + .clone(), + } } pub fn build(&mut self, expr: &Expr) -> SelectExprBuildResult { @@ -139,7 +158,6 @@ impl SelectExprBuilder { .can_reorder(can_reorder) } "not" => { - self.not_function = Some((*id.clone(), function.clone())); let result = self.build_select_expr(&args[0], not ^ true); if result.can_push_down_not { result @@ -215,9 +233,11 @@ impl SelectExprBuilder { .can_push_down_not(false) } Expr::Constant(Constant { - scalar, data_type, .. - }) if matches!(data_type, &DataType::Boolean | &DataType::Nullable(box DataType::Boolean)) => - { + scalar, + data_type: + data_type @ (DataType::Boolean | DataType::Nullable(box DataType::Boolean)), + .. + }) => { let scalar = if not { match scalar { Scalar::Null => Scalar::Null, @@ -259,24 +279,31 @@ impl SelectExprBuilder { fn other_select_expr(&self, expr: &Expr, not: bool) -> SelectExprBuildResult { let can_push_down_not = !not - || matches!(expr.data_type(), DataType::Boolean | DataType::Nullable(box DataType::Boolean)); - let expr = if not && can_push_down_not { - self.wrap_not(expr) + || matches!(expr.data_type_remove_generics(), DataType::Boolean | DataType::Nullable(box DataType::Boolean)); + + let expr = SelectExpr::Others(if not && can_push_down_not { + self.wrap_not(expr.clone()) } else { expr.clone() - }; - SelectExprBuildResult::new(SelectExpr::Others(expr)).can_push_down_not(can_push_down_not) + }); + + SelectExprBuildResult::new(expr).can_push_down_not(can_push_down_not) } - fn wrap_not(&self, expr: &Expr) -> Expr { - let (id, function) = self.not_function.as_ref().unwrap(); + fn wrap_not(&self, expr: Expr) -> Expr { + let data_type = expr.data_type_remove_generics(); + let (id, function) = if data_type.is_nullable() { + self.nullable_not_function.clone() + } else { + self.not_function.clone() + }; FunctionCall { span: None, - id: Box::new(id.clone()), - function: function.clone(), + id: Box::new(id), + return_type: function.signature.return_type.clone(), + function, generics: vec![], - args: vec![expr.clone()], - return_type: expr.data_type().clone(), + args: vec![expr], } .into() } diff --git a/src/query/expression/src/filter/selector.rs b/src/query/expression/src/filter/selector.rs index 8a3ad13d2b9ec..145c233bc7f25 100644 --- a/src/query/expression/src/filter/selector.rs +++ b/src/query/expression/src/filter/selector.rs @@ -81,7 +81,6 @@ impl<'a> Selector<'a> { } // Process `SelectExpr`. - #[allow(clippy::too_many_arguments)] fn process_select_expr( &self, select_expr: &mut SelectExpr, @@ -403,7 +402,6 @@ impl<'a> Selector<'a> { } // Process SelectExpr::Others. - #[allow(clippy::too_many_arguments)] fn process_others( &self, expr: &Expr, @@ -444,7 +442,6 @@ impl<'a> Selector<'a> { self.select_value(Value::Scalar(constant), data_type, buffers, has_false) } - #[allow(clippy::too_many_arguments)] fn process_expr( &self, expr: &Expr, diff --git a/src/query/expression/src/types.rs b/src/query/expression/src/types.rs index d2ed974f2e2a6..e8c853f692645 100755 --- a/src/query/expression/src/types.rs +++ b/src/query/expression/src/types.rs @@ -198,6 +198,7 @@ impl DataType { pub fn has_generic(&self) -> bool { match self { + DataType::Generic(_) => true, DataType::Null | DataType::EmptyArray | DataType::EmptyMap @@ -214,13 +215,44 @@ impl DataType { | DataType::Variant | DataType::Geometry | DataType::Geography - | DataType::Vector(_) => false, + | DataType::Vector(_) + | DataType::Opaque(_) + | DataType::StageLocation => false, DataType::Nullable(ty) => ty.has_generic(), DataType::Array(ty) => ty.has_generic(), DataType::Map(ty) => ty.has_generic(), DataType::Tuple(tys) => tys.iter().any(|ty| ty.has_generic()), - DataType::Generic(_) => true, - DataType::Opaque(_) | DataType::StageLocation => false, + } + } + + pub fn remove_generics(&self, generics: &[DataType]) -> DataType { + match self { + DataType::Generic(i) => generics[*i].clone(), + DataType::Null + | DataType::EmptyArray + | DataType::EmptyMap + | DataType::Boolean + | DataType::Binary + | DataType::String + | DataType::Number(_) + | DataType::Decimal(_) + | DataType::Timestamp + | DataType::TimestampTz + | DataType::Date + | DataType::Interval + | DataType::Bitmap + | DataType::Variant + | DataType::Geometry + | DataType::Geography + | DataType::Vector(_) + | DataType::Opaque(_) + | DataType::StageLocation => self.clone(), + DataType::Nullable(ty) => DataType::Nullable(Box::new(ty.remove_generics(generics))), + DataType::Array(ty) => DataType::Array(Box::new(ty.remove_generics(generics))), + DataType::Map(ty) => DataType::Map(Box::new(ty.remove_generics(generics))), + DataType::Tuple(tys) => { + DataType::Tuple(tys.iter().map(|ty| ty.remove_generics(generics)).collect()) + } } } diff --git a/src/query/expression/src/utils/display.rs b/src/query/expression/src/utils/display.rs index 6a649b3ed8379..033021e87cf08 100755 --- a/src/query/expression/src/utils/display.rs +++ b/src/query/expression/src/utils/display.rs @@ -1027,7 +1027,35 @@ impl Expr { function, args, id, .. }) => match (function.signature.name.as_str(), args.as_slice()) { ("and", [lhs, rhs]) => write_binary_op("AND", lhs, rhs, 10, min_precedence), + ("and_filters", args) if !args.is_empty() => { + let precedence = 10; + let str = args + .iter() + .map(|arg| write_expr(arg, precedence)) + .collect::>() + .join(" and "); + + if precedence < min_precedence { + format!("({str})") + } else { + str + } + } ("or", [lhs, rhs]) => write_binary_op("OR", lhs, rhs, 5, min_precedence), + ("or_filters", args) if !args.is_empty() => { + let precedence = 5; + let str = args + .iter() + .map(|arg| write_expr(arg, precedence)) + .collect::>() + .join(" or "); + + if precedence < min_precedence { + format!("({str})") + } else { + str + } + } ("not", [expr]) => write_unary_op("NOT", expr, 15, min_precedence), ("gte", [lhs, rhs]) => write_binary_op(">=", lhs, rhs, 20, min_precedence), ("gt", [lhs, rhs]) => write_binary_op(">", lhs, rhs, 20, min_precedence), diff --git a/src/query/expression/src/values.rs b/src/query/expression/src/values.rs index c21e21315a02a..6f9251391be80 100755 --- a/src/query/expression/src/values.rs +++ b/src/query/expression/src/values.rs @@ -424,6 +424,13 @@ impl Value { pub fn is_scalar_null(&self) -> bool { *self == Value::Scalar(Scalar::Null) } + + pub fn is_value_of_type(&self, data_type: &DataType) -> bool { + match self { + Value::Scalar(scalar) => scalar.as_ref().is_value_of_type(data_type), + Value::Column(column) => column.data_type() == *data_type, + } + } } impl Scalar { diff --git a/src/query/service/src/physical_plans/format/format_table_scan.rs b/src/query/service/src/physical_plans/format/format_table_scan.rs index c34b10659cc86..96c0a0bbcc567 100644 --- a/src/query/service/src/physical_plans/format/format_table_scan.rs +++ b/src/query/service/src/physical_plans/format/format_table_scan.rs @@ -139,7 +139,7 @@ impl<'a> PhysicalFormat for TableScanFormatter<'a> { if let Some(agg_index) = agg_index { let (_, agg_index_sql, _) = ctx .metadata - .get_agg_indexes(&table_name) + .get_agg_indices(&table_name) .unwrap() .iter() .find(|(index, _, _)| *index == agg_index.index_id) diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt index a0acd410afb74..ff590d6309b7c 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_avg_case_expression_physical.txt @@ -107,7 +107,7 @@ Exchange ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(a0c.a0d (#0) >= '20240526', a0c.a0d (#0) <= '20250525')], limit: NONE] + ├── push downs: [filters: [a0c.a0d (#0) >= '20240526' and a0c.a0d (#0) <= '20250525'], limit: NONE] ├── apply join filters: [#2, #3, #1, #0] └── estimated rows: 3042216421.58 diff --git a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_physical.txt b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_physical.txt index 8e8a2f0020004..ea751a082764d 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_physical.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/data/results/obfuscated/01_multi_join_sum_case_expression_physical.txt @@ -103,7 +103,7 @@ Exchange ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(a0c.a0d (#0) >= '20240526', a0c.a0d (#0) <= '20250525')], limit: NONE] + ├── push downs: [filters: [a0c.a0d (#0) >= '20240526' and a0c.a0d (#0) <= '20250525'], limit: NONE] ├── apply join filters: [#2, #3, #1, #0] └── estimated rows: 3042216421.58 diff --git a/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/agg_index_query_rewrite.rs b/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/agg_index_query_rewrite.rs index f055ce6f3d63b..b4e8204792cb0 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/agg_index_query_rewrite.rs +++ b/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/agg_index_query_rewrite.rs @@ -381,7 +381,7 @@ async fn test_query_rewrite_impl(format: &str) -> Result<()> { let (mut query, _, metadata) = plan_sql(ctx.clone(), suite.query, true).await?; { let mut metadata = metadata.write(); - metadata.add_agg_indexes("default.default.t".to_string(), vec![( + metadata.add_agg_indices("default.default.t".to_string(), vec![( 0, suite.index.to_string(), index, diff --git a/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/eager_aggregation.rs b/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/eager_aggregation.rs index 25761b5f533ab..8275d96ecf0e8 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/eager_aggregation.rs +++ b/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/eager_aggregation.rs @@ -40,8 +40,6 @@ use goldenfile::Mint; use super::test_utils::execute_sql; use super::test_utils::raw_plan; -// FIXME: -#[ignore] #[tokio::test(flavor = "multi_thread")] async fn test_eager_aggregation() -> Result<()> { let mut mint = Mint::new("tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/testdata"); diff --git a/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/testdata/eager_aggregation.txt b/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/testdata/eager_aggregation.txt index 9a90a6e1b9918..07cb9b7382bc4 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/testdata/eager_aggregation.txt +++ b/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/agg_rules/testdata/eager_aggregation.txt @@ -38,25 +38,25 @@ Sort ├── build keys: [lineitem.l_orderkey (#17)] ├── probe keys: [orders.o_orderkey (#0)] ├── other filters: [] - ├── Join(Inner) - │ ├── build keys: [customer.c_custkey (#9)] - │ ├── probe keys: [orders.o_custkey (#1)] - │ ├── other filters: [] - │ ├── Scan - │ │ ├── table: default.orders (#0) - │ │ ├── filters: [lt(orders.o_orderdate (#4), '1995-03-15')] - │ │ ├── order by: [] - │ │ └── limit: NONE - │ └── Scan - │ ├── table: default.customer (#1) - │ ├── filters: [eq(customer.c_mktsegment (#15), 'BUILDING')] - │ ├── order by: [] - │ └── limit: NONE - └── Scan - ├── table: default.lineitem (#2) - ├── filters: [gt(lineitem.l_shipdate (#27), '1995-03-15')] - ├── order by: [] - └── limit: NONE + ├── Scan + │ ├── table: default.lineitem (#2) + │ ├── filters: [gt(lineitem.l_shipdate (#27), '1995-03-15')] + │ ├── order by: [] + │ └── limit: NONE + └── Join(Inner) + ├── build keys: [customer.c_custkey (#9)] + ├── probe keys: [orders.o_custkey (#1)] + ├── other filters: [] + ├── Scan + │ ├── table: default.customer (#1) + │ ├── filters: [eq(customer.c_mktsegment (#15), 'BUILDING')] + │ ├── order by: [] + │ └── limit: NONE + └── Scan + ├── table: default.orders (#0) + ├── filters: [lt(orders.o_orderdate (#4), '1995-03-15')] + ├── order by: [] + └── limit: NONE === #0 apply plan 0 === EvalScalar @@ -71,33 +71,33 @@ EvalScalar ├── build keys: [lineitem.l_orderkey (#17)] ├── probe keys: [orders.o_orderkey (#0)] ├── other filters: [] - ├── Join(Inner) - │ ├── build keys: [customer.c_custkey (#9)] - │ ├── probe keys: [orders.o_custkey (#1)] - │ ├── other filters: [] - │ ├── Scan - │ │ ├── table: default.orders (#0) - │ │ ├── filters: [lt(orders.o_orderdate (#4), '1995-03-15')] - │ │ ├── order by: [] - │ │ └── limit: NONE - │ └── Scan - │ ├── table: default.customer (#1) - │ ├── filters: [eq(customer.c_mktsegment (#15), 'BUILDING')] - │ ├── order by: [] - │ └── limit: NONE - └── Aggregate(Final) - ├── group items: [lineitem.l_orderkey (#17) AS (#17)] - ├── aggregate functions: [sum(sum_arg_0 (#33)) AS (#34)] - └── Aggregate(Partial) - ├── group items: [lineitem.l_orderkey (#17) AS (#17)] - ├── aggregate functions: [sum(sum_arg_0 (#33)) AS (#34)] - └── EvalScalar - ├── scalars: [lineitem.l_orderkey (#17) AS (#17), multiply(lineitem.l_extendedprice (#22), minus(1, lineitem.l_discount (#23))) AS (#33), lineitem.l_shipdate (#27) AS (#39)] - └── Scan - ├── table: default.lineitem (#2) - ├── filters: [gt(lineitem.l_shipdate (#27), '1995-03-15')] - ├── order by: [] - └── limit: NONE + ├── Aggregate(Final) + │ ├── group items: [lineitem.l_orderkey (#17) AS (#17)] + │ ├── aggregate functions: [sum(sum_arg_0 (#33)) AS (#34)] + │ └── Aggregate(Partial) + │ ├── group items: [lineitem.l_orderkey (#17) AS (#17)] + │ ├── aggregate functions: [sum(sum_arg_0 (#33)) AS (#34)] + │ └── EvalScalar + │ ├── scalars: [lineitem.l_orderkey (#17) AS (#17), multiply(lineitem.l_extendedprice (#22), minus(1, lineitem.l_discount (#23))) AS (#33), lineitem.l_shipdate (#27) AS (#39)] + │ └── Scan + │ ├── table: default.lineitem (#2) + │ ├── filters: [gt(lineitem.l_shipdate (#27), '1995-03-15')] + │ ├── order by: [] + │ └── limit: NONE + └── Join(Inner) + ├── build keys: [customer.c_custkey (#9)] + ├── probe keys: [orders.o_custkey (#1)] + ├── other filters: [] + ├── Scan + │ ├── table: default.customer (#1) + │ ├── filters: [eq(customer.c_mktsegment (#15), 'BUILDING')] + │ ├── order by: [] + │ └── limit: NONE + └── Scan + ├── table: default.orders (#0) + ├── filters: [lt(orders.o_orderdate (#4), '1995-03-15')] + ├── order by: [] + └── limit: NONE === #0 apply plan 1 === EvalScalar @@ -114,31 +114,31 @@ EvalScalar ├── build keys: [lineitem.l_orderkey (#17)] ├── probe keys: [orders.o_orderkey (#0)] ├── other filters: [] - ├── Aggregate(Final) - │ ├── group items: [orders.o_orderdate (#4) AS (#4), orders.o_shippriority (#7) AS (#7), orders.o_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [count() AS (#40)] - │ └── Aggregate(Partial) - │ ├── group items: [orders.o_orderdate (#4) AS (#4), orders.o_shippriority (#7) AS (#7), orders.o_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [count() AS (#40)] - │ └── Join(Inner) - │ ├── build keys: [customer.c_custkey (#9)] - │ ├── probe keys: [orders.o_custkey (#1)] - │ ├── other filters: [] - │ ├── Scan - │ │ ├── table: default.orders (#0) - │ │ ├── filters: [lt(orders.o_orderdate (#4), '1995-03-15')] - │ │ ├── order by: [] - │ │ └── limit: NONE - │ └── Scan - │ ├── table: default.customer (#1) - │ ├── filters: [eq(customer.c_mktsegment (#15), 'BUILDING')] - │ ├── order by: [] - │ └── limit: NONE - └── Scan - ├── table: default.lineitem (#2) - ├── filters: [gt(lineitem.l_shipdate (#27), '1995-03-15')] - ├── order by: [] - └── limit: NONE + ├── Scan + │ ├── table: default.lineitem (#2) + │ ├── filters: [gt(lineitem.l_shipdate (#27), '1995-03-15')] + │ ├── order by: [] + │ └── limit: NONE + └── Aggregate(Final) + ├── group items: [orders.o_orderdate (#4) AS (#4), orders.o_shippriority (#7) AS (#7), orders.o_orderkey (#0) AS (#0)] + ├── aggregate functions: [count() AS (#40)] + └── Aggregate(Partial) + ├── group items: [orders.o_orderdate (#4) AS (#4), orders.o_shippriority (#7) AS (#7), orders.o_orderkey (#0) AS (#0)] + ├── aggregate functions: [count() AS (#40)] + └── Join(Inner) + ├── build keys: [customer.c_custkey (#9)] + ├── probe keys: [orders.o_custkey (#1)] + ├── other filters: [] + ├── Scan + │ ├── table: default.customer (#1) + │ ├── filters: [eq(customer.c_mktsegment (#15), 'BUILDING')] + │ ├── order by: [] + │ └── limit: NONE + └── Scan + ├── table: default.orders (#0) + ├── filters: [lt(orders.o_orderdate (#4), '1995-03-15')] + ├── order by: [] + └── limit: NONE === #0 apply plan 2 === EvalScalar @@ -156,36 +156,36 @@ EvalScalar ├── probe keys: [orders.o_orderkey (#0)] ├── other filters: [] ├── Aggregate(Final) - │ ├── group items: [orders.o_orderdate (#4) AS (#4), orders.o_shippriority (#7) AS (#7), orders.o_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [count() AS (#40)] + │ ├── group items: [lineitem.l_orderkey (#17) AS (#17)] + │ ├── aggregate functions: [sum(sum_arg_0 (#33)) AS (#34)] │ └── Aggregate(Partial) - │ ├── group items: [orders.o_orderdate (#4) AS (#4), orders.o_shippriority (#7) AS (#7), orders.o_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [count() AS (#40)] - │ └── Join(Inner) - │ ├── build keys: [customer.c_custkey (#9)] - │ ├── probe keys: [orders.o_custkey (#1)] - │ ├── other filters: [] - │ ├── Scan - │ │ ├── table: default.orders (#0) - │ │ ├── filters: [lt(orders.o_orderdate (#4), '1995-03-15')] - │ │ ├── order by: [] - │ │ └── limit: NONE + │ ├── group items: [lineitem.l_orderkey (#17) AS (#17)] + │ ├── aggregate functions: [sum(sum_arg_0 (#33)) AS (#34)] + │ └── EvalScalar + │ ├── scalars: [lineitem.l_orderkey (#17) AS (#17), multiply(lineitem.l_extendedprice (#22), minus(1, lineitem.l_discount (#23))) AS (#33), lineitem.l_shipdate (#27) AS (#39)] │ └── Scan - │ ├── table: default.customer (#1) - │ ├── filters: [eq(customer.c_mktsegment (#15), 'BUILDING')] + │ ├── table: default.lineitem (#2) + │ ├── filters: [gt(lineitem.l_shipdate (#27), '1995-03-15')] │ ├── order by: [] │ └── limit: NONE └── Aggregate(Final) - ├── group items: [lineitem.l_orderkey (#17) AS (#17)] - ├── aggregate functions: [sum(sum_arg_0 (#33)) AS (#34)] + ├── group items: [orders.o_orderdate (#4) AS (#4), orders.o_shippriority (#7) AS (#7), orders.o_orderkey (#0) AS (#0)] + ├── aggregate functions: [count() AS (#40)] └── Aggregate(Partial) - ├── group items: [lineitem.l_orderkey (#17) AS (#17)] - ├── aggregate functions: [sum(sum_arg_0 (#33)) AS (#34)] - └── EvalScalar - ├── scalars: [lineitem.l_orderkey (#17) AS (#17), multiply(lineitem.l_extendedprice (#22), minus(1, lineitem.l_discount (#23))) AS (#33), lineitem.l_shipdate (#27) AS (#39)] + ├── group items: [orders.o_orderdate (#4) AS (#4), orders.o_shippriority (#7) AS (#7), orders.o_orderkey (#0) AS (#0)] + ├── aggregate functions: [count() AS (#40)] + └── Join(Inner) + ├── build keys: [customer.c_custkey (#9)] + ├── probe keys: [orders.o_custkey (#1)] + ├── other filters: [] + ├── Scan + │ ├── table: default.customer (#1) + │ ├── filters: [eq(customer.c_mktsegment (#15), 'BUILDING')] + │ ├── order by: [] + │ └── limit: NONE └── Scan - ├── table: default.lineitem (#2) - ├── filters: [gt(lineitem.l_shipdate (#27), '1995-03-15')] + ├── table: default.orders (#0) + ├── filters: [lt(orders.o_orderdate (#4), '1995-03-15')] ├── order by: [] └── limit: NONE @@ -212,13 +212,13 @@ EvalScalar ├── probe keys: [lineitem.l_orderkey (#0)] ├── other filters: [] ├── Scan - │ ├── table: default.lineitem (#0) - │ ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] + │ ├── table: default.orders (#1) + │ ├── filters: [] │ ├── order by: [] │ └── limit: NONE └── Scan - ├── table: default.orders (#1) - ├── filters: [] + ├── table: default.lineitem (#0) + ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] ├── order by: [] └── limit: NONE @@ -235,24 +235,24 @@ EvalScalar ├── build keys: [orders.o_orderkey (#16)] ├── probe keys: [lineitem.l_orderkey (#0)] ├── other filters: [] - ├── Aggregate(Final) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum_arg_0 (#25)) AS (#26)] - │ └── Aggregate(Partial) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum_arg_0 (#25)) AS (#26)] - │ └── EvalScalar - │ ├── scalars: [multiply(lineitem.l_extendedprice (#5), minus(1, lineitem.l_discount (#6))) AS (#25), lineitem.l_orderkey (#0) AS (#27), lineitem.l_returnflag (#8) AS (#28)] - │ └── Scan - │ ├── table: default.lineitem (#0) - │ ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] - │ ├── order by: [] - │ └── limit: NONE - └── Scan - ├── table: default.orders (#1) - ├── filters: [] - ├── order by: [] - └── limit: NONE + ├── Scan + │ ├── table: default.orders (#1) + │ ├── filters: [] + │ ├── order by: [] + │ └── limit: NONE + └── Aggregate(Final) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum_arg_0 (#25)) AS (#26)] + └── Aggregate(Partial) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum_arg_0 (#25)) AS (#26)] + └── EvalScalar + ├── scalars: [multiply(lineitem.l_extendedprice (#5), minus(1, lineitem.l_discount (#6))) AS (#25), lineitem.l_orderkey (#0) AS (#27), lineitem.l_returnflag (#8) AS (#28)] + └── Scan + ├── table: default.lineitem (#0) + ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] + ├── order by: [] + └── limit: NONE === #1 apply plan 1 === EvalScalar @@ -269,22 +269,22 @@ EvalScalar ├── build keys: [orders.o_orderkey (#16)] ├── probe keys: [lineitem.l_orderkey (#0)] ├── other filters: [] - ├── Scan - │ ├── table: default.lineitem (#0) - │ ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] - │ ├── order by: [] - │ └── limit: NONE - └── Aggregate(Final) - ├── group items: [orders.o_orderkey (#16) AS (#16)] - ├── aggregate functions: [count() AS (#29)] - └── Aggregate(Partial) - ├── group items: [orders.o_orderkey (#16) AS (#16)] - ├── aggregate functions: [count() AS (#29)] - └── Scan - ├── table: default.orders (#1) - ├── filters: [] - ├── order by: [] - └── limit: NONE + ├── Aggregate(Final) + │ ├── group items: [orders.o_orderkey (#16) AS (#16)] + │ ├── aggregate functions: [count() AS (#29)] + │ └── Aggregate(Partial) + │ ├── group items: [orders.o_orderkey (#16) AS (#16)] + │ ├── aggregate functions: [count() AS (#29)] + │ └── Scan + │ ├── table: default.orders (#1) + │ ├── filters: [] + │ ├── order by: [] + │ └── limit: NONE + └── Scan + ├── table: default.lineitem (#0) + ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] + ├── order by: [] + └── limit: NONE === #1 apply plan 2 === EvalScalar @@ -302,29 +302,29 @@ EvalScalar ├── probe keys: [lineitem.l_orderkey (#0)] ├── other filters: [] ├── Aggregate(Final) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum_arg_0 (#25)) AS (#26)] + │ ├── group items: [orders.o_orderkey (#16) AS (#16)] + │ ├── aggregate functions: [count() AS (#29)] │ └── Aggregate(Partial) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum_arg_0 (#25)) AS (#26)] - │ └── EvalScalar - │ ├── scalars: [multiply(lineitem.l_extendedprice (#5), minus(1, lineitem.l_discount (#6))) AS (#25), lineitem.l_orderkey (#0) AS (#27), lineitem.l_returnflag (#8) AS (#28)] - │ └── Scan - │ ├── table: default.lineitem (#0) - │ ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] - │ ├── order by: [] - │ └── limit: NONE + │ ├── group items: [orders.o_orderkey (#16) AS (#16)] + │ ├── aggregate functions: [count() AS (#29)] + │ └── Scan + │ ├── table: default.orders (#1) + │ ├── filters: [] + │ ├── order by: [] + │ └── limit: NONE └── Aggregate(Final) - ├── group items: [orders.o_orderkey (#16) AS (#16)] - ├── aggregate functions: [count() AS (#29)] + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum_arg_0 (#25)) AS (#26)] └── Aggregate(Partial) - ├── group items: [orders.o_orderkey (#16) AS (#16)] - ├── aggregate functions: [count() AS (#29)] - └── Scan - ├── table: default.orders (#1) - ├── filters: [] - ├── order by: [] - └── limit: NONE + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum_arg_0 (#25)) AS (#26)] + └── EvalScalar + ├── scalars: [multiply(lineitem.l_extendedprice (#5), minus(1, lineitem.l_discount (#6))) AS (#25), lineitem.l_orderkey (#0) AS (#27), lineitem.l_returnflag (#8) AS (#28)] + └── Scan + ├── table: default.lineitem (#0) + ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] + ├── order by: [] + └── limit: NONE === #2 sql === SELECT o_orderkey, sum(l_extendedprice), sum(o_totalprice) @@ -348,12 +348,12 @@ EvalScalar ├── probe keys: [lineitem.l_orderkey (#0)] ├── other filters: [] ├── Scan - │ ├── table: default.lineitem (#0) + │ ├── table: default.orders (#1) │ ├── filters: [] │ ├── order by: [] │ └── limit: NONE └── Scan - ├── table: default.orders (#1) + ├── table: default.lineitem (#0) ├── filters: [] ├── order by: [] └── limit: NONE @@ -373,24 +373,24 @@ EvalScalar ├── build keys: [orders.o_orderkey (#16)] ├── probe keys: [lineitem.l_orderkey (#0)] ├── other filters: [] - ├── Aggregate(Final) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(lineitem.l_extendedprice (#5)) AS (#25), count() AS (#28)] - │ └── Aggregate(Partial) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(lineitem.l_extendedprice (#5)) AS (#25), count() AS (#28)] - │ └── EvalScalar - │ ├── scalars: [lineitem.l_extendedprice (#5) AS (#5), lineitem.l_orderkey (#0) AS (#27)] - │ └── Scan - │ ├── table: default.lineitem (#0) - │ ├── filters: [] - │ ├── order by: [] - │ └── limit: NONE - └── Scan - ├── table: default.orders (#1) - ├── filters: [] - ├── order by: [] - └── limit: NONE + ├── Scan + │ ├── table: default.orders (#1) + │ ├── filters: [] + │ ├── order by: [] + │ └── limit: NONE + └── Aggregate(Final) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(lineitem.l_extendedprice (#5)) AS (#25), count() AS (#28)] + └── Aggregate(Partial) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(lineitem.l_extendedprice (#5)) AS (#25), count() AS (#28)] + └── EvalScalar + ├── scalars: [lineitem.l_extendedprice (#5) AS (#5), lineitem.l_orderkey (#0) AS (#27)] + └── Scan + ├── table: default.lineitem (#0) + ├── filters: [] + ├── order by: [] + └── limit: NONE === #2 apply plan 1 === EvalScalar @@ -408,28 +408,28 @@ EvalScalar ├── probe keys: [lineitem.l_orderkey (#0)] ├── other filters: [] ├── Aggregate(Final) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(lineitem.l_extendedprice (#5)) AS (#25), count() AS (#28)] + │ ├── group items: [orders.o_orderkey (#16) AS (#16)] + │ ├── aggregate functions: [sum(orders.o_totalprice (#19)) AS (#26), count() AS (#29)] │ └── Aggregate(Partial) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(lineitem.l_extendedprice (#5)) AS (#25), count() AS (#28)] + │ ├── group items: [orders.o_orderkey (#16) AS (#16)] + │ ├── aggregate functions: [sum(orders.o_totalprice (#19)) AS (#26), count() AS (#29)] │ └── EvalScalar - │ ├── scalars: [lineitem.l_extendedprice (#5) AS (#5), lineitem.l_orderkey (#0) AS (#27)] + │ ├── scalars: [orders.o_orderkey (#16) AS (#16), orders.o_totalprice (#19) AS (#19)] │ └── Scan - │ ├── table: default.lineitem (#0) + │ ├── table: default.orders (#1) │ ├── filters: [] │ ├── order by: [] │ └── limit: NONE └── Aggregate(Final) - ├── group items: [orders.o_orderkey (#16) AS (#16)] - ├── aggregate functions: [sum(orders.o_totalprice (#19)) AS (#26), count() AS (#29)] + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(lineitem.l_extendedprice (#5)) AS (#25), count() AS (#28)] └── Aggregate(Partial) - ├── group items: [orders.o_orderkey (#16) AS (#16)] - ├── aggregate functions: [sum(orders.o_totalprice (#19)) AS (#26), count() AS (#29)] + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(lineitem.l_extendedprice (#5)) AS (#25), count() AS (#28)] └── EvalScalar - ├── scalars: [orders.o_orderkey (#16) AS (#16), orders.o_totalprice (#19) AS (#19)] + ├── scalars: [lineitem.l_extendedprice (#5) AS (#5), lineitem.l_orderkey (#0) AS (#27)] └── Scan - ├── table: default.orders (#1) + ├── table: default.lineitem (#0) ├── filters: [] ├── order by: [] └── limit: NONE @@ -457,26 +457,26 @@ EvalScalar ├── build keys: [orders.o_orderkey (#18)] ├── probe keys: [lineitem.l_orderkey (#0)] ├── other filters: [] - ├── EvalScalar - │ ├── scalars: [lineitem.l_orderkey (#0) AS (#0), sum(l_extendedprice * (1 - l_discount)) (#17) AS (#17)] - │ └── Aggregate(Final) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] - │ └── Aggregate(Partial) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] - │ └── EvalScalar - │ ├── scalars: [lineitem.l_orderkey (#0) AS (#0), multiply(lineitem.l_extendedprice (#5), minus(1, lineitem.l_discount (#6))) AS (#16), lineitem.l_returnflag (#8) AS (#28)] - │ └── Scan - │ ├── table: default.lineitem (#0) - │ ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] - │ ├── order by: [] - │ └── limit: NONE - └── Scan - ├── table: default.orders (#1) - ├── filters: [gte(orders.o_orderdate (#22), '1995-05-01'), lte(orders.o_orderdate (#22), '1995-05-31')] - ├── order by: [] - └── limit: NONE + ├── Scan + │ ├── table: default.orders (#1) + │ ├── filters: [gte(orders.o_orderdate (#22), '1995-05-01'), lte(orders.o_orderdate (#22), '1995-05-31')] + │ ├── order by: [] + │ └── limit: NONE + └── EvalScalar + ├── scalars: [lineitem.l_orderkey (#0) AS (#0), sum(l_extendedprice * (1 - l_discount)) (#17) AS (#17)] + └── Aggregate(Final) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] + └── Aggregate(Partial) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] + └── EvalScalar + ├── scalars: [lineitem.l_orderkey (#0) AS (#0), multiply(lineitem.l_extendedprice (#5), minus(1, lineitem.l_discount (#6))) AS (#16), lineitem.l_returnflag (#8) AS (#28)] + └── Scan + ├── table: default.lineitem (#0) + ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] + ├── order by: [] + └── limit: NONE === #3 apply plan 0 === EvalScalar @@ -491,34 +491,34 @@ EvalScalar ├── build keys: [orders.o_orderkey (#18)] ├── probe keys: [lineitem.l_orderkey (#0)] ├── other filters: [] - ├── Aggregate(Final) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum(l_extendedprice * (1 - l_discount)) (#17)) AS (#27)] - │ └── Aggregate(Partial) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum(l_extendedprice * (1 - l_discount)) (#17)) AS (#27)] - │ └── EvalScalar - │ ├── scalars: [sum(l_extendedprice * (1 - l_discount)) (#17) AS (#17), lineitem.l_orderkey (#0) AS (#29)] - │ └── EvalScalar - │ ├── scalars: [lineitem.l_orderkey (#0) AS (#0), sum(l_extendedprice * (1 - l_discount)) (#17) AS (#17)] - │ └── Aggregate(Final) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] - │ └── Aggregate(Partial) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] - │ └── EvalScalar - │ ├── scalars: [lineitem.l_orderkey (#0) AS (#0), multiply(lineitem.l_extendedprice (#5), minus(1, lineitem.l_discount (#6))) AS (#16), lineitem.l_returnflag (#8) AS (#28)] - │ └── Scan - │ ├── table: default.lineitem (#0) - │ ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] - │ ├── order by: [] - │ └── limit: NONE - └── Scan - ├── table: default.orders (#1) - ├── filters: [gte(orders.o_orderdate (#22), '1995-05-01'), lte(orders.o_orderdate (#22), '1995-05-31')] - ├── order by: [] - └── limit: NONE + ├── Scan + │ ├── table: default.orders (#1) + │ ├── filters: [gte(orders.o_orderdate (#22), '1995-05-01'), lte(orders.o_orderdate (#22), '1995-05-31')] + │ ├── order by: [] + │ └── limit: NONE + └── Aggregate(Final) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum(l_extendedprice * (1 - l_discount)) (#17)) AS (#27)] + └── Aggregate(Partial) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum(l_extendedprice * (1 - l_discount)) (#17)) AS (#27)] + └── EvalScalar + ├── scalars: [sum(l_extendedprice * (1 - l_discount)) (#17) AS (#17), lineitem.l_orderkey (#0) AS (#29)] + └── EvalScalar + ├── scalars: [lineitem.l_orderkey (#0) AS (#0), sum(l_extendedprice * (1 - l_discount)) (#17) AS (#17)] + └── Aggregate(Final) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] + └── Aggregate(Partial) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] + └── EvalScalar + ├── scalars: [lineitem.l_orderkey (#0) AS (#0), multiply(lineitem.l_extendedprice (#5), minus(1, lineitem.l_discount (#6))) AS (#16), lineitem.l_returnflag (#8) AS (#28)] + └── Scan + ├── table: default.lineitem (#0) + ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] + ├── order by: [] + └── limit: NONE === #3 apply plan 1 === EvalScalar @@ -535,32 +535,32 @@ EvalScalar ├── build keys: [orders.o_orderkey (#18)] ├── probe keys: [lineitem.l_orderkey (#0)] ├── other filters: [] - ├── EvalScalar - │ ├── scalars: [lineitem.l_orderkey (#0) AS (#0), sum(l_extendedprice * (1 - l_discount)) (#17) AS (#17)] - │ └── Aggregate(Final) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] - │ └── Aggregate(Partial) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] - │ └── EvalScalar - │ ├── scalars: [lineitem.l_orderkey (#0) AS (#0), multiply(lineitem.l_extendedprice (#5), minus(1, lineitem.l_discount (#6))) AS (#16), lineitem.l_returnflag (#8) AS (#28)] - │ └── Scan - │ ├── table: default.lineitem (#0) - │ ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] - │ ├── order by: [] - │ └── limit: NONE - └── Aggregate(Final) - ├── group items: [orders.o_orderkey (#18) AS (#18)] - ├── aggregate functions: [count() AS (#31)] - └── Aggregate(Partial) - ├── group items: [orders.o_orderkey (#18) AS (#18)] - ├── aggregate functions: [count() AS (#31)] - └── Scan - ├── table: default.orders (#1) - ├── filters: [gte(orders.o_orderdate (#22), '1995-05-01'), lte(orders.o_orderdate (#22), '1995-05-31')] - ├── order by: [] - └── limit: NONE + ├── Aggregate(Final) + │ ├── group items: [orders.o_orderkey (#18) AS (#18)] + │ ├── aggregate functions: [count() AS (#31)] + │ └── Aggregate(Partial) + │ ├── group items: [orders.o_orderkey (#18) AS (#18)] + │ ├── aggregate functions: [count() AS (#31)] + │ └── Scan + │ ├── table: default.orders (#1) + │ ├── filters: [gte(orders.o_orderdate (#22), '1995-05-01'), lte(orders.o_orderdate (#22), '1995-05-31')] + │ ├── order by: [] + │ └── limit: NONE + └── EvalScalar + ├── scalars: [lineitem.l_orderkey (#0) AS (#0), sum(l_extendedprice * (1 - l_discount)) (#17) AS (#17)] + └── Aggregate(Final) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] + └── Aggregate(Partial) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] + └── EvalScalar + ├── scalars: [lineitem.l_orderkey (#0) AS (#0), multiply(lineitem.l_extendedprice (#5), minus(1, lineitem.l_discount (#6))) AS (#16), lineitem.l_returnflag (#8) AS (#28)] + └── Scan + ├── table: default.lineitem (#0) + ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] + ├── order by: [] + └── limit: NONE === #3 apply plan 2 === EvalScalar @@ -578,37 +578,37 @@ EvalScalar ├── probe keys: [lineitem.l_orderkey (#0)] ├── other filters: [] ├── Aggregate(Final) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum(l_extendedprice * (1 - l_discount)) (#17)) AS (#27)] + │ ├── group items: [orders.o_orderkey (#18) AS (#18)] + │ ├── aggregate functions: [count() AS (#31)] │ └── Aggregate(Partial) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum(l_extendedprice * (1 - l_discount)) (#17)) AS (#27)] - │ └── EvalScalar - │ ├── scalars: [sum(l_extendedprice * (1 - l_discount)) (#17) AS (#17), lineitem.l_orderkey (#0) AS (#29)] - │ └── EvalScalar - │ ├── scalars: [lineitem.l_orderkey (#0) AS (#0), sum(l_extendedprice * (1 - l_discount)) (#17) AS (#17)] - │ └── Aggregate(Final) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] - │ └── Aggregate(Partial) - │ ├── group items: [lineitem.l_orderkey (#0) AS (#0)] - │ ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] - │ └── EvalScalar - │ ├── scalars: [lineitem.l_orderkey (#0) AS (#0), multiply(lineitem.l_extendedprice (#5), minus(1, lineitem.l_discount (#6))) AS (#16), lineitem.l_returnflag (#8) AS (#28)] - │ └── Scan - │ ├── table: default.lineitem (#0) - │ ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] - │ ├── order by: [] - │ └── limit: NONE + │ ├── group items: [orders.o_orderkey (#18) AS (#18)] + │ ├── aggregate functions: [count() AS (#31)] + │ └── Scan + │ ├── table: default.orders (#1) + │ ├── filters: [gte(orders.o_orderdate (#22), '1995-05-01'), lte(orders.o_orderdate (#22), '1995-05-31')] + │ ├── order by: [] + │ └── limit: NONE └── Aggregate(Final) - ├── group items: [orders.o_orderkey (#18) AS (#18)] - ├── aggregate functions: [count() AS (#31)] + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum(l_extendedprice * (1 - l_discount)) (#17)) AS (#27)] └── Aggregate(Partial) - ├── group items: [orders.o_orderkey (#18) AS (#18)] - ├── aggregate functions: [count() AS (#31)] - └── Scan - ├── table: default.orders (#1) - ├── filters: [gte(orders.o_orderdate (#22), '1995-05-01'), lte(orders.o_orderdate (#22), '1995-05-31')] - ├── order by: [] - └── limit: NONE + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum(l_extendedprice * (1 - l_discount)) (#17)) AS (#27)] + └── EvalScalar + ├── scalars: [sum(l_extendedprice * (1 - l_discount)) (#17) AS (#17), lineitem.l_orderkey (#0) AS (#29)] + └── EvalScalar + ├── scalars: [lineitem.l_orderkey (#0) AS (#0), sum(l_extendedprice * (1 - l_discount)) (#17) AS (#17)] + └── Aggregate(Final) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] + └── Aggregate(Partial) + ├── group items: [lineitem.l_orderkey (#0) AS (#0)] + ├── aggregate functions: [sum(sum_arg_0 (#16)) AS (#17)] + └── EvalScalar + ├── scalars: [lineitem.l_orderkey (#0) AS (#0), multiply(lineitem.l_extendedprice (#5), minus(1, lineitem.l_discount (#6))) AS (#16), lineitem.l_returnflag (#8) AS (#28)] + └── Scan + ├── table: default.lineitem (#0) + ├── filters: [eq(lineitem.l_returnflag (#8), 'R')] + ├── order by: [] + └── limit: NONE diff --git a/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/filter_rules/push_down_filter_join_test.rs b/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/filter_rules/push_down_filter_join_test.rs index 4500d4b72d89f..40ff7dbf0a1a8 100644 --- a/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/filter_rules/push_down_filter_join_test.rs +++ b/src/query/service/tests/it/sql/planner/optimizer/optimizers/rule/filter_rules/push_down_filter_join_test.rs @@ -539,7 +539,7 @@ fn test_push_down_filter_left_join() -> Result<()> { Filter [or(and(t2.id > 10, t2.value < 50), and(t2.id > 20, t2.qty < 100))] Inner Join [t1.id = t2.id] Table 0 - Filter [or(and(t2.id > 10, t2.value < 50), t2.id > 20)] + Filter [or_filters(and_filters(t2.id > 10, t2.value < 50), t2.id > 20)] Table 1 "#, inspired_by: "TPC-DS Query 13 - complex OR conditions with price ranges", @@ -784,7 +784,7 @@ fn test_push_down_filter_right_join() -> Result<()> { after_pattern: r#" Filter [or(and(t1.a > 10, t1.value < 50), and(t1.a > 20, t1.qty < 100))] Inner Join [t1.id = t2.id] - Filter [or(t1.a > 10, t1.a > 20)] + Filter [or_filters(t1.a > 10, t1.a > 20)] Table 0 Table 1 "#, @@ -1071,7 +1071,7 @@ fn test_push_down_filter_full_join() -> Result<()> { after_pattern: r#" Filter [or(and(t1.a > 10, t1.value < 50), and(t1.a > 20, t1.qty < 100))] Left Join [t1.id = t2.id] - Filter [or(t1.a > 10, t1.a > 20)] + Filter [or_filters(t1.a > 10, t1.a > 20)] Table 0 Table 1 "#, @@ -1181,9 +1181,9 @@ fn test_push_down_complex_or_expressions() -> Result<()> { let after_pattern = r#" Filter [or(and(t1.a = 1, t2.b = 2), and(t1.a = 2, t2.b = 1))] Inner Join [t1.id = t2.id] - Filter [or(t1.a = 1, t1.a = 2)] + Filter [or_filters(t1.a = 1, t1.a = 2)] Table 0 - Filter [or(t2.b = 2, t2.b = 1)] + Filter [or_filters(t2.b = 2, t2.b = 1)] Table 1 "#; diff --git a/src/query/sql/Cargo.toml b/src/query/sql/Cargo.toml index 03317ba21d77e..8a989ca167708 100644 --- a/src/query/sql/Cargo.toml +++ b/src/query/sql/Cargo.toml @@ -75,6 +75,7 @@ unicase = { workspace = true } url = { workspace = true } [dev-dependencies] +goldenfile = { workspace = true } [lints] workspace = true diff --git a/src/query/sql/src/planner/binder/ddl/index.rs b/src/query/sql/src/planner/binder/ddl/index.rs index 85c7a4354eb0e..3426ae7b8bb7b 100644 --- a/src/query/sql/src/planner/binder/ddl/index.rs +++ b/src/query/sql/src/planner/binder/ddl/index.rs @@ -191,7 +191,7 @@ impl Binder { let full_table_name = format!("{catalog}.{database}.{table_name}"); metadata .write() - .add_agg_indexes(full_table_name, agg_indexes); + .add_agg_indices(full_table_name, agg_indexes); } } diff --git a/src/query/sql/src/planner/binder/scalar_common.rs b/src/query/sql/src/planner/binder/scalar_common.rs index b90fd579107bf..05a37a62f2327 100644 --- a/src/query/sql/src/planner/binder/scalar_common.rs +++ b/src/query/sql/src/planner/binder/scalar_common.rs @@ -134,7 +134,7 @@ fn fold_or_arguments(iter: impl Iterator) -> ScalarExpr { span: None, func_name: "or".to_string(), params: vec![], - arguments: vec![acc, arg.clone()], + arguments: vec![acc, arg], } .into() }, diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index 662f4c209696c..113098480c716 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -88,7 +88,6 @@ use crate::plans::RecursiveCteScan; use crate::plans::RelOperator; use crate::plans::Scan; use crate::plans::SecureFilter; -use crate::plans::Statistics; impl Binder { pub fn bind_dummy_table( @@ -443,7 +442,6 @@ impl Binder { Scan { table_index, columns: columns.into_iter().map(|col| col.index()).collect(), - statistics: Arc::new(Statistics::default()), change_type, sample: sample.clone(), scan_id, diff --git a/src/query/sql/src/planner/metadata/metadata.rs b/src/query/sql/src/planner/metadata/metadata.rs index 962edc6436571..59361ab00e4e3 100644 --- a/src/query/sql/src/planner/metadata/metadata.rs +++ b/src/query/sql/src/planner/metadata/metadata.rs @@ -16,6 +16,7 @@ use std::collections::BTreeSet; use std::collections::HashMap; use std::collections::HashSet; use std::collections::VecDeque; +use std::collections::hash_map::Entry; use std::fmt::Debug; use std::fmt::Formatter; use std::sync::Arc; @@ -73,7 +74,7 @@ pub struct Metadata { non_lazy_columns: ColumnSet, /// Mappings from table index to _row_id column index. table_row_id_index: HashMap, - agg_indexes: HashMap>, + agg_indices: HashMap>, max_column_position: usize, // for CSV /// Scan id of each scan operator. @@ -318,19 +319,29 @@ impl Metadata { column_index } - pub fn add_agg_indexes(&mut self, table: String, agg_indexes: Vec<(u64, String, SExpr)>) { - self.agg_indexes - .entry(table) - .and_modify(|indexes| indexes.extend_from_slice(&agg_indexes)) - .or_insert(agg_indexes); + pub fn add_agg_indices(&mut self, table: String, agg_indices: Vec<(u64, String, SExpr)>) { + match self.agg_indices.entry(table) { + Entry::Occupied(occupied) => occupied.into_mut().extend(agg_indices), + Entry::Vacant(vacant) => { + vacant.insert(agg_indices); + } + } + } + + pub fn agg_indices(&self) -> &HashMap> { + &self.agg_indices + } + + pub fn replace_agg_indices(&mut self, agg_indices: HashMap>) { + self.agg_indices = agg_indices } - pub fn get_agg_indexes(&self, table: &str) -> Option<&[(u64, String, SExpr)]> { - self.agg_indexes.get(table).map(|v| v.as_slice()) + pub fn get_agg_indices(&self, table: &str) -> Option<&[(u64, String, SExpr)]> { + self.agg_indices.get(table).map(|v| v.as_slice()) } - pub fn has_agg_indexes(&self) -> bool { - !self.agg_indexes.is_empty() + pub fn has_agg_indices(&self) -> bool { + !self.agg_indices.is_empty() } fn remove_cte_suffix(mut table_name: String, cte_suffix_name: Option) -> String { diff --git a/src/query/sql/src/planner/optimizer/ir/stats/column_stat.rs b/src/query/sql/src/planner/optimizer/ir/stats/column_stat.rs index 79b407cebe535..562deaf52e7e9 100644 --- a/src/query/sql/src/planner/optimizer/ir/stats/column_stat.rs +++ b/src/query/sql/src/planner/optimizer/ir/stats/column_stat.rs @@ -17,7 +17,6 @@ use std::collections::HashMap; use databend_common_storage::Datum; use databend_common_storage::Histogram; -use super::selectivity::DEFAULT_SELECTIVITY; use crate::IndexType; pub type ColumnStatSet = HashMap; @@ -69,17 +68,4 @@ impl Ndv { Ndv::Max(v) => v, } } - - pub fn equal_selectivity(&self, not: bool) -> f64 { - let ndv = self.value(); - if ndv == 0.0 { - 0.0 - } else { - let selectivity = if not { 1.0 - 1.0 / ndv } else { 1.0 / ndv }; - match self { - Ndv::Stat(_) => selectivity, - Ndv::Max(_) => selectivity.max(DEFAULT_SELECTIVITY), - } - } - } } diff --git a/src/query/sql/src/planner/optimizer/ir/stats/selectivity.rs b/src/query/sql/src/planner/optimizer/ir/stats/selectivity.rs index 195dc932ddf0f..75be9b67e7304 100644 --- a/src/query/sql/src/planner/optimizer/ir/stats/selectivity.rs +++ b/src/query/sql/src/planner/optimizer/ir/stats/selectivity.rs @@ -13,8 +13,6 @@ // limitations under the License. use std::cmp::Ordering; -use std::cmp::max; -use std::collections::HashSet; use databend_common_exception::ErrorCode; use databend_common_exception::Result; @@ -28,17 +26,19 @@ use databend_common_expression::types::NumberScalar; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_storage::DEFAULT_HISTOGRAM_BUCKETS; use databend_common_storage::Datum; -use databend_common_storage::F64; +use databend_common_storage::Histogram; use crate::IndexType; use crate::optimizer::ir::ColumnStat; +use crate::optimizer::ir::ColumnStatSet; use crate::optimizer::ir::HistogramBuilder; -use crate::optimizer::ir::Statistics; +use crate::optimizer::ir::Ndv; use crate::plans::BoundColumnRef; use crate::plans::ComparisonOp; use crate::plans::ConstantExpr; use crate::plans::FunctionCall; use crate::plans::ScalarExpr; +use crate::plans::Visitor; /// A default selectivity factor for a predicate /// that we cannot estimate the selectivity for it. @@ -53,193 +53,142 @@ const FIXED_CHAR_SEL: f64 = 0.5; const ANY_CHAR_SEL: f64 = 0.9; // not 1, since it won't match end-of-string const FULL_WILDCARD_SEL: f64 = 2.0; -pub struct SelectivityEstimator<'a> { +pub struct SelectivityEstimator { pub cardinality: f64, - pub input_stat: &'a mut Statistics, - pub updated_column_indexes: HashSet, + column_stats: ColumnStatSet, + overrides: ColumnStatSet, } -impl<'a> SelectivityEstimator<'a> { - pub fn new( - input_stat: &'a mut Statistics, - cardinality: f64, - updated_column_indexes: HashSet, - ) -> Self { +impl SelectivityEstimator { + pub fn new(input_stat: ColumnStatSet, cardinality: f64) -> Self { Self { cardinality, - input_stat, - updated_column_indexes, + column_stats: input_stat, + overrides: ColumnStatSet::new(), } } - /// Compute the selectivity of a predicate. - pub fn compute_selectivity(&mut self, predicate: &ScalarExpr, update: bool) -> Result { - Ok(match predicate { - ScalarExpr::BoundColumnRef(_) => { - // If a column ref is on top of a predicate, e.g. - // `SELECT * FROM t WHERE c1`, the selectivity is 1. - 1.0 - } - - ScalarExpr::ConstantExpr(constant) => { - if is_true_constant_predicate(constant) { - 1.0 - } else { - 0.0 - } - } - - ScalarExpr::FunctionCall(func) if func.func_name == "and" => { - let left_selectivity = self.compute_selectivity(&func.arguments[0], update)?; - let right_selectivity = self.compute_selectivity(&func.arguments[1], update)?; - left_selectivity.min(right_selectivity) - } - - ScalarExpr::FunctionCall(func) - if matches!(func.func_name.as_str(), "or" | "or_filters") => - { - func.arguments - .iter() - .map(|arg| self.compute_selectivity(arg, false)) - .try_fold(0.0, |acc, p| { - let p = p?; - Result::Ok(acc + p - acc * p) - })? - } + fn merged_column_stats(&self) -> ColumnStatSet { + let mut merged = self.column_stats.clone(); + merged.extend(self.overrides.clone()); + merged + } - ScalarExpr::FunctionCall(func) if func.func_name == "not" => { - match &func.arguments[0] { - ScalarExpr::BoundColumnRef(_) => { - // Not column e.g. - // `SELECT * FROM t WHERE not c1`, the selectivity is 1. - 1.0 - } - ScalarExpr::FunctionCall(func) if func.func_name == "not" => { - // (NOT (NOT predicate)) - self.compute_selectivity(&func.arguments[0], false)? - } - _ => { - let argument_selectivity = - self.compute_selectivity(&func.arguments[0], false)?; - 1.0 - argument_selectivity - } - } - } + pub fn column_stats(&self) -> ColumnStatSet { + self.merged_column_stats() + } - ScalarExpr::FunctionCall(func) => { - if func.func_name.eq("like") { - return self.compute_like_selectivity(func); - } - if func.func_name.eq("is_not_null") { - return self.compute_is_not_null_selectivity(&func.arguments[0]); - } - if let Some(op) = ComparisonOp::try_from_func_name(&func.func_name) { - return self.compute_selectivity_comparison_expr( - op, - &func.arguments[0], - &func.arguments[1], - update, - ); - } + pub fn into_column_stats(self) -> ColumnStatSet { + if self.overrides.is_empty() { + return self.column_stats; + } + let mut merged = self.column_stats; + merged.extend(self.overrides); + merged + } - DEFAULT_SELECTIVITY - } + pub fn apply(&mut self, predicates: &[ScalarExpr]) -> Result { + let expr = match predicates { + [pred] => pred.clone(), + predicates => ScalarExpr::FunctionCall(FunctionCall { + span: None, + func_name: "and_filters".to_string(), + params: vec![], + arguments: predicates.to_vec(), + }), + }; + let mut visitor = SelectivityVisitor { + cardinality: self.cardinality, + selectivity: Selectivity::Unknown, + column_stats: &self.column_stats, + overrides: ColumnStatSet::new(), + }; + visitor.visit(&expr)?; + let selectivity = match visitor.selectivity { + Selectivity::Unknown => DEFAULT_SELECTIVITY, + Selectivity::LowerBound => UNKNOWN_COL_STATS_FILTER_SEL_LOWER_BOUND, + Selectivity::N(n) => n, + }; + self.overrides = visitor.overrides; + self.update_other_statistic_by_selectivity(selectivity); - _ => DEFAULT_SELECTIVITY, - }) + Ok(self.cardinality * selectivity) } - // The method uses probability predication to compute like selectivity. - // The core idea is from postgresql. - fn compute_like_selectivity(&mut self, func: &FunctionCall) -> Result { - let right = &func.arguments[1]; - if let ScalarExpr::ConstantExpr(ConstantExpr { - value: Scalar::String(patt), - .. - }) = right - { - let mut sel = 1.0_f64; - - // Skip any leading %; it's already factored into initial sel - let mut chars = patt.chars().peekable(); - if matches!(chars.peek(), Some(&'%') | Some(&'_')) { - chars.next(); // consume the leading % + // Update other columns' statistic according to selectivity. + pub fn update_other_statistic_by_selectivity(&mut self, selectivity: f64) { + if selectivity == MAX_SELECTIVITY { + return; + } + + for (index, column_stat) in &self.column_stats { + if self.overrides.contains_key(index) { + continue; } + let mut column_stat = column_stat.clone(); + column_stat.ndv = column_stat.ndv.reduce_by_selectivity(selectivity); + column_stat.null_count = (column_stat.null_count as f64 * selectivity).ceil() as u64; - while let Some(c) = chars.next() { - match c { - '%' => sel *= FULL_WILDCARD_SEL, - '_' => sel *= ANY_CHAR_SEL, - '\\' => { - if chars.peek().is_some() { - chars.next(); - } - sel *= FIXED_CHAR_SEL; + if let Some(histogram) = &mut column_stat.histogram { + if histogram.accuracy { + // If selectivity < 0.2, most buckets are invalid and + // the accuracy histogram can be discarded. + // Todo: find a better way to update histogram. + if selectivity < 0.2 { + column_stat.histogram = None; + } + } else if column_stat.ndv.value() as u64 <= 2 { + column_stat.histogram = None; + } else { + for bucket in histogram.buckets.iter_mut() { + bucket.update(selectivity); } - _ => sel *= FIXED_CHAR_SEL, } } - // Could get sel > 1 if multiple wildcards - if sel > 1.0 { - sel = 1.0; - } - Ok(sel) - } else { - Ok(DEFAULT_SELECTIVITY) + self.overrides.insert(*index, column_stat); } } +} - fn compute_is_not_null_selectivity(&mut self, expr: &ScalarExpr) -> Result { - match expr { - ScalarExpr::BoundColumnRef(column_ref) => { - let column_stat = if let Some(stat) = self - .input_stat - .column_stats - .get_mut(&column_ref.column.index) - { - stat - } else { - return Ok(DEFAULT_SELECTIVITY); - }; - if self.cardinality == 0.0 { - return Ok(0.0); - } - let selectivity = - (self.cardinality - column_stat.null_count as f64) / self.cardinality; - Ok(selectivity) - } - _ => Ok(DEFAULT_SELECTIVITY), - } - } +#[derive(Clone)] +struct SelectivityVisitor<'a> { + cardinality: f64, + selectivity: Selectivity, + column_stats: &'a ColumnStatSet, + overrides: ColumnStatSet, +} + +#[derive(Debug, Clone, Copy, Default, enum_as_inner::EnumAsInner)] +pub enum Selectivity { + #[default] + Unknown, + LowerBound, + N(f64), +} - fn compute_selectivity_comparison_expr( +impl SelectivityVisitor<'_> { + fn compute_comparison( &mut self, mut op: ComparisonOp, left: &ScalarExpr, right: &ScalarExpr, - update: bool, - ) -> Result { + ) -> Result { match (left, right) { (ScalarExpr::BoundColumnRef(column_ref), ScalarExpr::ConstantExpr(constant)) | (ScalarExpr::ConstantExpr(constant), ScalarExpr::BoundColumnRef(column_ref)) => { // Check if there is available histogram for the column. - let column_stat = if let Some(stat) = self - .input_stat - .column_stats - .get_mut(&column_ref.column.index) - { - stat - } else { + let column_index = column_ref.column.index; + let Some(column_stat) = self.get_column_stat(column_index) else { // The column is derived column, give a small selectivity currently. // Need to improve it later. // Another case: column is from system table, such as numbers. We shouldn't use numbers() table to test cardinality estimation. - return Ok(UNKNOWN_COL_STATS_FILTER_SEL_LOWER_BOUND); + return Ok(Selectivity::LowerBound); }; let const_datum = if let Some(datum) = Datum::from_scalar(constant.value.clone()) { datum } else { - return Ok(DEFAULT_SELECTIVITY); + return Ok(Selectivity::Unknown); }; return match op { @@ -248,28 +197,24 @@ impl<'a> SelectivityEstimator<'a> { // value to estimate the selectivity. This assumes that // the column is in a uniform distribution. let selectivity = evaluate_equal(column_stat, false, constant); - if update { - update_statistic( - column_stat, - const_datum.clone(), - const_datum, - selectivity, - )?; - self.updated_column_indexes.insert(column_ref.column.index); - } + let column_stat = self + .ensure_column_stat(column_index) + .expect("checked above"); + update_statistic_eq(column_stat, const_datum); Ok(selectivity) } ComparisonOp::NotEqual => { - // For not equal predicate, we treat it as opposite of equal predicate. let selectivity = evaluate_equal(column_stat, true, constant); - if update { + if let Selectivity::N(n) = selectivity { + let column_stat = self + .ensure_column_stat(column_index) + .expect("checked above"); update_statistic( column_stat, column_stat.min.clone(), column_stat.max.clone(), - selectivity, + n, )?; - self.updated_column_indexes.insert(column_ref.column.index); } Ok(selectivity) } @@ -277,14 +222,42 @@ impl<'a> SelectivityEstimator<'a> { if let ScalarExpr::ConstantExpr(_) = left { op = op.reverse(); } - Self::compute_binary_comparison_selectivity( - &op, - &const_datum, - update, - column_ref, - column_stat, - &mut self.updated_column_indexes, - ) + match &column_stat.histogram { + Some(histogram) => { + let selectivity = Self::compute_histogram_comparison( + histogram, + op, + &const_datum, + )?; + if let Selectivity::N(n) = selectivity { + let (new_min, new_max) = match op { + ComparisonOp::GT | ComparisonOp::GTE => { + let new_min = const_datum.clone(); + let new_max = column_stat.max.clone(); + (new_min, new_max) + } + ComparisonOp::LT | ComparisonOp::LTE => { + let new_max = const_datum.clone(); + let new_min = column_stat.min.clone(); + (new_min, new_max) + } + _ => unreachable!(), + }; + let column_stat = self + .ensure_column_stat(column_index) + .expect("checked above"); + update_statistic(column_stat, new_min, new_max, n)?; + } + Ok(selectivity) + } + None => { + if column_ref.column.data_type.remove_nullable().is_integer() { + self.compute_ndv_comparison(op, &const_datum, column_index) + } else { + Ok(Selectivity::Unknown) + } + } + } } }; } @@ -305,7 +278,7 @@ impl<'a> SelectivityEstimator<'a> { .. }) = expr { - return if v { Ok(1.0) } else { Ok(0.0) }; + return Ok(Selectivity::N(if v { 1.0 } else { 0.0 })); } } (ScalarExpr::FunctionCall(func), ScalarExpr::ConstantExpr(val)) => { @@ -323,10 +296,10 @@ impl<'a> SelectivityEstimator<'a> { if let Some(remainder) = Datum::from_scalar(val.value.clone()) { let remainder = remainder.to_double()?; if remainder >= mod_num { - return Ok(0.0); + return Ok(Selectivity::N(0.0)); } } - return Ok(1.0 / mod_num); + return Ok(Selectivity::N(1.0 / mod_num)); } } } @@ -334,183 +307,346 @@ impl<'a> SelectivityEstimator<'a> { _ => (), } - Ok(DEFAULT_SELECTIVITY) + Ok(Selectivity::Unknown) } - // Update other columns' statistic according to selectivity. - pub fn update_other_statistic_by_selectivity(&mut self, selectivity: f64) { - for (index, column_stat) in self.input_stat.column_stats.iter_mut() { - if self.updated_column_indexes.contains(index) { - continue; + fn compute_ndv_comparison( + &mut self, + comparison_op: ComparisonOp, + const_datum: &Datum, + column_index: IndexType, + ) -> Result { + let column_stat = self.ensure_column_stat(column_index).unwrap(); + + let min = column_stat.min.to_double()?; + let max = column_stat.max.to_double()?; + let ndv = column_stat.ndv; + let numeric_literal = const_datum.to_double()?; + + let cmp_min = numeric_literal.total_cmp(&min); + let cmp_max = numeric_literal.total_cmp(&max); + + use Ordering::*; + let selectivity = match (comparison_op, cmp_min, cmp_max) { + (ComparisonOp::LT, Less | Equal, _) => 0.0, + (ComparisonOp::LTE, Less, _) => 0.0, + (ComparisonOp::LTE, Equal, _) => { + update_statistic_eq(column_stat, const_datum.clone()); + return Ok(ndv.equal_selectivity(false)); } - column_stat.ndv = column_stat.ndv.reduce_by_selectivity(selectivity); - - if let Some(histogram) = &mut column_stat.histogram { - if histogram.accuracy { - // If selectivity < 0.2, most buckets are invalid and - // the accuracy histogram can be discarded. - // Todo: find a better way to update histogram. - if selectivity < 0.2 { - column_stat.histogram = None; - } - continue; + (ComparisonOp::LT | ComparisonOp::LTE, Greater, Greater) => 1.0, + (ComparisonOp::LT, Greater, Equal) => { + let selectivity = ndv.equal_selectivity(true); + if let Selectivity::N(n) = selectivity { + update_statistic( + column_stat, + column_stat.min.clone(), + column_stat.max.clone(), + n, + )?; } - if column_stat.ndv.value() as u64 <= 2 { - column_stat.histogram = None; - } else { - for bucket in histogram.buckets.iter_mut() { - bucket.update(selectivity); - } + return Ok(selectivity); + } + (ComparisonOp::LT | ComparisonOp::LTE, _, _) => { + let n = (numeric_literal - min + 1.0) / (max - min + 1.0); + update_statistic(column_stat, column_stat.min.clone(), const_datum.clone(), n)?; + return Ok(Selectivity::N(n)); + } + + (ComparisonOp::GT, _, Greater | Equal) => 0.0, + (ComparisonOp::GTE, _, Greater) => 0.0, + (ComparisonOp::GTE, Less | Equal, _) => 1.0, + (ComparisonOp::GT, Less, _) => 1.0, + (ComparisonOp::GT, Equal, _) => { + let selectivity = ndv.equal_selectivity(true); + if let Selectivity::N(n) = selectivity { + update_statistic( + column_stat, + column_stat.min.clone(), + column_stat.max.clone(), + n, + )?; } + return Ok(selectivity); + } + (ComparisonOp::GTE, _, Equal) => { + update_statistic_eq(column_stat, const_datum.clone()); + return Ok(ndv.equal_selectivity(false)); + } + (ComparisonOp::GT | ComparisonOp::GTE, _, _) => { + let n = (max - numeric_literal + 1.0) / (max - min + 1.0); + update_statistic(column_stat, const_datum.clone(), column_stat.max.clone(), n)?; + return Ok(Selectivity::N(n)); } + + _ => unreachable!(), + }; + if selectivity == 0.0 { + column_stat.ndv = column_stat.ndv.reduce_by_selectivity(0.0); } + Ok(Selectivity::N(selectivity)) } - fn compute_binary_comparison_selectivity( - comparison_op: &ComparisonOp, + fn compute_histogram_comparison( + histogram: &Histogram, + comparison_op: ComparisonOp, const_datum: &Datum, - update: bool, - column_ref: &BoundColumnRef, - column_stat: &mut ColumnStat, - updated_column_indexes: &mut HashSet, - ) -> Result { - let selectivity = match column_stat.histogram.as_ref() { - None if const_datum.is_numeric() => { - // If there is no histogram and the column isn't numeric, return default selectivity. - if !column_stat.min.is_numeric() || !column_stat.max.is_numeric() { - return Ok(DEFAULT_SELECTIVITY); - } + ) -> Result { + let mut num_selected = 0.0; + for bucket in histogram.buckets_iter() { + let lower_bound = bucket.lower_bound(); + let upper_bound = bucket.upper_bound(); + + if !const_datum.type_comparable(lower_bound) { + return Ok(Selectivity::Unknown); + } - let min = column_stat.min.to_double()?; - let max = column_stat.max.to_double()?; - let ndv = column_stat.ndv; - let numeric_literal = const_datum.to_double()?; - - let cmp_min = numeric_literal.total_cmp(&min); - let cmp_max = numeric_literal.total_cmp(&max); - - use Ordering::*; - match (comparison_op, cmp_min, cmp_max) { - (ComparisonOp::LT, Less | Equal, _) => 0.0, - (ComparisonOp::LTE, Less, _) => 0.0, - (ComparisonOp::LTE, Equal, _) => ndv.equal_selectivity(false), - (ComparisonOp::LT | ComparisonOp::LTE, Greater, Greater) => 1.0, - (ComparisonOp::LT, Greater, Equal) => ndv.equal_selectivity(true), - (ComparisonOp::LT | ComparisonOp::LTE, _, _) => { - (numeric_literal - min) / (max - min + 1.0) - } + let const_gte_upper_bound = matches!( + const_datum.compare(upper_bound)?, + Ordering::Greater | Ordering::Equal + ); + let (no_overlap, complete_overlap) = match comparison_op { + ComparisonOp::LT => ( + matches!( + const_datum.compare(lower_bound)?, + Ordering::Less | Ordering::Equal + ), + const_gte_upper_bound, + ), + ComparisonOp::LTE => ( + matches!(const_datum.compare(lower_bound)?, Ordering::Less), + const_gte_upper_bound, + ), + ComparisonOp::GT => ( + const_gte_upper_bound, + matches!(const_datum.compare(lower_bound)?, Ordering::Less), + ), + ComparisonOp::GTE => ( + const_gte_upper_bound, + matches!( + const_datum.compare(lower_bound)?, + Ordering::Less | Ordering::Equal + ), + ), + _ => unreachable!(), + }; - (ComparisonOp::GT, _, Greater | Equal) => 0.0, - (ComparisonOp::GTE, _, Greater) => 0.0, - (ComparisonOp::GTE, Less | Equal, _) => 1.0, - (ComparisonOp::GT, Less, _) => 1.0, - (ComparisonOp::GT, Equal, _) => ndv.equal_selectivity(true), - (ComparisonOp::GTE, _, Equal) => ndv.equal_selectivity(false), - (ComparisonOp::GT | ComparisonOp::GTE, _, _) => { - (max - numeric_literal) / (max - min + 1.0) + if complete_overlap { + num_selected += bucket.num_values(); + } else if !no_overlap && const_datum.is_numeric() { + let ndv = bucket.num_distinct(); + let lower_bound = lower_bound.to_double()?; + let upper_bound = upper_bound.to_double()?; + let const_value = const_datum.to_double()?; + + let bucket_range = upper_bound - lower_bound; + let bucket_selectivity = match comparison_op { + ComparisonOp::LT => (const_value - lower_bound) / bucket_range, + ComparisonOp::LTE => { + if const_value == lower_bound { + 1.0 / ndv + } else { + (const_value - lower_bound + 1.0) / bucket_range + } } - + ComparisonOp::GT => { + if const_value == lower_bound { + 1.0 - 1.0 / ndv + } else { + (upper_bound - const_value - 1.0).max(0.0) / bucket_range + } + } + ComparisonOp::GTE => (upper_bound - const_value) / bucket_range, _ => unreachable!(), - } - } - None => { - return Ok(DEFAULT_SELECTIVITY); + }; + num_selected += bucket.num_values() * bucket_selectivity; } - Some(histogram) => { - let mut num_selected = 0.0; - for bucket in histogram.buckets_iter() { - let lower_bound = bucket.lower_bound(); - let upper_bound = bucket.upper_bound(); - - if !const_datum.can_compare(lower_bound) { - return Ok(DEFAULT_SELECTIVITY); + } + + Ok(Selectivity::N(num_selected / histogram.num_values())) + } + + // The method uses probability predication to compute like selectivity. + // The core idea is from postgresql. + fn compute_like(&mut self, func: &FunctionCall) -> Result { + let ScalarExpr::ConstantExpr(ConstantExpr { + value: Scalar::String(patt), + .. + }) = &func.arguments[1] + else { + return Ok(Selectivity::Unknown); + }; + let mut sel = 1.0_f64; + + // Skip any leading %; it's already factored into initial sel + let mut chars = patt.chars().peekable(); + if matches!(chars.peek(), Some(&'%') | Some(&'_')) { + chars.next(); // consume the leading % + } + + while let Some(c) = chars.next() { + match c { + '%' => sel *= FULL_WILDCARD_SEL, + '_' => sel *= ANY_CHAR_SEL, + '\\' => { + if chars.peek().is_some() { + chars.next(); } + sel *= FIXED_CHAR_SEL; + } + _ => sel *= FIXED_CHAR_SEL, + } + } - let const_gte_upper_bound = matches!( - const_datum.compare(upper_bound)?, - Ordering::Greater | Ordering::Equal - ); - let (no_overlap, complete_overlap) = match comparison_op { - ComparisonOp::LT => ( - matches!( - const_datum.compare(lower_bound)?, - Ordering::Less | Ordering::Equal - ), - const_gte_upper_bound, - ), - ComparisonOp::LTE => ( - matches!(const_datum.compare(lower_bound)?, Ordering::Less), - const_gte_upper_bound, - ), - ComparisonOp::GT => ( - const_gte_upper_bound, - matches!(const_datum.compare(lower_bound)?, Ordering::Less), - ), - ComparisonOp::GTE => ( - const_gte_upper_bound, - matches!( - const_datum.compare(lower_bound)?, - Ordering::Less | Ordering::Equal - ), - ), - _ => unreachable!(), - }; + // Could get sel > 1 if multiple wildcards + if sel > 1.0 { + sel = 1.0; + } + Ok(Selectivity::N(sel)) + } - if complete_overlap { - num_selected += bucket.num_values(); - } else if !no_overlap && const_datum.is_numeric() { - let ndv = bucket.num_distinct(); - let lower_bound = lower_bound.to_double()?; - let upper_bound = upper_bound.to_double()?; - let const_value = const_datum.to_double()?; - - let bucket_range = upper_bound - lower_bound; - let bucket_selectivity = match comparison_op { - ComparisonOp::LT => (const_value - lower_bound) / bucket_range, - ComparisonOp::LTE => { - if const_value == lower_bound { - 1.0 / ndv - } else { - (const_value - lower_bound + 1.0) / bucket_range - } - } - ComparisonOp::GT => { - if const_value == lower_bound { - 1.0 - 1.0 / ndv - } else { - (upper_bound - const_value - 1.0).max(0.0) / bucket_range - } - } - ComparisonOp::GTE => (upper_bound - const_value) / bucket_range, - _ => unreachable!(), - }; - num_selected += bucket.num_values() * bucket_selectivity; + fn compute_is_not_null(&mut self, expr: &ScalarExpr) -> Result { + let ScalarExpr::BoundColumnRef(column_ref) = expr else { + return Ok(Selectivity::Unknown); + }; + let Some(column_stat) = self.get_column_stat(column_ref.column.index) else { + return Ok(Selectivity::Unknown); + }; + if self.cardinality == 0.0 { + return Ok(Selectivity::N(0.0)); + } + Ok(Selectivity::N( + (self.cardinality - column_stat.null_count as f64) / self.cardinality, + )) + } + + fn get_column_stat(&self, index: IndexType) -> Option<&ColumnStat> { + self.overrides + .get(&index) + .or_else(|| self.column_stats.get(&index)) + } + + fn ensure_column_stat(&mut self, index: IndexType) -> Option<&mut ColumnStat> { + if !self.overrides.contains_key(&index) { + let stat = self.column_stats.get(&index)?.clone(); + self.overrides.insert(index, stat); + } + self.overrides.get_mut(&index) + } +} + +impl<'a> Visitor<'a> for SelectivityVisitor<'_> { + fn visit_function_call(&mut self, func: &'a FunctionCall) -> Result<()> { + match func.func_name.as_str() { + "and_filters" => { + let mut has_unknown = false; + let mut has_lower_bound = false; + let mut acc = 1.0_f64; + for arg in &func.arguments { + let mut sub_visitor = Self { + cardinality: self.cardinality, + selectivity: Selectivity::Unknown, + column_stats: self.column_stats, + overrides: self.overrides.clone(), + }; + sub_visitor.visit(arg)?; + match sub_visitor.selectivity { + Selectivity::Unknown => has_unknown = true, + Selectivity::LowerBound => has_lower_bound = true, + Selectivity::N(n) => acc = acc.min(n), } + self.overrides.extend(sub_visitor.overrides); } - num_selected / histogram.num_values() + self.selectivity = + if (!has_unknown && !has_lower_bound) || acc < DEFAULT_SELECTIVITY { + Selectivity::N(acc) + } else if has_unknown { + Selectivity::Unknown + } else if has_lower_bound { + Selectivity::LowerBound + } else { + Selectivity::Unknown + } } - }; - if update { - let (new_min, new_max) = match comparison_op { - ComparisonOp::GT | ComparisonOp::GTE => { - let new_min = const_datum.clone(); - let new_max = column_stat.max.clone(); - (new_min, new_max) + "or_filters" => { + let mut has_unknown = false; + let mut has_lower_bound = false; + let mut acc = 0.0_f64; + for arg in &func.arguments { + let mut sub_visitor = Self { + cardinality: self.cardinality, + selectivity: Selectivity::Unknown, + column_stats: self.column_stats, + overrides: self.overrides.clone(), + }; + sub_visitor.visit(arg)?; + match sub_visitor.selectivity { + Selectivity::Unknown => has_unknown = true, + Selectivity::LowerBound => has_lower_bound = true, + Selectivity::N(n) => acc += (1.0 - acc) * n, + } } - ComparisonOp::LT | ComparisonOp::LTE => { - let new_max = const_datum.clone(); - let new_min = column_stat.min.clone(); - (new_min, new_max) + self.selectivity = if (!has_unknown || acc > DEFAULT_SELECTIVITY) + && !has_lower_bound + || acc > UNKNOWN_COL_STATS_FILTER_SEL_LOWER_BOUND + { + Selectivity::N(acc) + } else if has_lower_bound { + Selectivity::LowerBound + } else { + Selectivity::Unknown } - _ => unreachable!(), - }; - update_statistic(column_stat, new_min, new_max, selectivity)?; - updated_column_indexes.insert(column_ref.column.index); + } + + "not" => { + let mut sub_visitor = Self { + cardinality: self.cardinality, + selectivity: Selectivity::Unknown, + column_stats: self.column_stats, + overrides: self.overrides.clone(), + }; + sub_visitor.visit(&func.arguments[0])?; + + self.selectivity = match sub_visitor.selectivity { + Selectivity::N(n) => Selectivity::N(1.0 - n), + selectivity => selectivity, + }; + } + + "like" => { + self.selectivity = self.compute_like(func)?; + } + + "is_not_null" => { + self.selectivity = self.compute_is_not_null(&func.arguments[0])?; + } + + func_name => { + if let Some(op) = ComparisonOp::try_from_func_name(func_name) { + self.selectivity = + self.compute_comparison(op, &func.arguments[0], &func.arguments[1])?; + } else { + self.selectivity = Selectivity::Unknown; + } + } } + Ok(()) + } - Ok(selectivity) + fn visit_bound_column_ref(&mut self, _: &'a BoundColumnRef) -> Result<()> { + self.selectivity = Selectivity::LowerBound; + Ok(()) + } + + fn visit_constant(&mut self, constant: &'a ConstantExpr) -> Result<()> { + self.selectivity = if is_true_constant_predicate(constant) { + Selectivity::N(1.0) + } else { + Selectivity::N(0.0) + }; + Ok(()) } } @@ -526,15 +662,15 @@ fn is_true_constant_predicate(constant: &ConstantExpr) -> bool { } } -fn evaluate_equal(column_stat: &ColumnStat, not_eq: bool, constant: &ConstantExpr) -> f64 { +fn evaluate_equal(column_stat: &ColumnStat, not_eq: bool, constant: &ConstantExpr) -> Selectivity { match &constant.value { - Scalar::Null => return if not_eq { 1.0 } else { 0.0 }, - _ => { - if let Some(constant) = Datum::from_scalar(constant.value.clone()) + Scalar::Null => return Selectivity::N(if not_eq { 1.0 } else { 0.0 }), + value => { + if let Some(constant) = Datum::from_scalar(value.clone()) && (matches!(constant.compare(&column_stat.min), Ok(Ordering::Less)) || matches!(constant.compare(&column_stat.max), Ok(Ordering::Greater))) { - return if not_eq { 1.0 } else { 0.0 }; + return Selectivity::N(if not_eq { 1.0 } else { 0.0 }); } } } @@ -544,44 +680,317 @@ fn evaluate_equal(column_stat: &ColumnStat, not_eq: bool, constant: &ConstantExp fn update_statistic( column_stat: &mut ColumnStat, - mut new_min: Datum, - mut new_max: Datum, + new_min: Datum, + new_max: Datum, selectivity: f64, ) -> Result<()> { column_stat.ndv = column_stat.ndv.reduce_by_selectivity(selectivity); - - if matches!( - new_min, - Datum::Bool(_) | Datum::Int(_) | Datum::UInt(_) | Datum::Float(_) - ) { - new_min = Datum::Float(F64::from(new_min.to_double()?)); - new_max = Datum::Float(F64::from(new_max.to_double()?)); - } column_stat.min = new_min.clone(); column_stat.max = new_max.clone(); + column_stat.null_count = (column_stat.null_count as f64 * selectivity).ceil() as u64; if let Some(histogram) = &column_stat.histogram { // If selectivity < 0.2, most buckets are invalid and // the accuracy histogram can be discarded. // Todo: support unfixed buckets number for histogram and prune the histogram. - column_stat.histogram = if histogram.accuracy && selectivity >= 0.2 { - Some(histogram.clone()) - } else { + if !histogram.accuracy || selectivity < 0.2 { let num_values = histogram.num_values(); let new_num_values = (num_values * selectivity).ceil() as u64; let new_ndv = column_stat.ndv.value() as u64; - if new_ndv <= 2 { - column_stat.histogram = None; - return Ok(()); + column_stat.histogram = if new_ndv <= 2 { + None + } else { + Some(HistogramBuilder::from_ndv( + new_ndv, + new_num_values.max(new_ndv), + Some((new_min, new_max)), + DEFAULT_HISTOGRAM_BUCKETS, + )?) } - Some(HistogramBuilder::from_ndv( - new_ndv, - max(new_num_values, new_ndv), - Some((new_min, new_max)), - DEFAULT_HISTOGRAM_BUCKETS, - )?) } } Ok(()) } + +fn update_statistic_eq(column_stat: &mut ColumnStat, value: Datum) { + column_stat.min = value.clone(); + column_stat.max = value; + column_stat.ndv = Ndv::Stat(1.0); + column_stat.null_count = 0; + column_stat.histogram = None; +} + +impl Ndv { + pub fn equal_selectivity(&self, not: bool) -> Selectivity { + let ndv = self.value(); + if ndv == 0.0 { + Selectivity::N(0.0) + } else { + let selectivity = if not { 1.0 - 1.0 / ndv } else { 1.0 / ndv }; + match self { + Ndv::Stat(_) => Selectivity::N(selectivity), + Ndv::Max(_) => Selectivity::LowerBound, + } + } + } +} + +#[cfg(test)] +mod tests { + use std::io::Write; + + use databend_common_exception::Result; + use databend_common_expression::RawExpr; + use databend_common_expression::types::ArgType; + use databend_common_expression::types::DataType; + use databend_common_expression::types::UInt64Type; + use databend_common_functions::test_utils::parse_raw_expr; + use goldenfile::Mint; + + use super::*; + use crate::ColumnBindingBuilder; + use crate::Visibility; + use crate::optimizer::ir::Ndv; + use crate::plans::BoundColumnRef; + use crate::plans::CastExpr; + use crate::plans::ConstantExpr; + use crate::plans::FunctionCall; + use crate::plans::ScalarExpr; + + fn run_test( + file: &mut impl Write, + expr_text: &str, + columns: &[(&str, DataType)], + column_stats: ColumnStatSet, + ) -> Result<()> { + writeln!(file, "expr : {expr_text}")?; + + let in_stats = column_stats_to_string(&column_stats); + let raw_expr = parse_raw_expr(expr_text, columns); + let expr = raw_expr_to_scalar(&raw_expr, columns); + let cardinality = 100.0; + let mut estimator = SelectivityEstimator::new(column_stats, cardinality); + let estimated_rows = estimator.apply(&[expr])?; + let out_stats = estimator.column_stats(); + + writeln!(file, "cardinality : {cardinality}")?; + writeln!(file, "estimated : {estimated_rows}")?; + writeln!(file, "in stats :\n{in_stats}")?; + writeln!( + file, + "out stats :\n{}", + column_stats_to_string(&out_stats) + )?; + + writeln!(file)?; + Ok(()) + } + + fn column_stats_to_string(column_stats: &ColumnStatSet) -> String { + let mut keys = column_stats.keys().copied().collect::>(); + keys.sort(); + + keys.iter() + .map(|i| format!("{i} {:?}", column_stats[i])) + .collect::>() + .join("\n") + } + + fn raw_expr_to_scalar(raw_expr: &RawExpr, columns: &[(&str, DataType)]) -> ScalarExpr { + match raw_expr { + RawExpr::Constant { scalar, .. } => ScalarExpr::ConstantExpr(ConstantExpr { + span: None, + value: scalar.clone(), + }), + RawExpr::ColumnRef { id, .. } => { + let index = *id; + let (name, data_type) = &columns[index]; + let column = ColumnBindingBuilder::new( + name.to_string(), + index, + Box::new(data_type.clone()), + Visibility::Visible, + ) + .build(); + ScalarExpr::BoundColumnRef(BoundColumnRef { span: None, column }) + } + RawExpr::Cast { + expr, + dest_type, + is_try, + .. + } => ScalarExpr::CastExpr(CastExpr { + span: None, + is_try: *is_try, + argument: Box::new(raw_expr_to_scalar(expr, columns)), + target_type: Box::new(dest_type.clone()), + }), + RawExpr::FunctionCall { + name, args, params, .. + } => ScalarExpr::FunctionCall(FunctionCall { + span: None, + func_name: name.clone(), + params: params.clone(), + arguments: args + .iter() + .map(|arg| raw_expr_to_scalar(arg, columns)) + .collect(), + }), + RawExpr::LambdaFunctionCall { .. } => { + unreachable!("lambda expressions are not used in tests") + } + } + } + + #[test] + fn test_selectivity() -> Result<()> { + let mut mint = Mint::new("tests/ut/testdata"); + let file = &mut mint.new_goldenfile("selectivity.txt").unwrap(); + + test_comparison(file)?; + test_logic(file)?; + test_mod(file)?; + test_like(file)?; + + Ok(()) + } + + fn test_comparison(file: &mut impl Write) -> Result<()> { + let column_stats = ColumnStatSet::from_iter([ + (0, ColumnStat { + min: Datum::UInt(10), + max: Datum::UInt(20), + ndv: Ndv::Stat(10.0), + null_count: 0, + histogram: None, + }), + (1, ColumnStat { + min: Datum::UInt(10), + max: Datum::UInt(20), + ndv: Ndv::Stat(10.0), + null_count: 10, + histogram: None, + }), + ]); + let columns = &[("a", UInt64Type::data_type())]; + + run_test(file, "a = 5", columns, column_stats.clone())?; + run_test(file, "a = 15", columns, column_stats.clone())?; + + run_test(file, "a != 5", columns, column_stats.clone())?; + run_test(file, "a != 15", columns, column_stats.clone())?; + + run_test(file, "a > 5", columns, column_stats.clone())?; + run_test(file, "a > 10", columns, column_stats.clone())?; + run_test(file, "a > 17", columns, column_stats.clone())?; + run_test(file, "a > 20", columns, column_stats.clone())?; + run_test(file, "a > 25", columns, column_stats.clone())?; + + run_test(file, "a >= 5", columns, column_stats.clone())?; + run_test(file, "a >= 10", columns, column_stats.clone())?; + run_test(file, "a >= 17", columns, column_stats.clone())?; + run_test(file, "a >= 20", columns, column_stats.clone())?; + run_test(file, "a >= 25", columns, column_stats.clone())?; + + run_test(file, "a < 5", columns, column_stats.clone())?; + run_test(file, "a < 10", columns, column_stats.clone())?; + run_test(file, "a < 17", columns, column_stats.clone())?; + run_test(file, "a < 20", columns, column_stats.clone())?; + run_test(file, "a < 25", columns, column_stats.clone())?; + + run_test(file, "a <= 5", columns, column_stats.clone())?; + run_test(file, "a <= 10", columns, column_stats.clone())?; + run_test(file, "a <= 17", columns, column_stats.clone())?; + run_test(file, "a <= 20", columns, column_stats.clone())?; + run_test(file, "a <= 25", columns, column_stats.clone())?; + + Ok(()) + } + + fn test_logic(file: &mut impl Write) -> Result<()> { + let column_stats = ColumnStatSet::from_iter([ + (0, ColumnStat { + min: Datum::UInt(0), + max: Datum::UInt(9), + ndv: Ndv::Stat(10.0), + null_count: 0, + histogram: None, + }), + (1, ColumnStat { + min: Datum::UInt(0), + max: Datum::UInt(9), + ndv: Ndv::Stat(10.0), + null_count: 10, + histogram: None, + }), + ]); + let columns = &[("a", UInt64Type::data_type())]; + + run_test( + file, + "and_filters(a = 5, a > 3)", + columns, + column_stats.clone(), + )?; + + run_test( + file, + "or_filters(a = 5, a = 6)", + columns, + column_stats.clone(), + )?; + + run_test(file, "not(a = 5)", columns, column_stats.clone())?; + + run_test( + file, + "is_not_null(b)", + &[ + ("a", UInt64Type::data_type()), + ("b", UInt64Type::data_type().wrap_nullable()), + ], + column_stats.clone(), + )?; + Ok(()) + } + + fn test_mod(file: &mut impl Write) -> Result<()> { + let column_stats = ColumnStatSet::from_iter([ + (0, ColumnStat { + min: Datum::UInt(0), + max: Datum::UInt(9), + ndv: Ndv::Stat(10.0), + null_count: 0, + histogram: None, + }), + (1, ColumnStat { + min: Datum::UInt(0), + max: Datum::UInt(9), + ndv: Ndv::Stat(10.0), + null_count: 10, + histogram: None, + }), + ]); + let columns = &[("a", UInt64Type::data_type())]; + + run_test(file, "a % 4 = 1", columns, column_stats.clone())?; + run_test(file, "a % 4 = 5", columns, column_stats.clone())?; + Ok(()) + } + + fn test_like(file: &mut impl Write) -> Result<()> { + let columns = &[("s", DataType::String)]; + let column_stats = ColumnStatSet::from_iter([(0, ColumnStat { + min: Datum::Bytes("aa".as_bytes().to_vec()), + max: Datum::Bytes("zz".as_bytes().to_vec()), + ndv: Ndv::Stat(52.0), + null_count: 0, + histogram: None, + })]); + run_test(file, "s like 'ab%'", columns, column_stats.clone())?; + run_test(file, "s like '%ab_'", columns, column_stats.clone())?; + + Ok(()) + } +} diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/agg_index/query_rewrite.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/agg_index/query_rewrite.rs index 4527313e97145..bda75b00c5eb1 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/agg_index/query_rewrite.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/agg_index/query_rewrite.rs @@ -437,9 +437,10 @@ impl PredicatesSplitter { fn split(&mut self, pred: &ScalarExpr, column_map: &HashMap) { if let ScalarExpr::FunctionCall(func) = pred { match func.func_name.as_str() { - "and" => { - self.split(&func.arguments[0], column_map); - self.split(&func.arguments[1], column_map); + "and" | "and_filters" => { + for arg in &func.arguments { + self.split(arg, column_map); + } } "eq" if matches!(func.arguments[0], ScalarExpr::BoundColumnRef(_)) && matches!(func.arguments[1], ScalarExpr::BoundColumnRef(_)) => @@ -834,6 +835,7 @@ impl RangeClasses { } } +#[derive(Debug)] struct ResidualClasses { residual_preds: BTreeMap, } diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/rule_try_apply_agg_index.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/rule_try_apply_agg_index.rs index c980fc56eb1c1..f89ef2e8c9f38 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/rule_try_apply_agg_index.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/rule/agg_rules/rule_try_apply_agg_index.rs @@ -229,12 +229,11 @@ impl Rule for RuleTryApplyAggIndex { ) -> Result<()> { let (table_index, table_name) = self.get_table(s_expr); let metadata = self.metadata.read(); - let index_plans = metadata.get_agg_indexes(&table_name); - if index_plans.is_none() { + let index_plans = metadata.get_agg_indices(&table_name); + let Some(index_plans) = index_plans else { // No enterprise license or no index. return Ok(()); - } - let index_plans = index_plans.unwrap(); + }; if index_plans.is_empty() { // No enterprise license or no index. return Ok(()); diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/factory.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/factory.rs index 125564d0939cc..475fa8f45d32e 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/factory.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/rule/factory.rs @@ -26,7 +26,6 @@ use crate::optimizer::optimizers::rule::RuleEliminateFilter; use crate::optimizer::optimizers::rule::RuleEliminateSelfJoin; use crate::optimizer::optimizers::rule::RuleEliminateSort; use crate::optimizer::optimizers::rule::RuleEliminateUnion; -use crate::optimizer::optimizers::rule::RuleFilterFlattenOr; use crate::optimizer::optimizers::rule::RuleFilterNulls; use crate::optimizer::optimizers::rule::RuleFoldCountAggregate; use crate::optimizer::optimizers::rule::RuleGroupingSetsToUnion; @@ -76,7 +75,6 @@ impl RuleFactory { RuleID::FilterNulls => Ok(Box::new(RuleFilterNulls::new( ctx.get_enable_distributed_optimization(), ))), - RuleID::FilterFlattenOr => Ok(Box::new(RuleFilterFlattenOr::new())), RuleID::PushDownFilterUnion => Ok(Box::new(RulePushDownFilterUnion::new())), RuleID::PushDownFilterEvalScalar => Ok(Box::new(RulePushDownFilterEvalScalar::new())), RuleID::PushDownFilterJoin => Ok(Box::new(RulePushDownFilterJoin::new(metadata))), diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/mod.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/mod.rs index 8a129ffec9096..a9ae55f60c4d9 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/mod.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/mod.rs @@ -13,7 +13,6 @@ // limitations under the License. mod rule_eliminate_filter; -mod rule_filter_flatten_or; mod rule_filter_nulls; mod rule_merge_filter; mod rule_merge_filter_into_mutation; @@ -32,7 +31,6 @@ mod rule_push_down_sort_filter_scan; mod rule_push_down_sort_scan; pub use rule_eliminate_filter::RuleEliminateFilter; -pub use rule_filter_flatten_or::RuleFilterFlattenOr; pub use rule_filter_nulls::RuleFilterNulls; pub use rule_merge_filter::RuleMergeFilter; pub use rule_merge_filter_into_mutation::RuleMergeFilterIntoMutation; diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/rule_eliminate_filter.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/rule_eliminate_filter.rs index 858d4b660b324..b0e55477344cc 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/rule_eliminate_filter.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/rule_eliminate_filter.rs @@ -26,7 +26,8 @@ use crate::optimizer::ir::SExpr; use crate::optimizer::optimizers::rule::Rule; use crate::optimizer::optimizers::rule::RuleID; use crate::optimizer::optimizers::rule::TransformResult; -use crate::optimizer::optimizers::rule::constant::is_falsy; +use crate::optimizer::optimizers::rule::is_falsy; +use crate::optimizer::optimizers::rule::is_true; use crate::plans::ConstantTableScan; use crate::plans::Filter; use crate::plans::Operator; @@ -35,7 +36,6 @@ use crate::plans::RelOperator; use crate::plans::ScalarExpr; pub struct RuleEliminateFilter { - id: RuleID, matchers: Vec, metadata: MetadataRef, } @@ -43,7 +43,6 @@ pub struct RuleEliminateFilter { impl RuleEliminateFilter { pub fn new(metadata: MetadataRef) -> Self { Self { - id: RuleID::EliminateFilter, // Filter // \ // * @@ -58,7 +57,7 @@ impl RuleEliminateFilter { impl Rule for RuleEliminateFilter { fn id(&self) -> RuleID { - self.id + RuleID::EliminateFilter } fn apply(&self, s_expr: &SExpr, state: &mut TransformResult) -> Result<()> { @@ -79,14 +78,11 @@ impl Rule for RuleEliminateFilter { .clone(); let metadata = self.metadata.read(); - let mut fields = Vec::with_capacity(output_columns.len()); + let fields = output_columns + .iter() + .map(|col| DataField::new(&col.to_string(), metadata.column(*col).data_type())) + .collect(); - for col in output_columns.iter() { - fields.push(DataField::new( - &col.to_string(), - metadata.column(*col).data_type(), - )); - } let empty_scan = ConstantTableScan::new_empty_scan(DataSchemaRefExt::create(fields), output_columns); let result = SExpr::create_leaf(Arc::new(RelOperator::ConstantTableScan(empty_scan))); @@ -133,18 +129,18 @@ impl Rule for RuleEliminateFilter { true } } - _ => true, + predicate => !is_true(predicate), }) .collect::>(); if predicates.is_empty() { - state.add_result(s_expr.child(0)?.clone()); + state.add_result(s_expr.unary_child().clone()); } else if origin_predicates.len() != predicates.len() { - let filter = Filter { predicates }; - state.add_result(SExpr::create_unary( - Arc::new(filter.into()), - Arc::new(s_expr.child(0)?.clone()), - )); + state.add_result( + s_expr + .unary_child_arc() + .ref_build_unary(Filter { predicates }), + ); } Ok(()) } diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/rule_filter_flatten_or.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/rule_filter_flatten_or.rs deleted file mode 100644 index 3acf57ceec29d..0000000000000 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/rule_filter_flatten_or.rs +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use std::sync::Arc; - -use databend_common_exception::Result; -use databend_common_expression::Scalar; - -use crate::optimizer::ir::Matcher; -use crate::optimizer::ir::SExpr; -use crate::optimizer::optimizers::rule::Rule; -use crate::optimizer::optimizers::rule::RuleID; -use crate::optimizer::optimizers::rule::TransformResult; -use crate::plans::ConstantExpr; -use crate::plans::Filter; -use crate::plans::FunctionCall; -use crate::plans::RelOp; -use crate::plans::ScalarExpr; - -pub struct RuleFilterFlattenOr { - id: RuleID, - matchers: Vec, -} - -impl RuleFilterFlattenOr { - pub fn new() -> Self { - Self { - id: RuleID::FilterFlattenOr, - // Filter - // \ - // * - matchers: vec![Matcher::MatchOp { - op_type: RelOp::Filter, - children: vec![Matcher::Leaf], - }], - } - } -} - -impl Default for RuleFilterFlattenOr { - fn default() -> Self { - Self::new() - } -} - -impl Rule for RuleFilterFlattenOr { - fn id(&self) -> RuleID { - self.id - } - - fn apply(&self, s_expr: &SExpr, state: &mut TransformResult) -> Result<()> { - let mut filter: Filter = s_expr.plan().clone().try_into()?; - let mut has_replace = false; - - for predicate in filter.predicates.iter_mut() { - let mut or_exprs = Vec::new(); - flatten_or_expr(predicate, &mut or_exprs); - - if or_exprs.len() > 2 { - let replace_expr = FunctionCall { - span: None, - func_name: "or_filters".to_string(), - params: vec![], - arguments: or_exprs, - } - .into(); - if predicate == &replace_expr { - continue; - } - *predicate = replace_expr; - has_replace = true - } - } - if !has_replace { - state.add_result(s_expr.clone()); - return Ok(()); - } - let mut res = - SExpr::create_unary(Arc::new(filter.into()), Arc::new(s_expr.child(0)?.clone())); - res.set_applied_rule(&self.id()); - state.add_result(res); - - Ok(()) - } - - fn matchers(&self) -> &[Matcher] { - &self.matchers - } -} - -fn flatten_or_expr(expr: &ScalarExpr, or_exprs: &mut Vec) { - match expr { - ScalarExpr::FunctionCall(func) - if matches!(func.func_name.as_str(), "or" | "or_filters") => - { - for argument in func.arguments.iter() { - flatten_or_expr(argument, or_exprs); - } - } - ScalarExpr::ConstantExpr(ConstantExpr { value, span }) if value.is_null() => { - // predicates cannot directly pass null - or_exprs.push( - ConstantExpr { - span: *span, - value: Scalar::Boolean(false), - } - .into(), - ) - } - _ => or_exprs.push(expr.clone()), - } -} diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/rule_merge_filter.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/rule_merge_filter.rs index 7caa778051cd1..9372e3075cf1a 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/rule_merge_filter.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/rule/filter_rules/rule_merge_filter.rs @@ -12,8 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; - use databend_common_exception::Result; use crate::optimizer::ir::Matcher; @@ -26,14 +24,12 @@ use crate::plans::RelOp; // Merge two adjacent `Filter`s into one pub struct RuleMergeFilter { - id: RuleID, matchers: Vec, } impl RuleMergeFilter { pub fn new() -> Self { Self { - id: RuleID::MergeFilter, // Filter // \ // Filter @@ -52,25 +48,26 @@ impl RuleMergeFilter { impl Rule for RuleMergeFilter { fn id(&self) -> RuleID { - self.id + RuleID::MergeFilter } fn apply(&self, s_expr: &SExpr, state: &mut TransformResult) -> Result<()> { - let up_filter: Filter = s_expr.plan().clone().try_into()?; - let down_filter: Filter = s_expr.child(0)?.plan().clone().try_into()?; + let up_filter = s_expr.plan().as_filter().unwrap(); + let down_filter = s_expr.unary_child().plan().as_filter().unwrap(); let predicates = up_filter .predicates - .into_iter() - .chain(down_filter.predicates) + .iter() + .chain(down_filter.predicates.iter()) + .cloned() .collect(); - let merged = Filter { predicates }; - let new_expr = SExpr::create_unary( - Arc::new(merged.into()), - Arc::new(s_expr.child(0)?.child(0)?.clone()), + state.add_result( + s_expr + .unary_child() + .unary_child_arc() + .ref_build_unary(Filter { predicates }), ); - state.add_result(new_expr); Ok(()) } diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/join_rules/push_down_filter_join/extract_or_predicates.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/join_rules/push_down_filter_join/extract_or_predicates.rs index 332852f89b551..38296861b5288 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/join_rules/push_down_filter_join/extract_or_predicates.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/rule/join_rules/push_down_filter_join/extract_or_predicates.rs @@ -17,14 +17,13 @@ use itertools::Itertools; use crate::ColumnSet; use crate::ScalarExpr; -use crate::optimizer::ir::RelExpr; use crate::optimizer::ir::SExpr; use crate::plans::Filter; use crate::plans::FunctionCall; pub fn rewrite_predicates(s_expr: &SExpr) -> Result> { let filter: Filter = s_expr.plan().clone().try_into()?; - let join = s_expr.child(0)?; + let join = s_expr.unary_child(); let mut new_predicates = Vec::new(); let mut origin_predicates = filter.predicates.clone(); for predicate in filter.predicates.iter() { @@ -33,8 +32,7 @@ pub fn rewrite_predicates(s_expr: &SExpr) -> Result> { if matches!(func.func_name.as_str(), "or" | "or_filters") => { for join_child in join.children() { - let rel_expr = RelExpr::with_s_expr(join_child); - let prop = rel_expr.derive_relational_prop()?; + let prop = join_child.derive_relational_prop()?; if let Some(predicate) = extract_or_predicate(&func.arguments, &prop.used_columns)? { @@ -62,7 +60,9 @@ fn extract_or_predicate( for or_arg in flatten_or_args.iter() { let mut sub_scalars = Vec::new(); match or_arg { - ScalarExpr::FunctionCall(func) if func.func_name == "and" => { + ScalarExpr::FunctionCall(func) + if matches!(func.func_name.as_str(), "and" | "and_filters") => + { let and_args = flatten_and(&func.arguments); for and_arg in and_args.iter() { match and_arg { @@ -95,11 +95,11 @@ fn extract_or_predicate( return Ok(None); } - extracted_scalars.push(make_and_expr(&sub_scalars)); + extracted_scalars.push(make_and_expr(sub_scalars)); } if !extracted_scalars.is_empty() { - return Ok(Some(make_or_expr(&extracted_scalars))); + return Ok(Some(make_or_expr(extracted_scalars))); } Ok(None) @@ -128,7 +128,9 @@ fn flatten_and(and_args: &[ScalarExpr]) -> Vec { let mut flattened_and = Vec::new(); for and_arg in and_args.iter() { match and_arg { - ScalarExpr::FunctionCall(func) if func.func_name == "and" => { + ScalarExpr::FunctionCall(func) + if matches!(func.func_name.as_str(), "and" | "and_filters") => + { flattened_and.extend(flatten_and(&func.arguments)); } _ => flattened_and.push(and_arg.clone()), @@ -138,33 +140,29 @@ fn flatten_and(and_args: &[ScalarExpr]) -> Vec { } // Merge predicates to AND scalar -fn make_and_expr(scalars: &[ScalarExpr]) -> ScalarExpr { - scalars - .iter() - .cloned() - .reduce(|lhs, rhs| { - ScalarExpr::FunctionCall(FunctionCall { - span: None, - func_name: "and".to_string(), - params: vec![], - arguments: vec![lhs, rhs], - }) - }) - .unwrap() +fn make_and_expr(mut scalars: Vec) -> ScalarExpr { + if scalars.len() == 1 { + return scalars.pop().unwrap(); + } + FunctionCall { + span: None, + func_name: "and_filters".to_string(), + params: vec![], + arguments: scalars, + } + .into() } // Merge predicates to OR scalar -fn make_or_expr(scalars: &[ScalarExpr]) -> ScalarExpr { - scalars - .iter() - .cloned() - .reduce(|lhs, rhs| { - ScalarExpr::FunctionCall(FunctionCall { - span: None, - func_name: "or".to_string(), - params: vec![], - arguments: vec![lhs, rhs], - }) - }) - .unwrap() +fn make_or_expr(mut scalars: Vec) -> ScalarExpr { + if scalars.len() == 1 { + return scalars.pop().unwrap(); + } + FunctionCall { + span: None, + func_name: "or_filters".to_string(), + params: vec![], + arguments: scalars, + } + .into() } diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/rule.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/rule.rs index 2c690428ce4e3..28430c2d41b3a 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/rule.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/rule/rule.rs @@ -31,11 +31,10 @@ pub static DEFAULT_REWRITE_RULES: LazyLock> = LazyLock::new(|| { RuleID::EliminateUnion, RuleID::MergeEvalScalar, // Filter + RuleID::NormalizeScalarFilter, RuleID::FilterNulls, - RuleID::FilterFlattenOr, RuleID::EliminateFilter, RuleID::MergeFilter, - RuleID::NormalizeScalarFilter, RuleID::PushDownFilterUnion, RuleID::PushDownFilterAggregate, RuleID::PushDownFilterWindow, @@ -100,7 +99,6 @@ pub enum RuleID { PushDownFilterAggregate, PushDownFilterEvalScalar, FilterNulls, - FilterFlattenOr, PushDownFilterUnion, PushDownFilterJoin, PushDownFilterScan, @@ -150,7 +148,6 @@ impl Display for RuleID { fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { match self { RuleID::FilterNulls => write!(f, "FilterNulls"), - RuleID::FilterFlattenOr => write!(f, "FilterFlattenOr"), RuleID::PushDownFilterUnion => write!(f, "PushDownFilterUnion"), RuleID::PushDownFilterEvalScalar => write!(f, "PushDownFilterEvalScalar"), RuleID::PushDownFilterJoin => write!(f, "PushDownFilterJoin"), diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/scalar_rules/rule_eliminate_eval_scalar.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/scalar_rules/rule_eliminate_eval_scalar.rs index 62c529ff52245..cef7329c0b285 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/scalar_rules/rule_eliminate_eval_scalar.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/rule/scalar_rules/rule_eliminate_eval_scalar.rs @@ -62,7 +62,7 @@ impl Rule for RuleEliminateEvalScalar { return Ok(()); } - if self.metadata.read().has_agg_indexes() { + if self.metadata.read().has_agg_indices() { return Ok(()); } diff --git a/src/query/sql/src/planner/optimizer/optimizers/rule/scalar_rules/rule_normalize_scalar.rs b/src/query/sql/src/planner/optimizer/optimizers/rule/scalar_rules/rule_normalize_scalar.rs index 6f4a4cdfff9eb..62f491f6db649 100644 --- a/src/query/sql/src/planner/optimizer/optimizers/rule/scalar_rules/rule_normalize_scalar.rs +++ b/src/query/sql/src/planner/optimizer/optimizers/rule/scalar_rules/rule_normalize_scalar.rs @@ -12,8 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; - +use databend_common_ast::Span; use databend_common_exception::Result; use databend_common_expression::Scalar; @@ -21,86 +20,79 @@ use crate::optimizer::ir::Matcher; use crate::optimizer::ir::SExpr; use crate::optimizer::optimizers::rule::Rule; use crate::optimizer::optimizers::rule::RuleID; +use crate::optimizer::optimizers::rule::TransformResult; use crate::optimizer::optimizers::rule::constant::is_falsy; use crate::optimizer::optimizers::rule::constant::is_true; use crate::plans::ConstantExpr; use crate::plans::Filter; +use crate::plans::FunctionCall; use crate::plans::RelOp; use crate::plans::ScalarExpr; +use crate::plans::VisitorMut; -fn normalize_predicates(predicates: Vec) -> Vec { - [remove_true_predicate, normalize_falsy_predicate] - .into_iter() - .fold(predicates, |acc, f| f(acc)) -} - -fn remove_true_predicate(predicates: Vec) -> Vec { - predicates.into_iter().filter(|p| !is_true(p)).collect() -} - -fn normalize_falsy_predicate(predicates: Vec) -> Vec { - if predicates.iter().any(is_falsy) { - vec![ - ConstantExpr { - span: None, - value: Scalar::Boolean(false), - } - .into(), - ] - } else { - predicates - } -} - -/// Rule to normalize a Filter, including: -/// - Remove true predicates -/// - If there is a NULL or FALSE conjunction, replace the -/// whole filter with FALSE pub struct RuleNormalizeScalarFilter { - id: RuleID, matchers: Vec, } impl RuleNormalizeScalarFilter { pub fn new() -> Self { Self { - id: RuleID::NormalizeScalarFilter, - // Filter - // \ - // * - matchers: vec![Matcher::MatchOp { - op_type: RelOp::Filter, - children: vec![Matcher::Leaf], - }], + matchers: vec![ + // Filter + // \ + // * + Matcher::MatchOp { + op_type: RelOp::Filter, + children: vec![Matcher::Leaf], + }, + // Scan + Matcher::MatchOp { + op_type: RelOp::Scan, + children: vec![], + }, + ], } } } impl Rule for RuleNormalizeScalarFilter { fn id(&self) -> RuleID { - self.id + RuleID::NormalizeScalarFilter } - fn apply( - &self, - s_expr: &SExpr, - state: &mut crate::optimizer::optimizers::rule::TransformResult, - ) -> Result<()> { - let mut filter: Filter = s_expr.plan().clone().try_into()?; - - if filter - .predicates + fn apply(&self, s_expr: &SExpr, state: &mut TransformResult) -> Result<()> { + let i = self + .matchers .iter() - .any(|p| is_true(p) || (is_falsy(p) && filter.predicates.len() > 1)) - { - filter.predicates = normalize_predicates(filter.predicates); - state.add_result(SExpr::create_unary( - Arc::new(filter.into()), - Arc::new(s_expr.child(0)?.clone()), - )); - Ok(()) - } else { - Ok(()) + .position(|matcher| matcher.matches(s_expr)) + .unwrap(); + self.apply_matcher(i, s_expr, state) + } + + fn apply_matcher(&self, i: usize, s_expr: &SExpr, state: &mut TransformResult) -> Result<()> { + match i { + 0 => { + let filter = s_expr.plan.as_filter().unwrap(); + let Some(predicates) = RewritePredicates {}.rewrite(&filter.predicates)? else { + return Ok(()); + }; + state.add_result(s_expr.replace_plan(Filter { predicates })); + Ok(()) + } + 1 => { + let scan = s_expr.plan.as_scan().unwrap(); + let Some(predicates) = &scan.push_down_predicates else { + return Ok(()); + }; + let Some(predicates) = RewritePredicates {}.rewrite(predicates)? else { + return Ok(()); + }; + let mut scan = scan.clone(); + scan.push_down_predicates = Some(predicates); + state.add_result(s_expr.replace_plan(scan)); + Ok(()) + } + _ => unreachable!(), } } @@ -114,3 +106,292 @@ impl Default for RuleNormalizeScalarFilter { Self::new() } } + +struct RewritePredicates {} + +impl RewritePredicates { + fn rewrite(&mut self, predicates: &[ScalarExpr]) -> Result>> { + let mut expr = if predicates.len() == 1 { + predicates[0].clone() + } else { + FunctionCall { + span: None, + func_name: "and_filters".to_string(), + params: vec![], + arguments: predicates.to_vec(), + } + .into() + }; + self.visit(&mut expr)?; + + match expr { + ScalarExpr::FunctionCall(FunctionCall { + func_name, + arguments, + .. + }) if func_name == "and_filters" => { + if arguments == predicates { + Ok(None) + } else { + Ok(Some(arguments)) + } + } + expr => Ok(Some(vec![expr])), + } + } + + fn rewrite_and( + &mut self, + span: Span, + arguments: &mut Vec, + ) -> Result> { + let func_arguments = std::mem::take(arguments); + for mut arg in func_arguments { + self.visit(&mut arg)?; + let inner_arguments = if let ScalarExpr::FunctionCall(call) = &mut arg + && &call.func_name == "and_filters" + { + std::mem::take(&mut call.arguments) + } else { + vec![arg] + }; + + for arg in inner_arguments { + if is_true(&arg) { + continue; + } + if is_falsy(&arg) { + return Ok(Some( + ConstantExpr { + span, + value: Scalar::Boolean(false), + } + .into(), + )); + } + arguments.push(arg) + } + } + + if arguments.is_empty() { + return Ok(Some( + ConstantExpr { + span, + value: Scalar::Boolean(true), + } + .into(), + )); + } + if arguments.len() == 1 { + return Ok(arguments.pop()); + } + Ok(None) + } + + fn rewrite_or( + &mut self, + span: Span, + arguments: &mut Vec, + ) -> Result> { + let func_arguments = std::mem::take(arguments); + for mut arg in func_arguments { + self.visit(&mut arg)?; + let inner_arguments = if let ScalarExpr::FunctionCall(call) = &mut arg + && &call.func_name == "or_filters" + { + std::mem::take(&mut call.arguments) + } else { + vec![arg] + }; + + for arg in inner_arguments { + if is_falsy(&arg) { + continue; + } + if is_true(&arg) { + return Ok(Some(arg)); + } + arguments.push(arg); + } + } + if arguments.is_empty() { + return Ok(Some( + ConstantExpr { + span, + value: Scalar::Boolean(false), + } + .into(), + )); + } + if arguments.len() == 1 { + return Ok(arguments.pop()); + } + Ok(None) + } +} + +impl<'a> VisitorMut<'a> for RewritePredicates { + fn visit(&mut self, expr: &'a mut ScalarExpr) -> Result<()> { + let new_expr = match expr { + ScalarExpr::FunctionCall(FunctionCall { + span, + func_name, + arguments, + .. + }) => match func_name.as_str() { + "and" | "and_filters" => { + if func_name == "and" { + *func_name = "and_filters".to_string() + } + self.rewrite_and(*span, arguments)? + } + "or" | "or_filters" => { + if func_name == "or" { + *func_name = "or_filters".to_string() + } + self.rewrite_or(*span, arguments)? + } + "not" => { + if let ScalarExpr::FunctionCall(inner) = &mut arguments[0] + && inner.func_name == "not" + { + Some(inner.arguments.pop().unwrap()) + } else { + None + } + } + _ => None, + }, + _ => None, + }; + if let Some(new_expr) = new_expr { + *expr = new_expr; + } + Ok(()) + } +} + +#[cfg(test)] +mod tests { + use std::io::Write; + + use databend_common_exception::Result; + use databend_common_expression::RawExpr; + use databend_common_expression::types::ArgType; + use databend_common_expression::types::BooleanType; + use databend_common_expression::types::DataType; + use databend_common_expression::types::UInt64Type; + use databend_common_functions::test_utils::parse_raw_expr; + use goldenfile::Mint; + + use super::*; + use crate::ColumnBindingBuilder; + use crate::Visibility; + use crate::plans::BoundColumnRef; + use crate::plans::CastExpr; + use crate::plans::ConstantExpr; + use crate::plans::FunctionCall; + use crate::plans::ScalarExpr; + + fn run_test( + file: &mut impl Write, + expr_text: &str, + columns: &[(&str, DataType)], + ) -> Result<()> { + writeln!(file, "in : {expr_text}")?; + + let raw_expr = parse_raw_expr(expr_text, columns); + let mut expr = raw_expr_to_scalar(&raw_expr, columns); + + RewritePredicates {}.visit(&mut expr)?; + + writeln!(file, "out : {}", expr.as_expr().unwrap())?; + writeln!(file)?; + Ok(()) + } + + fn raw_expr_to_scalar(raw_expr: &RawExpr, columns: &[(&str, DataType)]) -> ScalarExpr { + match raw_expr { + RawExpr::Constant { scalar, .. } => ScalarExpr::ConstantExpr(ConstantExpr { + span: None, + value: scalar.clone(), + }), + RawExpr::ColumnRef { id, .. } => { + let index = *id; + let (name, data_type) = &columns[index]; + let column = ColumnBindingBuilder::new( + name.to_string(), + index, + Box::new(data_type.clone()), + Visibility::Visible, + ) + .build(); + ScalarExpr::BoundColumnRef(BoundColumnRef { span: None, column }) + } + RawExpr::Cast { + expr, + dest_type, + is_try, + .. + } => ScalarExpr::CastExpr(CastExpr { + span: None, + is_try: *is_try, + argument: Box::new(raw_expr_to_scalar(expr, columns)), + target_type: Box::new(dest_type.clone()), + }), + RawExpr::FunctionCall { + name, args, params, .. + } => ScalarExpr::FunctionCall(FunctionCall { + span: None, + func_name: name.clone(), + params: params.clone(), + arguments: args + .iter() + .map(|arg| raw_expr_to_scalar(arg, columns)) + .collect(), + }), + RawExpr::LambdaFunctionCall { .. } => { + unreachable!("lambda expressions are not used in tests") + } + } + } + + #[test] + fn test_rule_normalize_scalar() -> Result<()> { + let mut mint = Mint::new("tests/ut/testdata"); + let file = &mut mint.new_goldenfile("rule_normalize_scalar.txt").unwrap(); + + let columns = &[ + ("a", UInt64Type::data_type()), + ("b", BooleanType::data_type()), + ("c", UInt64Type::data_type().wrap_nullable()), + ]; + + run_test(file, "a = 5", columns)?; + + run_test(file, "a != 3 and a != 4 and a != 5", columns)?; + run_test(file, "a != 3 and true and a != 5", columns)?; + run_test(file, "a != 3 and false and a != 5", columns)?; + + run_test(file, "true and true", columns)?; + + run_test(file, "a = 3 or a = 4 or a = 5", columns)?; + run_test(file, "a = 3 or true or a = 5", columns)?; + run_test(file, "a = 3 or false or a = 5", columns)?; + run_test(file, "a = 3 or false", columns)?; + + run_test(file, "false or false", columns)?; + + run_test( + file, + "(a = 9 or a = 8) and (a = 7 or a = 5) and a = 3", + columns, + )?; + + run_test(file, "not(not(b))", columns)?; + + run_test(file, "is_not_null(c < 3 and c < 4)", columns)?; + + Ok(()) + } +} diff --git a/src/query/sql/src/planner/planner.rs b/src/query/sql/src/planner/planner.rs index 6d72d8b866143..5e971288f98a7 100644 --- a/src/query/sql/src/planner/planner.rs +++ b/src/query/sql/src/planner/planner.rs @@ -51,6 +51,8 @@ use crate::NameResolutionContext; use crate::VariableNormalizer; use crate::optimizer::OptimizerContext; use crate::optimizer::optimize; +use crate::optimizer::optimizers::recursive::RecursiveRuleOptimizer; +use crate::optimizer::optimizers::rule::RuleID; use crate::planner::QueryExecutor; use crate::plans::Plan; @@ -290,6 +292,22 @@ impl Planner { .set_sample_executor(self.query_executor.clone()) .clone(); + { + let mut agg_indices = metadata.read().agg_indices().clone(); + let optimizer = RecursiveRuleOptimizer::new(opt_ctx.clone(), &[ + RuleID::NormalizeScalarFilter, + RuleID::FilterNulls, + RuleID::EliminateFilter, + RuleID::MergeFilter, + ]); + for indices in &mut agg_indices.values_mut() { + for (_, _, s_expr) in indices { + *s_expr = optimizer.optimize_sync(s_expr)?; + } + } + metadata.write().replace_agg_indices(agg_indices); + } + let optimized_plan = optimize(opt_ctx, plan).await?; if enable_planner_cache { diff --git a/src/query/sql/src/planner/plans/filter.rs b/src/query/sql/src/planner/plans/filter.rs index fc02de99c0b93..0e6c460f66723 100644 --- a/src/query/sql/src/planner/plans/filter.rs +++ b/src/query/sql/src/planner/plans/filter.rs @@ -13,13 +13,11 @@ // limitations under the License. use std::collections::HashMap; -use std::collections::HashSet; use std::sync::Arc; use databend_common_exception::Result; use crate::ColumnSet; -use crate::optimizer::ir::MAX_SELECTIVITY; use crate::optimizer::ir::RelExpr; use crate::optimizer::ir::RelationalProperty; use crate::optimizer::ir::SelectivityEstimator; @@ -88,23 +86,17 @@ impl Operator for Filter { fn derive_stats(&self, rel_expr: &RelExpr) -> Result> { let stat_info = rel_expr.derive_cardinality_child(0)?; - let (input_cardinality, mut statistics) = - (stat_info.cardinality, stat_info.statistics.clone()); // Derive cardinality - let mut sb = SelectivityEstimator::new(&mut statistics, input_cardinality, HashSet::new()); - let mut selectivity = MAX_SELECTIVITY; - for pred in self.predicates.iter() { - // Compute selectivity for each conjunction - selectivity = selectivity.min(sb.compute_selectivity(pred, true)?); - } - // Update other columns's statistic according to selectivity. - sb.update_other_statistic_by_selectivity(selectivity); - let cardinality = input_cardinality * selectivity; + let mut sb = SelectivityEstimator::new( + stat_info.statistics.column_stats.clone(), + stat_info.cardinality, + ); + let cardinality = sb.apply(&self.predicates)?; // Derive column statistics let column_stats = if cardinality == 0.0 { HashMap::new() } else { - statistics.column_stats + sb.into_column_stats() }; Ok(Arc::new(StatInfo { cardinality, diff --git a/src/query/sql/src/planner/plans/scan.rs b/src/query/sql/src/planner/plans/scan.rs index c46666c2f153a..dd149999c35cf 100644 --- a/src/query/sql/src/planner/plans/scan.rs +++ b/src/query/sql/src/planner/plans/scan.rs @@ -13,7 +13,6 @@ // limitations under the License. use std::collections::HashMap; -use std::collections::HashSet; use std::sync::Arc; use databend_common_ast::ast::SampleConfig; @@ -37,7 +36,6 @@ use crate::optimizer::ir::ColumnStat; use crate::optimizer::ir::ColumnStatSet; use crate::optimizer::ir::Distribution; use crate::optimizer::ir::HistogramBuilder; -use crate::optimizer::ir::MAX_SELECTIVITY; use crate::optimizer::ir::Ndv; use crate::optimizer::ir::PhysicalProperty; use crate::optimizer::ir::RelExpr; @@ -330,25 +328,11 @@ impl Operator for Scan { let cardinality = match (precise_cardinality, &self.prewhere) { (Some(precise_cardinality), Some(prewhere)) => { - let mut statistics = OpStatistics { - precise_cardinality: Some(precise_cardinality), - column_stats, - }; // Derive cardinality - let mut sb = SelectivityEstimator::new( - &mut statistics, - precise_cardinality as f64, - HashSet::new(), - ); - let mut selectivity = MAX_SELECTIVITY; - for pred in prewhere.predicates.iter() { - // Compute selectivity for each conjunction - selectivity = selectivity.min(sb.compute_selectivity(pred, true)?); - } - // Update other columns's statistic according to selectivity. - sb.update_other_statistic_by_selectivity(selectivity); - column_stats = statistics.column_stats; - (precise_cardinality as f64) * selectivity + let mut sb = SelectivityEstimator::new(column_stats, precise_cardinality as f64); + let cardinality = sb.apply(&prewhere.predicates)?; + column_stats = sb.into_column_stats(); + cardinality } (Some(precise_cardinality), None) => precise_cardinality as f64, (_, _) => 0.0, diff --git a/src/query/sql/src/planner/plans/secure_filter.rs b/src/query/sql/src/planner/plans/secure_filter.rs index df7a72e386726..2202db6d5086d 100644 --- a/src/query/sql/src/planner/plans/secure_filter.rs +++ b/src/query/sql/src/planner/plans/secure_filter.rs @@ -13,14 +13,12 @@ // limitations under the License. use std::collections::HashMap; -use std::collections::HashSet; use std::sync::Arc; use databend_common_exception::Result; use crate::ColumnSet; use crate::IndexType; -use crate::optimizer::ir::MAX_SELECTIVITY; use crate::optimizer::ir::RelExpr; use crate::optimizer::ir::RelationalProperty; use crate::optimizer::ir::SelectivityEstimator; @@ -93,28 +91,21 @@ impl Operator for SecureFilter { // For SecureFilter, we apply the selectivity calculation like normal Filter // but we hide/suppress column statistics for sensitive columns - let (input_cardinality, mut statistics) = - (stat_info.cardinality, stat_info.statistics.clone()); // Apply selectivity calculation - let mut sb = SelectivityEstimator::new(&mut statistics, input_cardinality, HashSet::new()); - let mut selectivity = MAX_SELECTIVITY; - for pred in self.predicates.iter() { - selectivity = selectivity.min(sb.compute_selectivity(pred, true)?); - } - sb.update_other_statistic_by_selectivity(selectivity); - let cardinality = input_cardinality * selectivity; - - // SECURITY: Hide column statistics for SecureFilter to prevent data leakage - // This is a key security feature - we return empty column stats to prevent - // inference attacks based on statistical information - let column_stats = HashMap::new(); - + let mut sb = SelectivityEstimator::new( + stat_info.statistics.column_stats.clone(), + stat_info.cardinality, + ); + let cardinality = sb.apply(&self.predicates)?; Ok(Arc::new(StatInfo { cardinality, + // SECURITY: Hide column statistics for SecureFilter to prevent data leakage + // This is a key security feature - we return empty column stats to prevent + // inference attacks based on statistical information statistics: Statistics { precise_cardinality: None, - column_stats, + column_stats: HashMap::new(), }, })) } diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 15522eeb02852..c57758f10a79d 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -284,7 +284,7 @@ impl<'a> TypeChecker<'a> { #[recursive::recursive] pub fn resolve(&mut self, expr: &Expr) -> Result> { - let box (scalar, data_type): Box<(ScalarExpr, DataType)> = match expr { + match expr { Expr::ColumnRef { span, column: @@ -454,7 +454,7 @@ impl<'a> TypeChecker<'a> { } }; - Box::new((scalar, data_type)) + Ok(Box::new((scalar, data_type))) } Expr::IsNull { @@ -462,9 +462,9 @@ impl<'a> TypeChecker<'a> { } => { let args = &[expr.as_ref()]; if *not { - self.resolve_function(*span, "is_not_null", vec![], args)? + self.resolve_function(*span, "is_not_null", vec![], args) } else { - self.resolve_function(*span, "is_null", vec![], args)? + self.resolve_function(*span, "is_null", vec![], args) } } @@ -517,7 +517,7 @@ impl<'a> TypeChecker<'a> { right: right.clone(), }, ])?; - self.resolve_scalar_function_call(*span, "assume_not_null", vec![], vec![scalar])? + self.resolve_scalar_function_call(*span, "assume_not_null", vec![], vec![scalar]) } Expr::InList { @@ -571,9 +571,9 @@ impl<'a> TypeChecker<'a> { window: None, lambda: None, }, - })? + }) } else { - self.resolve_function(*span, "contains", vec![], &args)? + self.resolve_function(*span, "contains", vec![], &args) } } else { let mut result = list @@ -607,7 +607,7 @@ impl<'a> TypeChecker<'a> { expr: Box::new(result), }; } - self.resolve(&result)? + self.resolve(&result) } } @@ -638,7 +638,7 @@ impl<'a> TypeChecker<'a> { self.resolve_scalar_function_call(*span, "and", vec![], vec![ ge_func.clone(), le_func.clone(), - ])? + ]) } else { // Rewrite `expr NOT BETWEEN low AND high` // into `expr < low OR expr > high` @@ -655,7 +655,7 @@ impl<'a> TypeChecker<'a> { high.as_ref(), )?; - self.resolve_scalar_function_call(*span, "or", vec![], vec![lt_func, gt_func])? + self.resolve_scalar_function_call(*span, "or", vec![], vec![lt_func, gt_func]) } } @@ -665,7 +665,7 @@ impl<'a> TypeChecker<'a> { left, right, .. - } => self.resolve_binary_op_or_subquery(span, op, left, right)?, + } => self.resolve_binary_op_or_subquery(span, op, left, right), Expr::JsonOp { span, @@ -674,12 +674,10 @@ impl<'a> TypeChecker<'a> { right, } => { let func_name = op.to_func_name(); - self.resolve_function(*span, func_name.as_str(), vec![], &[left, right])? + self.resolve_function(*span, func_name.as_str(), vec![], &[left, right]) } - Expr::UnaryOp { span, op, expr, .. } => { - self.resolve_unary_op(*span, op, expr.as_ref())? - } + Expr::UnaryOp { span, op, expr, .. } => self.resolve_unary_op(*span, op, expr.as_ref()), Expr::Cast { expr, target_type, .. @@ -718,7 +716,7 @@ impl<'a> TypeChecker<'a> { checked_expr.data_type().clone() }; - Box::new(( + Ok(Box::new(( CastExpr { span: expr.span(), is_try: false, @@ -727,7 +725,7 @@ impl<'a> TypeChecker<'a> { } .into(), target_type, - )) + ))) } Expr::TryCast { @@ -763,7 +761,7 @@ impl<'a> TypeChecker<'a> { } else { checked_expr.data_type().clone() }; - Box::new(( + Ok(Box::new(( CastExpr { span: expr.span(), is_try: true, @@ -772,7 +770,7 @@ impl<'a> TypeChecker<'a> { } .into(), target_type, - )) + ))) } Expr::Case { @@ -817,7 +815,7 @@ impl<'a> TypeChecker<'a> { } let args_ref: Vec<&Expr> = arguments.iter().collect(); - self.resolve_function(*span, "if", vec![], &args_ref)? + self.resolve_function(*span, "if", vec![], &args_ref) } Expr::Substring { @@ -831,10 +829,10 @@ impl<'a> TypeChecker<'a> { if let Some(substring_for) = substring_for { arguments.push(substring_for.as_ref()); } - self.resolve_function(*span, "substring", vec![], &arguments)? + self.resolve_function(*span, "substring", vec![], &arguments) } - Expr::Literal { span, value } => self.resolve_literal(*span, value)?, + Expr::Literal { span, value } => self.resolve_literal(*span, value), Expr::FunctionCall { span, @@ -969,7 +967,7 @@ impl<'a> TypeChecker<'a> { &window.ignore_nulls, )?; let display_name = format!("{:#}", expr); - self.resolve_window(*span, display_name, &window.window, func)? + self.resolve_window(*span, display_name, &window.window, func) } else if AggregateFunctionFactory::instance().contains(func_name) { let mut new_params = Vec::with_capacity(params.len()); for param in params { @@ -1007,10 +1005,10 @@ impl<'a> TypeChecker<'a> { } // general window function let func = WindowFuncType::Aggregate(new_agg_func); - self.resolve_window(*span, display_name, &window.window, func)? + self.resolve_window(*span, display_name, &window.window, func) } else { // aggregate function - Box::new((new_agg_func.into(), data_type)) + Ok(Box::new((new_agg_func.into(), data_type))) } } else if GENERAL_LAMBDA_FUNCTIONS.contains(&uni_case_func_name) { if lambda.is_none() { @@ -1020,12 +1018,12 @@ impl<'a> TypeChecker<'a> { .set_span(*span)); } let lambda = lambda.as_ref().unwrap(); - self.resolve_lambda_function(*span, func_name, &args, lambda)? + self.resolve_lambda_function(*span, func_name, &args, lambda) } else if GENERAL_SEARCH_FUNCTIONS.contains(&uni_case_func_name) { match func_name.to_lowercase().as_str() { - "score" => self.resolve_score_search_function(*span, func_name, &args)?, - "match" => self.resolve_match_search_function(*span, func_name, &args)?, - "query" => self.resolve_query_search_function(*span, func_name, &args)?, + "score" => self.resolve_score_search_function(*span, func_name, &args), + "match" => self.resolve_match_search_function(*span, func_name, &args), + "query" => self.resolve_query_search_function(*span, func_name, &args), _ => { return Err(ErrorCode::SemanticError(format!( "cannot find search function {}", @@ -1035,14 +1033,14 @@ impl<'a> TypeChecker<'a> { } } } else if ASYNC_FUNCTIONS.contains(&uni_case_func_name) { - self.resolve_async_function(*span, func_name, &args)? + self.resolve_async_function(*span, func_name, &args) } else if BUILTIN_FUNCTIONS .get_property(func_name) .map(|property| property.kind == FunctionKind::SRF) .unwrap_or(false) { // Set returning function - self.resolve_set_returning_function(*span, func_name, &args)? + self.resolve_set_returning_function(*span, func_name, &args) } else { // Scalar function let mut new_params: Vec = Vec::with_capacity(params.len()); @@ -1062,7 +1060,7 @@ impl<'a> TypeChecker<'a> { .scalar; new_params.push(constant); } - self.resolve_function(*span, func_name, new_params, &args)? + self.resolve_function(*span, func_name, new_params, &args) } } @@ -1074,10 +1072,10 @@ impl<'a> TypeChecker<'a> { // aggregate window function let display_name = format!("{:#}", expr); let func = WindowFuncType::Aggregate(new_agg_func); - self.resolve_window(*span, display_name, window, func)? + self.resolve_window(*span, display_name, window, func) } else { // aggregate function - Box::new((new_agg_func.into(), data_type)) + Ok(Box::new((new_agg_func.into(), data_type))) } } @@ -1090,10 +1088,10 @@ impl<'a> TypeChecker<'a> { subquery, None, None, - )?, + ), Expr::Subquery { subquery, .. } => { - self.resolve_subquery(SubqueryType::Scalar, subquery, None, None)? + self.resolve_subquery(SubqueryType::Scalar, subquery, None, None) } Expr::InSubquery { @@ -1117,7 +1115,7 @@ impl<'a> TypeChecker<'a> { subquery, Some(*expr.clone()), Some(SubqueryComparisonOp::Equal), - )? + ) } Expr::LikeSubquery { @@ -1133,7 +1131,7 @@ impl<'a> TypeChecker<'a> { span, modifier, &BinaryOperator::Like(escape.clone()), - )?, + ), Expr::LikeAnyWithEscape { span, @@ -1145,7 +1143,7 @@ impl<'a> TypeChecker<'a> { &BinaryOperator::LikeAny(Some(escape.clone())), left, right, - )?, + ), Expr::LikeWithEscape { span, @@ -1160,7 +1158,7 @@ impl<'a> TypeChecker<'a> { BinaryOperator::Like(Some(escape.clone())) }; - self.resolve_binary_op_or_subquery(span, &like_op, left, right)? + self.resolve_binary_op_or_subquery(span, &like_op, left, right) } expr @ Expr::MapAccess { span, .. } => { @@ -1198,16 +1196,16 @@ impl<'a> TypeChecker<'a> { }; paths.push_front((*span, path)); } - self.resolve_map_access(*span, expr, paths)? + self.resolve_map_access(*span, expr, paths) } Expr::Extract { span, kind, expr, .. - } => self.resolve_extract_expr(*span, kind, expr)?, + } => self.resolve_extract_expr(*span, kind, expr), Expr::DatePart { span, kind, expr, .. - } => self.resolve_extract_expr(*span, kind, expr)?, + } => self.resolve_extract_expr(*span, kind, expr), Expr::Interval { span, expr, unit } => { let ex = Expr::Cast { @@ -1243,7 +1241,7 @@ impl<'a> TypeChecker<'a> { lambda: None, }, }; - self.resolve(&ex)? + self.resolve(&ex) } Expr::DateAdd { span, @@ -1251,21 +1249,21 @@ impl<'a> TypeChecker<'a> { interval, date, .. - } => self.resolve_date_arith(*span, unit, interval, date, expr)?, + } => self.resolve_date_arith(*span, unit, interval, date, expr), Expr::DateDiff { span, unit, date_start, date_end, .. - } => self.resolve_date_arith(*span, unit, date_start, date_end, expr)?, + } => self.resolve_date_arith(*span, unit, date_start, date_end, expr), Expr::DateBetween { span, unit, date_start, date_end, .. - } => self.resolve_date_arith(*span, unit, date_start, date_end, expr)?, + } => self.resolve_date_arith(*span, unit, date_start, date_end, expr), Expr::DateSub { span, unit, @@ -1282,10 +1280,10 @@ impl<'a> TypeChecker<'a> { }, date, expr, - )?, + ), Expr::DateTrunc { span, unit, date, .. - } => self.resolve_date_trunc(*span, date, unit)?, + } => self.resolve_date_trunc(*span, date, unit), Expr::TimeSlice { span, unit, @@ -1293,25 +1291,25 @@ impl<'a> TypeChecker<'a> { slice_length, start_or_end, } => { - self.resolve_time_slice(*span, date, *slice_length, unit, start_or_end.to_string())? + self.resolve_time_slice(*span, date, *slice_length, unit, start_or_end.to_string()) } Expr::LastDay { span, unit, date, .. - } => self.resolve_last_day(*span, date, unit)?, + } => self.resolve_last_day(*span, date, unit), Expr::PreviousDay { span, unit, date, .. - } => self.resolve_previous_or_next_day(*span, date, unit, true)?, + } => self.resolve_previous_or_next_day(*span, date, unit, true), Expr::NextDay { span, unit, date, .. - } => self.resolve_previous_or_next_day(*span, date, unit, false)?, + } => self.resolve_previous_or_next_day(*span, date, unit, false), Expr::Trim { span, expr, trim_where, .. - } => self.resolve_trim_function(*span, expr, trim_where)?, + } => self.resolve_trim_function(*span, expr, trim_where), - Expr::Array { span, exprs, .. } => self.resolve_array(*span, exprs)?, + Expr::Array { span, exprs, .. } => self.resolve_array(*span, exprs), Expr::Position { substr_expr, @@ -1321,11 +1319,11 @@ impl<'a> TypeChecker<'a> { } => self.resolve_function(*span, "locate", vec![], &[ substr_expr.as_ref(), str_expr.as_ref(), - ])?, + ]), - Expr::Map { span, kvs, .. } => self.resolve_map(*span, kvs)?, + Expr::Map { span, kvs, .. } => self.resolve_map(*span, kvs), - Expr::Tuple { span, exprs, .. } => self.resolve_tuple(*span, exprs)?, + Expr::Tuple { span, exprs, .. } => self.resolve_tuple(*span, exprs), Expr::Hole { span, .. } | Expr::Placeholder { span } => { return Err(ErrorCode::SemanticError( @@ -1333,11 +1331,8 @@ impl<'a> TypeChecker<'a> { ) .set_span(*span)); } - Expr::StageLocation { span, location } => { - self.resolve_stage_location(*span, location)? - } - }; - Ok(Box::new((scalar, data_type))) + Expr::StageLocation { span, location } => self.resolve_stage_location(*span, location), + } } fn resolve_binary_op_or_subquery( @@ -4137,28 +4132,13 @@ impl<'a> TypeChecker<'a> { ])) } ("equal_null", &[arg_x, arg_y]) => { - // Rewrite equal_null(x, y) to ifnull(x = y, false) or (x is null and y is null) + // Rewrite equal_null(x, y) to if(is_not_null( x = y ), is_true( x = y ), x is null and y is null) let eq_expr = Expr::BinaryOp { span, op: BinaryOperator::Eq, left: Box::new(arg_x.clone()), right: Box::new(arg_y.clone()), }; - let ifnull_expr = Expr::FunctionCall { - span, - func: ASTFunctionCall { - distinct: false, - name: Identifier::from_name(span, "ifnull"), - args: vec![eq_expr, Expr::Literal { - span, - value: Literal::Boolean(false), - }], - params: vec![], - order_by: vec![], - window: None, - lambda: None, - }, - }; let is_null_x = Expr::IsNull { span, @@ -4170,14 +4150,30 @@ impl<'a> TypeChecker<'a> { expr: Box::new(arg_y.clone()), not: false, }; - let and_expr = Expr::BinaryOp { - span, - op: BinaryOperator::And, - left: Box::new(is_null_x), - right: Box::new(is_null_y), - }; - Some(self.resolve_function(span, "or", vec![], &[&ifnull_expr, &and_expr])) + Some(self.resolve_function(span, "if", vec![], &[ + &Expr::IsNull { + span, + expr: Box::new(eq_expr.clone()), + not: true, + }, + &Expr::FunctionCall { + span, + func: ASTFunctionCall { + name: Identifier::from_name(span, "is_true"), + args: vec![eq_expr], + ..Default::default() + }, + }, + &Expr::FunctionCall { + span, + func: ASTFunctionCall { + name: Identifier::from_name(span, "and_filters"), + args: vec![is_null_x, is_null_y], + ..Default::default() + }, + }, + ])) } ("iff", args) => Some(self.resolve_function(span, "if", vec![], args)), ("ifnull" | "nvl", args) => { diff --git a/src/query/sql/tests/ut/testdata/rule_normalize_scalar.txt b/src/query/sql/tests/ut/testdata/rule_normalize_scalar.txt new file mode 100644 index 0000000000000..514ec2b2e3007 --- /dev/null +++ b/src/query/sql/tests/ut/testdata/rule_normalize_scalar.txt @@ -0,0 +1,39 @@ +in : a = 5 +out : eq(a (#0), 5_u64) + +in : a != 3 and a != 4 and a != 5 +out : and_filters(noteq(a (#0), CAST(3_u8 AS UInt64)), noteq(a (#0), CAST(4_u8 AS UInt64)), noteq(a (#0), CAST(5_u8 AS UInt64))) + +in : a != 3 and true and a != 5 +out : and_filters(noteq(a (#0), CAST(3_u8 AS UInt64)), noteq(a (#0), CAST(5_u8 AS UInt64))) + +in : a != 3 and false and a != 5 +out : false + +in : true and true +out : true + +in : a = 3 or a = 4 or a = 5 +out : or_filters(eq(a (#0), 3_u64), eq(a (#0), 4_u64), eq(a (#0), 5_u64)) + +in : a = 3 or true or a = 5 +out : true + +in : a = 3 or false or a = 5 +out : or_filters(eq(a (#0), 3_u64), eq(a (#0), 5_u64)) + +in : a = 3 or false +out : eq(a (#0), 3_u64) + +in : false or false +out : false + +in : (a = 9 or a = 8) and (a = 7 or a = 5) and a = 3 +out : and_filters(or_filters(eq(a (#0), 9_u64), eq(a (#0), 8_u64)), or_filters(eq(a (#0), 7_u64), eq(a (#0), 5_u64)), eq(a (#0), 3_u64)) + +in : not(not(b)) +out : b (#1) + +in : is_not_null(c < 3 and c < 4) +out : is_not_null(and(lt(c (#2), CAST(3_u8 AS UInt64 NULL)), lt(c (#2), CAST(4_u8 AS UInt64 NULL)))) + diff --git a/src/query/sql/tests/ut/testdata/selectivity.txt b/src/query/sql/tests/ut/testdata/selectivity.txt new file mode 100644 index 0000000000000..d9a698eee4ebc --- /dev/null +++ b/src/query/sql/tests/ut/testdata/selectivity.txt @@ -0,0 +1,316 @@ +expr : a = 5 +cardinality : 100 +estimated : 0 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(5), max: UInt(5), ndv: Stat(1.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } + +expr : a = 15 +cardinality : 100 +estimated : 10 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(15), max: UInt(15), ndv: Stat(1.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(1.0), null_count: 1, histogram: None } + +expr : a != 5 +cardinality : 100 +estimated : 100 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } + +expr : a != 15 +cardinality : 100 +estimated : 90 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(9.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(9.0), null_count: 9, histogram: None } + +expr : a > 5 +cardinality : 100 +estimated : 100 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } + +expr : a > 10 +cardinality : 100 +estimated : 90 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(9.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(9.0), null_count: 9, histogram: None } + +expr : a > 17 +cardinality : 100 +estimated : 36.36363636363637 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(17), max: UInt(20), ndv: Stat(4.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(4.0), null_count: 4, histogram: None } + +expr : a > 20 +cardinality : 100 +estimated : 0 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } + +expr : a > 25 +cardinality : 100 +estimated : 0 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } + +expr : a >= 5 +cardinality : 100 +estimated : 100 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } + +expr : a >= 10 +cardinality : 100 +estimated : 100 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } + +expr : a >= 17 +cardinality : 100 +estimated : 36.36363636363637 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(17), max: UInt(20), ndv: Stat(4.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(4.0), null_count: 4, histogram: None } + +expr : a >= 20 +cardinality : 100 +estimated : 10 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(20), max: UInt(20), ndv: Stat(1.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(1.0), null_count: 1, histogram: None } + +expr : a >= 25 +cardinality : 100 +estimated : 0 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } + +expr : a < 5 +cardinality : 100 +estimated : 0 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } + +expr : a < 10 +cardinality : 100 +estimated : 0 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } + +expr : a < 17 +cardinality : 100 +estimated : 72.72727272727273 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(17), ndv: Stat(8.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(8.0), null_count: 8, histogram: None } + +expr : a < 20 +cardinality : 100 +estimated : 90 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(9.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(9.0), null_count: 9, histogram: None } + +expr : a < 25 +cardinality : 100 +estimated : 100 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } + +expr : a <= 5 +cardinality : 100 +estimated : 0 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(0.0), null_count: 0, histogram: None } + +expr : a <= 10 +cardinality : 100 +estimated : 10 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(10), ndv: Stat(1.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(1.0), null_count: 1, histogram: None } + +expr : a <= 17 +cardinality : 100 +estimated : 72.72727272727273 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(17), ndv: Stat(8.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(8.0), null_count: 8, histogram: None } + +expr : a <= 20 +cardinality : 100 +estimated : 100 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } + +expr : a <= 25 +cardinality : 100 +estimated : 100 +in stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(10), max: UInt(20), ndv: Stat(10.0), null_count: 10, histogram: None } + +expr : and_filters(a = 5, a > 3) +cardinality : 100 +estimated : 10 +in stats : +0 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(5), max: UInt(5), ndv: Stat(1.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(1.0), null_count: 1, histogram: None } + +expr : or_filters(a = 5, a = 6) +cardinality : 100 +estimated : 19 +in stats : +0 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(2.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(2.0), null_count: 2, histogram: None } + +expr : not(a = 5) +cardinality : 100 +estimated : 90 +in stats : +0 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(9.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(9.0), null_count: 9, histogram: None } + +expr : is_not_null(b) +cardinality : 100 +estimated : 90 +in stats : +0 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(9.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(9.0), null_count: 9, histogram: None } + +expr : a % 4 = 1 +cardinality : 100 +estimated : 25 +in stats : +0 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(3.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(3.0), null_count: 3, histogram: None } + +expr : a % 4 = 5 +cardinality : 100 +estimated : 0 +in stats : +0 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(10.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(10.0), null_count: 10, histogram: None } +out stats : +0 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(0.0), null_count: 0, histogram: None } +1 ColumnStat { min: UInt(0), max: UInt(9), ndv: Stat(0.0), null_count: 0, histogram: None } + +expr : s like 'ab%' +cardinality : 100 +estimated : 50 +in stats : +0 ColumnStat { min: Bytes([97, 97]), max: Bytes([122, 122]), ndv: Stat(52.0), null_count: 0, histogram: None } +out stats : +0 ColumnStat { min: Bytes([97, 97]), max: Bytes([122, 122]), ndv: Stat(26.0), null_count: 0, histogram: None } + +expr : s like '%ab_' +cardinality : 100 +estimated : 22.5 +in stats : +0 ColumnStat { min: Bytes([97, 97]), max: Bytes([122, 122]), ndv: Stat(52.0), null_count: 0, histogram: None } +out stats : +0 ColumnStat { min: Bytes([97, 97]), max: Bytes([122, 122]), ndv: Stat(12.0), null_count: 0, histogram: None } + diff --git a/tests/sqllogictests/suites/crdb/join.test b/tests/sqllogictests/suites/crdb/join.test index df08cf5ed9d27..489bd361c4de6 100644 --- a/tests/sqllogictests/suites/crdb/join.test +++ b/tests/sqllogictests/suites/crdb/join.test @@ -204,7 +204,7 @@ SELECT * FROM empty AS a(x) JOIN onecolumn AS b(y) ON a.x = b.y statement ok SELECT * FROM empty AS a JOIN onecolumn AS b USING(x) -query IT +query II SELECT * FROM onecolumn AS a(x) LEFT OUTER JOIN empty AS b(y) ON a.x = b.y ORDER BY a.x ---- 42 NULL @@ -288,7 +288,7 @@ SELECT o.x, t.y FROM onecolumn o INNER JOIN twocolumn t ON (o.x=t.x AND t.y=53) ---- 42 53 -query IT +query II SELECT o.x, t.y FROM onecolumn o LEFT OUTER JOIN twocolumn t ON (o.x=t.x AND t.y=53) order by o.x ---- 42 53 @@ -330,27 +330,27 @@ CREATE TABLE b (i int, b bool) statement ok INSERT INTO b VALUES (2, true), (3, true), (4, false) -query III +query IIB SELECT * FROM a INNER JOIN b ON a.i = b.i ---- 2 2 1 3 3 1 -query ITT +query IIB SELECT * FROM a LEFT OUTER JOIN b ON a.i = b.i ---- 1 NULL NULL 2 2 1 3 3 1 -query III +query IIB SELECT * FROM a RIGHT OUTER JOIN b ON a.i = b.i order by b ---- 2 2 1 3 3 1 NULL 4 0 -query III +query IIB SELECT * FROM a FULL OUTER JOIN b ON a.i = b.i order by b ---- 1 NULL NULL @@ -358,7 +358,7 @@ SELECT * FROM a FULL OUTER JOIN b ON a.i = b.i order by b 3 3 1 NULL 4 0 -query III +query IIB SELECT * FROM a FULL OUTER JOIN b ON (a.i = b.i and a.i>2) ORDER BY a.i, b.i ---- 1 NULL NULL @@ -371,7 +371,7 @@ NULL 4 0 statement ok INSERT INTO b VALUES (3, false) -query III +query IIB SELECT * FROM a RIGHT OUTER JOIN b ON a.i=b.i ORDER BY b.i, b.b ---- 2 2 1 @@ -379,7 +379,7 @@ SELECT * FROM a RIGHT OUTER JOIN b ON a.i=b.i ORDER BY b.i, b.b 3 3 1 NULL 4 0 -query III +query IIB SELECT * FROM a FULL OUTER JOIN b ON a.i=b.i ORDER BY b.i, b.b ---- 1 NULL NULL @@ -411,7 +411,7 @@ SELECT * FROM onecolumn JOIN (SELECT x + 2 AS x FROM onecolumn) USING(x) ---- 44 -query IIIII +query IIII SELECT * FROM (twocolumn AS a JOIN twocolumn AS b USING(x) JOIN twocolumn AS c USING(x)) ORDER BY x LIMIT 1 ---- 42 53 53 53 @@ -545,7 +545,7 @@ SELECT * FROM pairs FULL OUTER JOIN square ON pairs.a + pairs.b = square.sq WHER 1 3 2 4 3 6 3 9 -query IITT +query IIII SELECT * FROM (SELECT * FROM pairs LEFT JOIN square ON b = sq AND a > 1 AND n < 6) WHERE b > 1 AND (n IS NULL OR n > 1) AND (n IS NULL OR a < sq) ---- 1 2 NULL NULL @@ -769,7 +769,7 @@ SELECT * FROM xyu INNER JOIN xyv ON xyu.x = xyv.x AND xyu.y = xyv.y AND xyu.x = ---- 1 1 1 1 1 1 -query IIITTT +query IIIIII SELECT * FROM xyu LEFT OUTER JOIN xyv ON xyu.x = xyv.x AND xyu.y = xyv.y AND xyu.x = 1 AND xyu.y < 10 ---- 0 0 0 NULL NULL NULL @@ -804,7 +804,7 @@ NULL 5 5 55 # query # SELECT * FROM (SELECT * FROM xyu ORDER BY x, y) AS xyu FULL OUTER JOIN (SELECT * FROM xyv ORDER BY x, y) AS xyv USING(x, y) WHERE x > 2 -query IIITTT +query IIIIII SELECT * FROM (SELECT * FROM xyu ORDER BY x, y) AS xyu LEFT OUTER JOIN (SELECT * FROM xyv ORDER BY x, y) AS xyv ON xyu.x = xyv.x AND xyu.y = xyv.y AND xyu.x = 1 AND xyu.y < 10 ---- 0 0 0 NULL NULL NULL @@ -878,70 +878,70 @@ CREATE TABLE bar ( a INT, b FLOAT, c FLOAT, d INT) statement ok INSERT INTO bar VALUES (1, 1, 1, 1), (2, 2, 2, 2), (3, 3, 3, 3) -query II?? +query IIRR SELECT * FROM foo NATURAL JOIN bar ---- 1 1 1.0 1.0 2 2 2.0 2.0 3 3 3.0 3.0 -query II??I?I +query IIRRIRI SELECT * FROM foo JOIN bar USING (b) ---- 1 1 1.0 1.0 1 1.0 1 2 2 2.0 2.0 2 2.0 2 3 3 3.0 3.0 3 3.0 3 -query II???I +query IIRRRI SELECT * FROM foo JOIN bar USING (a, b) ---- 1 1 1.0 1.0 1.0 1 2 2 2.0 2.0 2.0 2 3 3 3.0 3.0 3.0 3 -query II??I +query IIRRI SELECT * FROM foo JOIN bar USING (a, b, c) ---- 1 1 1.0 1.0 1 2 2 2.0 2.0 2 3 3 3.0 3.0 3 -query II??I??I +query IIRRIRRI SELECT * FROM foo JOIN bar ON foo.b = bar.b ---- 1 1 1.0 1.0 1 1.0 1.0 1 2 2 2.0 2.0 2 2.0 2.0 2 3 3 3.0 3.0 3 3.0 3.0 3 -query II??I??I +query IIRRIRRI SELECT * FROM foo JOIN bar ON foo.a = bar.a AND foo.b = bar.b ---- 1 1 1.0 1.0 1 1.0 1.0 1 2 2 2.0 2.0 2 2.0 2.0 2 3 3 3.0 3.0 3 3.0 3.0 3 -query II??I??I +query IIRRIRRI SELECT * FROM foo, bar WHERE foo.b = bar.b ---- 1 1 1.0 1.0 1 1.0 1.0 1 2 2 2.0 2.0 2 2.0 2.0 2 3 3 3.0 3.0 3 3.0 3.0 3 -query II??I??I +query IIRRIRRI SELECT * FROM foo, bar WHERE foo.a = bar.a AND foo.b = bar.b ---- 1 1 1.0 1.0 1 1.0 1.0 1 2 2 2.0 2.0 2 2.0 2.0 2 3 3 3.0 3.0 3 3.0 3.0 3 -query II???I +query IIRRRI SELECT * FROM foo JOIN bar USING (a, b) WHERE foo.c = bar.c AND foo.d = bar.d ---- 1 1 1.0 1.0 1.0 1 2 2 2.0 2.0 2.0 2 3 3 3.0 3.0 3.0 3 -query TII +query III SELECT * FROM onecolumn AS a(x) RIGHT JOIN twocolumn ON false order by y ---- NULL 42 53 diff --git a/tests/sqllogictests/suites/mode/cluster/explain_v2.test b/tests/sqllogictests/suites/mode/cluster/explain_v2.test index 1e298e1502ade..61acb66b96428 100644 --- a/tests/sqllogictests/suites/mode/cluster/explain_v2.test +++ b/tests/sqllogictests/suites/mode/cluster/explain_v2.test @@ -46,7 +46,7 @@ Exchange ├── exchange type: Merge └── Filter ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] - ├── filters: [(t1.a (#0) > 3 OR (t2.a (#2) > 5 AND t1.a (#0) > 1))] + ├── filters: [t1.a (#0) > 3 or t2.a (#2) > 5 and t1.a (#0) > 1] ├── estimated rows: 99.68 └── HashJoin ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] @@ -63,7 +63,7 @@ Exchange │ ├── exchange type: Broadcast │ └── Filter │ ├── output columns: [t2.a (#2), t2.b (#3)] - │ ├── filters: [(t2.a (#2) > 3 OR t2.a (#2) > 1)] + │ ├── filters: [t2.a (#2) > 3 or t2.a (#2) > 1] │ ├── estimated rows: 99.92 │ └── TableScan │ ├── table: default.default.t2 @@ -74,11 +74,11 @@ Exchange │ ├── partitions total: 3 │ ├── partitions scanned: 3 │ ├── pruning stats: [segments: >, blocks: >] - │ ├── push downs: [filters: [(t2.a (#2) > 3 OR t2.a (#2) > 1)], limit: NONE] + │ ├── push downs: [filters: [t2.a (#2) > 3 or t2.a (#2) > 1], limit: NONE] │ └── estimated rows: 100.00 └── Filter(Probe) ├── output columns: [t1.a (#0), t1.b (#1)] - ├── filters: [(t1.a (#0) > 3 OR t1.a (#0) > 1)] + ├── filters: [t1.a (#0) > 3 or t1.a (#0) > 1] ├── estimated rows: 99.92 └── TableScan ├── table: default.default.t1 @@ -89,7 +89,7 @@ Exchange ├── partitions total: 3 ├── partitions scanned: 3 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [(t1.a (#0) > 3 OR t1.a (#0) > 1)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) > 3 or t1.a (#0) > 1], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 100.00 diff --git a/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test b/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test index 215416803a5c6..d67bf4eebed7e 100644 --- a/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test +++ b/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test @@ -538,7 +538,7 @@ EvalScalar │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] - │ ├── push downs: [filters: [and_filters(and_filters(is_not_null(CAST(a.content_object['category_a'] (#6) AS String NULL)), is_not_null(CAST(a.content_object['category_b'] (#7) AS String NULL))), is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL)))], limit: NONE] + │ ├── push downs: [filters: [is_not_null(CAST(a.content_object['category_a'] (#6) AS String NULL)) and is_not_null(CAST(a.content_object['category_b'] (#7) AS String NULL)) and is_not_null(CAST(CAST(CAST(a.content_object['event_date'] (#8) AS Int64 NULL) AS Timestamp NULL) AS Date NULL))], limit: NONE] │ ├── virtual columns: [content_object['category_a'], content_object['category_b'], content_object['event_date'], metadata_object['type']] │ └── estimated rows: 1.00 └── EvalScalar(Probe) diff --git a/tests/sqllogictests/suites/mode/standalone/explain/bloom_filter.test b/tests/sqllogictests/suites/mode/standalone/explain/bloom_filter.test index 271421247eaa6..9e238cea754bf 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/bloom_filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/bloom_filter.test @@ -37,7 +37,7 @@ EvalScalar ├── push downs: [filters: [is_true(bloom_test_t.c1 (#0) = 5)], limit: NONE] └── estimated rows: 6.00 -query T +query I select 1 from bloom_test_t where c1 = 5 ---- 1 @@ -64,26 +64,26 @@ select 1 from bloom_test_t where c1 = 5 # 1. filter using another column (c2), of the same block, where there is one row matched # 1.1 check that filtering on column c2 does work as expected -query T +query I select 1 from bloom_test_t where c2 = 6 ---- 1 # 1.2 check that if incorrect filtering is applied, an empty result set will be returned -query T +query I select count(*) from bloom_test_t where c1 = 6 ---- 0 # 2. filtering on column c2, of the same block, where there is no row matched # 2.1 check that an empty result set returned -query T +query I select count(*) from bloom_test_t where c2 = 5 ---- 0 # 2.2 check that if incorrect filtering is applied, a non-empty result set will be returned -query T +query I select count(*) from bloom_test_t where c1 = 5 ---- 1 @@ -342,7 +342,7 @@ EvalScalar ├── partitions total: 2 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: , bloom pruning: 2 to 1 cost: >] - ├── push downs: [filters: [and_filters(bloom_test_nullable_t.c1 (#0) = 5, bloom_test_nullable_t.c2 (#1) > 1)], limit: NONE] + ├── push downs: [filters: [bloom_test_nullable_t.c1 (#0) = 5 and bloom_test_nullable_t.c2 (#1) > 1], limit: NONE] └── estimated rows: 6.00 query T @@ -386,7 +386,7 @@ explain select * from bloom_test_nullable_t2 where ((c2) in ('1') is null); Filter ├── output columns: [bloom_test_nullable_t2.c0 (#0), bloom_test_nullable_t2.c1 (#1), bloom_test_nullable_t2.c2 (#2)] ├── filters: [NOT is_not_null(bloom_test_nullable_t2.c2 (#2) = '1')] -├── estimated rows: 1.60 +├── estimated rows: 0.40 └── TableScan ├── table: default.default.bloom_test_nullable_t2 ├── scan id: 0 @@ -404,8 +404,8 @@ explain select * from bloom_test_nullable_t2 where (not (not c0)) ---- Filter ├── output columns: [bloom_test_nullable_t2.c0 (#0), bloom_test_nullable_t2.c1 (#1), bloom_test_nullable_t2.c2 (#2)] -├── filters: [is_true(NOT NOT bloom_test_nullable_t2.c0 (#0))] -├── estimated rows: 2.00 +├── filters: [is_true(bloom_test_nullable_t2.c0 (#0))] +├── estimated rows: 1.00 └── TableScan ├── table: default.default.bloom_test_nullable_t2 ├── scan id: 0 @@ -415,5 +415,5 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [is_true(NOT NOT bloom_test_nullable_t2.c0 (#0))], limit: NONE] + ├── push downs: [filters: [is_true(bloom_test_nullable_t2.c0 (#0))], limit: NONE] └── estimated rows: 2.00 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/clustering.test b/tests/sqllogictests/suites/mode/standalone/explain/clustering.test index 592d13c5aaa60..72adf779d8d6f 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/clustering.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/clustering.test @@ -13,7 +13,7 @@ INSERT INTO test_linear VALUES(2, 1), (2, 2); statement ok ALTER TABLE test_linear RECLUSTER FINAL; -query TTIIRRT +query TT? select * exclude(timestamp) from clustering_information('default','test_linear') ---- (a, b) linear {"average_depth":1.0,"average_overlaps":0.0,"block_depth_histogram":{"00001":2},"constant_block_count":0,"total_block_count":2} diff --git a/tests/sqllogictests/suites/mode/standalone/explain/cte_filter_pushdown.test b/tests/sqllogictests/suites/mode/standalone/explain/cte_filter_pushdown.test index bc818efab4f22..618c5b2ccf6f0 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/cte_filter_pushdown.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/cte_filter_pushdown.test @@ -2,7 +2,7 @@ # This test verifies that filters can be correctly pushed down into materialized CTEs statement ok -create table t (a int, b int, c int); +create or replace table t (a int, b int, c int); statement ok insert into t values (1, 10, 100), (1, 20, 200), (2, 30, 300), (2, 40, 400), (3, 50, 500); @@ -103,7 +103,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [and_filters(cte1.a (#0) > 1, cte1.sum_b (#2) > 25)], limit: NONE] + ├── push downs: [filters: [cte1.a (#0) > 1 and cte1.sum_b (#2) > 25], limit: NONE] └── estimated rows: 3.00 # Test 5: Multiple CTE references with different filters @@ -238,7 +238,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: , bloom pruning: 1 to 1 cost: >] - ├── push downs: [filters: [and_filters(cte1.a (#0) = 1, cte1.b (#1) < 50)], limit: NONE] + ├── push downs: [filters: [cte1.a (#0) = 1 and cte1.b (#1) < 50], limit: NONE] └── estimated rows: 5.00 # Test 9: CTE with aggregation and HAVING clause @@ -289,7 +289,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: , bloom pruning: 1 to 1 cost: >] - ├── push downs: [filters: [and_filters(cte1.a (#0) = 1, cte1.b (#1) > 15)], limit: NONE] + ├── push downs: [filters: [cte1.a (#0) = 1 and cte1.b (#1) > 15], limit: NONE] └── estimated rows: 5.00 # Test 11: Test case that demonstrates nested Filter issue @@ -317,7 +317,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: , bloom pruning: 1 to 1 cost: >] - ├── push downs: [filters: [and_filters(cte1.a (#0) = 1, cte1.sum_b (#2) > 25)], limit: NONE] + ├── push downs: [filters: [cte1.a (#0) = 1 and cte1.sum_b (#2) > 25], limit: NONE] └── estimated rows: 3.00 # Test 12: Test filter pushdown with IN clause @@ -331,7 +331,7 @@ select * from cte1 where a in (1, 2); ---- Filter ├── output columns: [cte1.a (#0), cte1.cnt (#1)] -├── filters: [is_true((cte1.a (#0) = 1 OR cte1.a (#0) = 2))] +├── filters: [cte1.a (#0) = 1 or cte1.a (#0) = 2] ├── estimated rows: 1.67 └── TableScan ├── table: default.default.cte1 @@ -342,7 +342,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: , bloom pruning: 1 to 1 cost: >] - ├── push downs: [filters: [is_true((cte1.a (#0) = 1 OR cte1.a (#0) = 2))], limit: NONE] + ├── push downs: [filters: [cte1.a (#0) = 1 or cte1.a (#0) = 2], limit: NONE] └── estimated rows: 3.00 # Test 13: Test filter pushdown with complex conditions @@ -356,7 +356,7 @@ select * from cte1 where (a = 1 or a = 2) and b > 10; ---- Filter ├── output columns: [cte1.a (#0), cte1.b (#1), cte1.cnt (#2)] -├── filters: [is_true(cte1.b (#1) > 10), is_true((cte1.a (#0) = 1 OR cte1.a (#0) = 2))] +├── filters: [is_true(cte1.b (#1) > 10), cte1.a (#0) = 1 or cte1.a (#0) = 2] ├── estimated rows: 2.78 └── TableScan ├── table: default.default.cte1 @@ -367,7 +367,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: , bloom pruning: 1 to 1 cost: >] - ├── push downs: [filters: [and_filters(cte1.b (#1) > 10, (cte1.a (#0) = 1 OR cte1.a (#0) = 2))], limit: NONE] + ├── push downs: [filters: [cte1.b (#1) > 10 and (cte1.a (#0) = 1 or cte1.a (#0) = 2)], limit: NONE] └── estimated rows: 5.00 # Test 15: Test filter pushdown with subquery @@ -392,7 +392,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [and_filters(cte1.a (#0) > 1, cte1.cnt (#1) > 0)], limit: NONE] + ├── push downs: [filters: [cte1.a (#0) > 1 and cte1.cnt (#1) > 0], limit: NONE] └── estimated rows: 3.00 # Test 17: CTE with window function diff --git a/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test b/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test index dca2e70d02941..5fe1ed16610d2 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/eliminate_outer_join.test @@ -112,7 +112,7 @@ HashJoin ├── push downs: [filters: [], limit: NONE] └── estimated rows: 10.00 -query +query T explain select * from t left join t t1 on t.a = t1.a where t1.a is not null ---- HashJoin @@ -607,7 +607,7 @@ explain select * from t left join t t1 on t.a = t1.a where t1.a <= 1 or t.a < 1 ---- Filter ├── output columns: [t.a (#0), t1.a (#1)] -├── filters: [is_true((t1.a (#1) <= 1 OR t.a (#0) < 1))] +├── filters: [t1.a (#1) <= 1 or t.a (#0) < 1] ├── estimated rows: 2.80 └── HashJoin ├── output columns: [t.a (#0), t1.a (#1)] @@ -645,7 +645,7 @@ explain select * from t left join t t1 on t.a = t1.a where t1.a <= 1 or (t.a > 1 ---- Filter ├── output columns: [t.a (#0), t1.a (#1)] -├── filters: [is_true((t1.a (#1) <= 1 OR (t.a (#0) > 1 AND t1.a (#1) > 1)))] +├── filters: [t1.a (#1) <= 1 or t.a (#0) > 1 and t1.a (#1) > 1] ├── estimated rows: 7.65 └── HashJoin ├── output columns: [t.a (#0), t1.a (#1)] @@ -659,7 +659,7 @@ Filter ├── estimated rows: 8.40 ├── Filter(Build) │ ├── output columns: [t1.a (#1)] - │ ├── filters: [is_true((t1.a (#1) <= 1 OR t1.a (#1) > 1))] + │ ├── filters: [t1.a (#1) <= 1 or t1.a (#1) > 1] │ ├── estimated rows: 8.40 │ └── TableScan │ ├── table: default.eliminate_outer_join.t @@ -670,11 +670,11 @@ Filter │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] - │ ├── push downs: [filters: [is_true((t.a (#1) <= 1 OR t.a (#1) > 1))], limit: NONE] + │ ├── push downs: [filters: [t.a (#1) <= 1 or t.a (#1) > 1], limit: NONE] │ └── estimated rows: 10.00 └── Filter(Probe) ├── output columns: [t.a (#0)] - ├── filters: [is_true((t.a (#0) <= 1 OR t.a (#0) > 1))] + ├── filters: [t.a (#0) <= 1 or t.a (#0) > 1] ├── estimated rows: 8.40 └── TableScan ├── table: default.eliminate_outer_join.t @@ -685,7 +685,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [is_true((t.a (#0) <= 1 OR t.a (#0) > 1))], limit: NONE] + ├── push downs: [filters: [t.a (#0) <= 1 or t.a (#0) > 1], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 10.00 @@ -694,8 +694,8 @@ explain select * from t left join t t1 on t.a = t1.a where t1.a <= 1 or (t.a > 1 ---- Filter ├── output columns: [t.a (#0), t1.a (#1)] -├── filters: [is_true((t1.a (#1) <= 1 OR (t.a (#0) > 1 AND t.a (#0) < 2)))] -├── estimated rows: 3.60 +├── filters: [t1.a (#1) <= 1 or t.a (#0) > 1 and t.a (#0) < 2] +├── estimated rows: 2.89 └── HashJoin ├── output columns: [t.a (#0), t1.a (#1)] ├── join type: LEFT OUTER diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain.test b/tests/sqllogictests/suites/mode/standalone/explain/explain.test index 398f13037d4a5..ce4f29913f17b 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/explain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/explain.test @@ -46,7 +46,7 @@ explain select * from t1, t2 where (t1.a = t2.a and t1.a > 3) or (t1.a = t2.a an ---- Filter ├── output columns: [t2.a (#2), t2.b (#3), t1.b (#1), t1.a (#0)] -├── filters: [(t1.a (#0) > 3 OR (t2.a (#2) > 5 AND t1.a (#0) > 1))] +├── filters: [t1.a (#0) > 3 or t2.a (#2) > 5 and t1.a (#0) > 1] ├── estimated rows: 0.00 └── HashJoin ├── output columns: [t2.a (#2), t2.b (#3), t1.b (#1), t1.a (#0)] @@ -60,7 +60,7 @@ Filter ├── estimated rows: 0.00 ├── Filter(Build) │ ├── output columns: [t1.a (#0), t1.b (#1)] - │ ├── filters: [(t1.a (#0) > 3 OR t1.a (#0) > 1)] + │ ├── filters: [t1.a (#0) > 3 or t1.a (#0) > 1] │ ├── estimated rows: 0.00 │ └── TableScan │ ├── table: default.default.t1 @@ -71,11 +71,11 @@ Filter │ ├── partitions total: 1 │ ├── partitions scanned: 0 │ ├── pruning stats: [segments: >] - │ ├── push downs: [filters: [(t1.a (#0) > 3 OR t1.a (#0) > 1)], limit: NONE] + │ ├── push downs: [filters: [t1.a (#0) > 3 or t1.a (#0) > 1], limit: NONE] │ └── estimated rows: 1.00 └── Filter(Probe) ├── output columns: [t2.a (#2), t2.b (#3)] - ├── filters: [(t2.a (#2) > 3 OR t2.a (#2) > 1)] + ├── filters: [t2.a (#2) > 3 or t2.a (#2) > 1] ├── estimated rows: 3.40 └── TableScan ├── table: default.default.t2 @@ -86,7 +86,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [(t2.a (#2) > 3 OR t2.a (#2) > 1)], limit: NONE] + ├── push downs: [filters: [t2.a (#2) > 3 or t2.a (#2) > 1], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 5.00 @@ -333,7 +333,7 @@ explain select * from t1,t2 where (t1.a > 1 and t2.a > 2) or (t1.b < 3 and t2.b ---- Filter ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] -├── filters: [((t1.a (#0) > 1 AND t2.a (#2) > 2) OR (t1.b (#1) < 3 AND t2.b (#3) < 4))] +├── filters: [t1.a (#0) > 1 and t2.a (#2) > 2 or t1.b (#1) < 3 and t2.b (#3) < 4] ├── estimated rows: 3.52 └── HashJoin ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] @@ -345,7 +345,7 @@ Filter ├── estimated rows: 4.40 ├── Filter(Build) │ ├── output columns: [t1.a (#0), t1.b (#1)] - │ ├── filters: [(t1.a (#0) > 1 OR t1.b (#1) < 3)] + │ ├── filters: [t1.a (#0) > 1 or t1.b (#1) < 3] │ ├── estimated rows: 1.00 │ └── TableScan │ ├── table: default.default.t1 @@ -356,11 +356,11 @@ Filter │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] - │ ├── push downs: [filters: [(t1.a (#0) > 1 OR t1.b (#1) < 3)], limit: NONE] + │ ├── push downs: [filters: [t1.a (#0) > 1 or t1.b (#1) < 3], limit: NONE] │ └── estimated rows: 1.00 └── Filter(Probe) ├── output columns: [t2.a (#2), t2.b (#3)] - ├── filters: [(t2.a (#2) > 2 OR t2.b (#3) < 4)] + ├── filters: [t2.a (#2) > 2 or t2.b (#3) < 4] ├── estimated rows: 4.40 └── TableScan ├── table: default.default.t2 @@ -371,7 +371,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [(t2.a (#2) > 2 OR t2.b (#3) < 4)], limit: NONE] + ├── push downs: [filters: [t2.a (#2) > 2 or t2.b (#3) < 4], limit: NONE] └── estimated rows: 5.00 query T @@ -379,7 +379,7 @@ explain select * from t1,t2 where (t1.a > 1 and t2.a > 2) or (t1.b < 3 and t2.b ---- Filter ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] -├── filters: [or_filters((t1.a (#0) > 1 AND t2.a (#2) > 2), (t1.b (#1) < 3 AND t2.b (#3) < 4), t1.a (#0) = 2)] +├── filters: [t1.a (#0) > 1 and t2.a (#2) > 2 or t1.b (#1) < 3 and t2.b (#3) < 4 or t1.a (#0) = 2] ├── estimated rows: 4.00 └── HashJoin ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] @@ -391,7 +391,7 @@ Filter ├── estimated rows: 5.00 ├── Filter(Build) │ ├── output columns: [t1.a (#0), t1.b (#1)] - │ ├── filters: [or_filters(t1.a (#0) > 1, t1.b (#1) < 3, t1.a (#0) = 2)] + │ ├── filters: [t1.a (#0) > 1 or t1.b (#1) < 3 or t1.a (#0) = 2] │ ├── estimated rows: 1.00 │ └── TableScan │ ├── table: default.default.t1 @@ -402,7 +402,7 @@ Filter │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: , bloom pruning: 1 to 1 cost: >] - │ ├── push downs: [filters: [or_filters(t1.a (#0) > 1, t1.b (#1) < 3, t1.a (#0) = 2)], limit: NONE] + │ ├── push downs: [filters: [t1.a (#0) > 1 or t1.b (#1) < 3 or t1.a (#0) = 2], limit: NONE] │ └── estimated rows: 1.00 └── TableScan(Probe) ├── table: default.default.t2 @@ -431,7 +431,7 @@ HashJoin ├── build keys: [] ├── probe keys: [] ├── keys is null equal: [] -├── filters: [or_filters((t1.a (#0) > 1 AND t2.a (#2) > 2), (t1.b (#1) < 3 AND t2.b (#3) < 4), t3.a (#4) = 2)] +├── filters: [t1.a (#0) > 1 and t2.a (#2) > 2 or t1.b (#1) < 3 and t2.b (#3) < 4 or t3.a (#4) = 2] ├── estimated rows: 50.00 ├── HashJoin(Build) │ ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] @@ -488,7 +488,7 @@ HashJoin ├── estimated rows: 28.16 ├── Filter(Build) │ ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] -│ ├── filters: [((t1.a (#0) > 1 AND t2.a (#2) > 2) OR (t1.b (#1) < 3 AND t2.b (#3) < 4))] +│ ├── filters: [t1.a (#0) > 1 and t2.a (#2) > 2 or t1.b (#1) < 3 and t2.b (#3) < 4] │ ├── estimated rows: 3.52 │ └── HashJoin │ ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] @@ -500,7 +500,7 @@ HashJoin │ ├── estimated rows: 4.40 │ ├── Filter(Build) │ │ ├── output columns: [t1.a (#0), t1.b (#1)] -│ │ ├── filters: [(t1.a (#0) > 1 OR t1.b (#1) < 3)] +│ │ ├── filters: [t1.a (#0) > 1 or t1.b (#1) < 3] │ │ ├── estimated rows: 1.00 │ │ └── TableScan │ │ ├── table: default.default.t1 @@ -511,11 +511,11 @@ HashJoin │ │ ├── partitions total: 1 │ │ ├── partitions scanned: 1 │ │ ├── pruning stats: [segments: >, blocks: >] -│ │ ├── push downs: [filters: [(t1.a (#0) > 1 OR t1.b (#1) < 3)], limit: NONE] +│ │ ├── push downs: [filters: [t1.a (#0) > 1 or t1.b (#1) < 3], limit: NONE] │ │ └── estimated rows: 1.00 │ └── Filter(Probe) │ ├── output columns: [t2.a (#2), t2.b (#3)] -│ ├── filters: [(t2.a (#2) > 2 OR t2.b (#3) < 4)] +│ ├── filters: [t2.a (#2) > 2 or t2.b (#3) < 4] │ ├── estimated rows: 4.40 │ └── TableScan │ ├── table: default.default.t2 @@ -526,7 +526,7 @@ HashJoin │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [(t2.a (#2) > 2 OR t2.b (#3) < 4)], limit: NONE] +│ ├── push downs: [filters: [t2.a (#2) > 2 or t2.b (#3) < 4], limit: NONE] │ └── estimated rows: 5.00 └── Filter(Probe) ├── output columns: [t3.a (#4), t3.b (#5)] @@ -558,7 +558,7 @@ Limit ├── estimated rows: 3.85 └── Filter ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] - ├── filters: [(((t1.a (#0) > 1 OR t1.b (#1) < 2) AND t2.a (#2) > 2) OR (t1.b (#1) < 3 AND t2.b (#3) < 4))] + ├── filters: [(t1.a (#0) > 1 or t1.b (#1) < 2) and t2.a (#2) > 2 or t1.b (#1) < 3 and t2.b (#3) < 4] ├── estimated rows: 3.85 └── HashJoin ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] @@ -570,7 +570,7 @@ Limit ├── estimated rows: 4.40 ├── Filter(Build) │ ├── output columns: [t1.a (#0), t1.b (#1)] - │ ├── filters: [or_filters(t1.a (#0) > 1, t1.b (#1) < 2, t1.b (#1) < 3)] + │ ├── filters: [t1.a (#0) > 1 or t1.b (#1) < 2 or t1.b (#1) < 3] │ ├── estimated rows: 1.00 │ └── TableScan │ ├── table: default.default.t1 @@ -581,11 +581,11 @@ Limit │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] - │ ├── push downs: [filters: [or_filters(t1.a (#0) > 1, t1.b (#1) < 2, t1.b (#1) < 3)], limit: NONE] + │ ├── push downs: [filters: [t1.a (#0) > 1 or t1.b (#1) < 2 or t1.b (#1) < 3], limit: NONE] │ └── estimated rows: 1.00 └── Filter(Probe) ├── output columns: [t2.a (#2), t2.b (#3)] - ├── filters: [(t2.a (#2) > 2 OR t2.b (#3) < 4)] + ├── filters: [t2.a (#2) > 2 or t2.b (#3) < 4] ├── estimated rows: 4.40 └── TableScan ├── table: default.default.t2 @@ -596,10 +596,10 @@ Limit ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [(t2.a (#2) > 2 OR t2.b (#3) < 4)], limit: NONE] + ├── push downs: [filters: [t2.a (#2) > 2 or t2.b (#3) < 4], limit: NONE] └── estimated rows: 5.00 -query +query T explain select * from t1,t2 where (t1.a > 1 or t1.b < 2) and (t1.a > 1 or t1.b < 2) ---- HashJoin @@ -612,7 +612,7 @@ HashJoin ├── estimated rows: 5.00 ├── Filter(Build) │ ├── output columns: [t1.a (#0), t1.b (#1)] -│ ├── filters: [(t1.a (#0) > 1 OR t1.b (#1) < 2)] +│ ├── filters: [t1.a (#0) > 1 or t1.b (#1) < 2] │ ├── estimated rows: 1.00 │ └── TableScan │ ├── table: default.default.t1 @@ -623,7 +623,7 @@ HashJoin │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [(t1.a (#0) > 1 OR t1.b (#1) < 2)], limit: NONE] +│ ├── push downs: [filters: [t1.a (#0) > 1 or t1.b (#1) < 2], limit: NONE] │ └── estimated rows: 1.00 └── TableScan(Probe) ├── table: default.default.t2 @@ -637,7 +637,7 @@ HashJoin ├── push downs: [filters: [], limit: NONE] └── estimated rows: 5.00 -query +query T explain select count(distinct a) from t1; ---- AggregateFinal @@ -670,7 +670,7 @@ AggregateFinal ├── push downs: [filters: [], limit: NONE] └── estimated rows: 1.00 -query +query T explain select count_distinct(a) from t1; ---- AggregateFinal @@ -703,7 +703,7 @@ AggregateFinal ├── push downs: [filters: [], limit: NONE] └── estimated rows: 1.00 -query +query T explain select * from (values(1, 'a'),(2, 'b')) t(c1,c2) ---- ConstantTableScan @@ -717,22 +717,22 @@ drop table t1 statement ok drop table t2 -query +query T explain syntax select * from read_parquet('p1', 'p2', 'p3'); ---- SELECT * FROM read_parquet('p1', 'p2', 'p3') -query +query T explain syntax select * from read_parquet(prune_page=>true, refresh_meta_cache=>true); ---- SELECT * FROM read_parquet(prune_page=>TRUE,refresh_meta_cache=>TRUE) -query +query T explain syntax select * from read_parquet('p1', 'p2', 'p3', prune_page=>true, refresh_meta_cache=>true); ---- SELECT * FROM read_parquet('p1', 'p2', 'p3',prune_page=>TRUE,refresh_meta_cache=>TRUE) -query +query T explain syntax select * from read_parquet('p1', 'p2', 'p3', prune_page=>true, refresh_meta_cache=>true); ---- SELECT * FROM read_parquet('p1', 'p2', 'p3',prune_page=>TRUE,refresh_meta_cache=>TRUE) @@ -743,7 +743,7 @@ drop table if exists t4 statement ok create OR REPLACE table t4(a int, b string); -query +query T explain select * from t4 where a = 1 and try_cast(get(try_parse_json(b),'bb') as varchar) = 'xx'; ---- Filter @@ -758,7 +758,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t4.a (#0) = 1, TRY_CAST(get(try_parse_json(t4.b (#1)), 'bb') AS String NULL) = 'xx')], limit: NONE] + ├── push downs: [filters: [t4.a (#0) = 1 and TRY_CAST(get(try_parse_json(t4.b (#1)), 'bb') AS String NULL) = 'xx'], limit: NONE] └── estimated rows: 0.00 statement ok @@ -767,7 +767,7 @@ drop view if exists v4 statement ok create view v4 as select a as a, try_cast(get(try_parse_json(b), 'bb') as varchar) as b from t4; -query +query T explain select * from v4 where b = 'xx'; ---- EvalScalar @@ -864,7 +864,7 @@ Limit │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] - │ ├── push downs: [filters: [and_filters(a.c1 (#1) >= 1683648000, a.c1 (#1) <= 1683734400)], limit: NONE] + │ ├── push downs: [filters: [a.c1 (#1) >= 1683648000 and a.c1 (#1) <= 1683734400], limit: NONE] │ └── estimated rows: 1.00 └── Filter(Probe) ├── output columns: [b.id (#2), b.c1 (#3)] @@ -879,7 +879,7 @@ Limit ├── partitions total: 2 ├── partitions scanned: 2 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [and_filters(b.c1 (#3) >= 1683648000, b.c1 (#3) <= 1683734400)], limit: NONE] + ├── push downs: [filters: [b.c1 (#3) >= 1683648000 and b.c1 (#3) <= 1683734400], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 2.00 @@ -1010,13 +1010,13 @@ statement ok drop table b; statement ok -create OR REPLACE table t1(a int, b int); +create or replace table t1(a int, b int); statement ok -insert into t1 values(1, 2), (2, 3), (3, 4); +insert into t1 values(1, 2), (2, 3), (300, 4); query T -explain select * from t1 where a in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300); +explain settings(inlist_to_join_threshold=5) select * from t1 where a in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 3000); ---- HashJoin ├── output columns: [t1.a (#0), t1.b (#1)] @@ -1025,7 +1025,7 @@ HashJoin ├── probe keys: [CAST(CAST(subquery_2 (#2) AS UInt16 NULL) AS Int32 NULL)] ├── keys is null equal: [false] ├── filters: [] -├── estimated rows: 3.00 +├── estimated rows: 2.20 ├── TableScan(Build) │ ├── table: default.default.t1 │ ├── scan id: 0 @@ -1041,22 +1041,22 @@ HashJoin ├── output columns: [col0 (#2)] ├── group by: [col0] ├── aggregate functions: [] - ├── estimated rows: 1297.00 + ├── estimated rows: 22.00 └── AggregatePartial ├── group by: [col0] ├── aggregate functions: [] - ├── estimated rows: 1297.00 + ├── estimated rows: 22.00 └── ConstantTableScan ├── output columns: [col0 (#2)] - └── column 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300] + └── column 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 3000] query T -explain select * from t1 where a not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300); +explain settings(inlist_to_join_threshold=5) select * from t1 where a not in (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 3000); ---- Filter ├── output columns: [t1.a (#0), t1.b (#1)] ├── filters: [is_true(NOT 3 (#3))] -├── estimated rows: 3.00 +├── estimated rows: 1.50 └── HashJoin ├── output columns: [t1.a (#0), t1.b (#1), marker (#3)] ├── join type: LEFT MARK @@ -1080,23 +1080,14 @@ Filter ├── output columns: [col0 (#2)] ├── group by: [col0] ├── aggregate functions: [] - ├── estimated rows: 1297.00 + ├── estimated rows: 22.00 └── AggregatePartial ├── group by: [col0] ├── aggregate functions: [] - ├── estimated rows: 1297.00 + ├── estimated rows: 22.00 └── ConstantTableScan ├── output columns: [col0 (#2)] - └── column 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 597, 598, 599, 600, 601, 602, 603, 604, 605, 606, 607, 608, 609, 610, 611, 612, 613, 614, 615, 616, 617, 618, 619, 620, 621, 622, 623, 624, 625, 626, 627, 628, 629, 630, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 664, 665, 666, 667, 668, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 719, 720, 721, 722, 723, 724, 725, 726, 727, 728, 729, 730, 731, 732, 733, 734, 735, 736, 737, 738, 739, 740, 741, 742, 743, 744, 745, 746, 747, 748, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 774, 775, 776, 777, 778, 779, 780, 781, 782, 783, 784, 785, 786, 787, 788, 789, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 801, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 818, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 833, 834, 835, 836, 837, 838, 839, 840, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 857, 858, 859, 860, 861, 862, 863, 864, 865, 866, 867, 868, 869, 870, 871, 872, 873, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 930, 931, 932, 933, 934, 935, 936, 937, 938, 939, 940, 941, 942, 943, 944, 945, 946, 947, 948, 949, 950, 951, 952, 953, 954, 955, 956, 957, 958, 959, 960, 961, 962, 963, 964, 965, 966, 967, 968, 969, 970, 971, 972, 973, 974, 975, 976, 977, 978, 979, 980, 981, 982, 983, 984, 985, 986, 987, 988, 989, 990, 991, 992, 993, 994, 995, 996, 997, 998, 999, 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007, 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015, 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023, 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031, 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039, 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047, 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055, 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063, 1064, 1065, 1066, 1067, 1068, 1069, 1070, 1071, 1072, 1073, 1074, 1075, 1076, 1077, 1078, 1079, 1080, 1081, 1082, 1083, 1084, 1085, 1086, 1087, 1088, 1089, 1090, 1091, 1092, 1093, 1094, 1095, 1096, 1097, 1098, 1099, 1100, 1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108, 1109, 1110, 1111, 1112, 1113, 1114, 1115, 1116, 1117, 1118, 1119, 1120, 1121, 1122, 1123, 1124, 1125, 1126, 1127, 1128, 1129, 1130, 1131, 1132, 1133, 1134, 1135, 1136, 1137, 1138, 1139, 1140, 1141, 1142, 1143, 1144, 1145, 1146, 1147, 1148, 1149, 1150, 1151, 1152, 1153, 1154, 1155, 1156, 1157, 1158, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1166, 1167, 1168, 1169, 1170, 1171, 1172, 1173, 1174, 1175, 1176, 1177, 1178, 1179, 1180, 1181, 1182, 1183, 1184, 1185, 1186, 1187, 1188, 1189, 1190, 1191, 1192, 1193, 1194, 1195, 1196, 1197, 1198, 1199, 1200, 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, 1213, 1214, 1215, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, 1224, 1225, 1226, 1227, 1228, 1229, 1230, 1231, 1232, 1233, 1234, 1235, 1236, 1237, 1238, 1239, 1240, 1241, 1242, 1243, 1244, 1245, 1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, 1256, 1257, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, 1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1276, 1277, 1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, 1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, 1298, 1299, 1300] - -statement ok -drop table t1; - -statement ok -drop table if exists t1; - -statement ok -drop table if exists t2; + └── column 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 3000] statement ok CREATE OR REPLACE TABLE t1(i int, j int); @@ -1731,11 +1722,11 @@ HashJoin ├── filters: [] ├── build join filters: │ └── filter id:0, build key:numbers.number (#1), probe targets:[a.number (#0)@scan0], filter type:bloom,inlist,min_max -├── estimated rows: 100.00 +├── estimated rows: 25.00 ├── Filter(Build) │ ├── output columns: [numbers.number (#1)] │ ├── filters: [NOT CAST(numbers.number (#1) AS Boolean)] -│ ├── estimated rows: 10.00 +│ ├── estimated rows: 5.00 │ └── TableScan │ ├── table: default.system.numbers │ ├── scan id: 1 @@ -1749,7 +1740,7 @@ HashJoin └── Filter(Probe) ├── output columns: [a.number (#0)] ├── filters: [NOT CAST(a.number (#0) AS Boolean)] - ├── estimated rows: 10.00 + ├── estimated rows: 5.00 └── TableScan ├── table: default.system.numbers ├── scan id: 0 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain_like.test b/tests/sqllogictests/suites/mode/standalone/explain/explain_like.test index 89e6fa6cec52b..ab74bedcefa48 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/explain_like.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/explain_like.test @@ -53,7 +53,7 @@ Sort(Single) ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [and_filters(t1.s (#0) >= 'abcd', t1.s (#0) < 'abce')], limit: NONE] + ├── push downs: [filters: [t1.s (#0) >= 'abcd' and t1.s (#0) < 'abce'], limit: NONE] └── estimated rows: 5.00 query T diff --git a/tests/sqllogictests/suites/mode/standalone/explain/explain_pipeline.test b/tests/sqllogictests/suites/mode/standalone/explain/explain_pipeline.test index 73b51e83b3e49..f664570e2b405 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/explain_pipeline.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/explain_pipeline.test @@ -58,7 +58,7 @@ insert into template values(1); statement ok explain pipeline truncate table template; -query T +query I select * from template; ---- 1 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/filter.test b/tests/sqllogictests/suites/mode/standalone/explain/filter.test index bc2944a05cd37..63651bfd6b4a6 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/filter.test @@ -20,7 +20,7 @@ explain select * from t1 where (a = 1 and b > 2) or (a = 1 and b < 100) or (a = ---- Filter ├── output columns: [t1.a (#0), t1.b (#1)] -├── filters: [is_true(t1.a (#0) = 1), is_true((t1.b (#1) > 2 OR t1.b (#1) < 100))] +├── filters: [is_true(t1.a (#0) = 1), t1.b (#1) > 2 or t1.b (#1) < 100] ├── estimated rows: 0.00 └── TableScan ├── table: default.default.t1 @@ -30,7 +30,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) = 1, (t1.b (#1) > 2 OR t1.b (#1) < 100))], limit: NONE] + ├── push downs: [filters: [t1.a (#0) = 1 and (t1.b (#1) > 2 or t1.b (#1) < 100)], limit: NONE] └── estimated rows: 0.00 query T @@ -38,7 +38,7 @@ explain select * from t1 where b > 2 or b > 2 or b < 100; ---- Filter ├── output columns: [t1.a (#0), t1.b (#1)] -├── filters: [is_true((t1.b (#1) > 2 OR t1.b (#1) < 100))] +├── filters: [t1.b (#1) > 2 or t1.b (#1) < 100] ├── estimated rows: 0.00 └── TableScan ├── table: default.default.t1 @@ -48,7 +48,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [is_true((t1.b (#1) > 2 OR t1.b (#1) < 100))], limit: NONE] + ├── push downs: [filters: [t1.b (#1) > 2 or t1.b (#1) < 100], limit: NONE] └── estimated rows: 0.00 statement ok @@ -112,8 +112,8 @@ explain select number from numbers(6) where number = 1 or number = 5 or number = ---- Filter ├── output columns: [numbers.number (#0)] -├── filters: [or_filters(numbers.number (#0) = 1, numbers.number (#0) = 5, numbers.number (#0) = 3)] -├── estimated rows: 5.25 +├── filters: [numbers.number (#0) = 1 or numbers.number (#0) = 5 or numbers.number (#0) = 3] +├── estimated rows: 3.00 └── TableScan ├── table: default.system.numbers ├── scan id: 0 @@ -122,7 +122,7 @@ Filter ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 - ├── push downs: [filters: [or_filters(numbers.number (#0) = 1, numbers.number (#0) = 5, numbers.number (#0) = 3)], limit: NONE] + ├── push downs: [filters: [numbers.number (#0) = 1 or numbers.number (#0) = 5 or numbers.number (#0) = 3], limit: NONE] └── estimated rows: 6.00 statement ok diff --git a/tests/sqllogictests/suites/mode/standalone/explain/index/explain_agg_index.test b/tests/sqllogictests/suites/mode/standalone/explain/index/explain_agg_index.test index a7c428458d5ae..1395f28a96b0d 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/index/explain_agg_index.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/index/explain_agg_index.test @@ -11,10 +11,7 @@ statement ok USE test_index_db statement ok -DROP TABLE IF EXISTS t1 - -statement ok -CREATE TABLE t1(a int not null,b int not null) Engine = Fuse +CREATE OR REPLACE TABLE t1(a int not null,b int not null) Engine = Fuse statement ok DROP AGGREGATING INDEX IF EXISTS idx1 @@ -458,7 +455,7 @@ Sort(Single) ├── estimated rows: 1.00 └── Filter ├── output columns: [onebrc.station_name (#0), onebrc.measurement (#1)] - ├── filters: [is_true(onebrc.measurement (#1) > 0), is_true(onebrc.station_name (#0) = 'Beijing'), is_true((onebrc.measurement (#1) = 1 OR onebrc.measurement (#1) = 2))] + ├── filters: [is_true(onebrc.measurement (#1) > 0), is_true(onebrc.station_name (#0) = 'Beijing'), onebrc.measurement (#1) = 1 or onebrc.measurement (#1) = 2] ├── estimated rows: 0.00 └── TableScan ├── table: default.test_index_db.onebrc @@ -468,9 +465,9 @@ Sort(Single) ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(and_filters(onebrc.measurement (#1) > 0, onebrc.station_name (#0) = 'Beijing'), (onebrc.measurement (#1) = 1 OR onebrc.measurement (#1) = 2))], limit: NONE] + ├── push downs: [filters: [onebrc.measurement (#1) > 0 and onebrc.station_name (#0) = 'Beijing' and (onebrc.measurement (#1) = 1 or onebrc.measurement (#1) = 2)], limit: NONE] ├── aggregating index: [SELECT station_name, measurement, COUNT(), COUNT(measurement), MAX(measurement), MIN(measurement), SUM(measurement) FROM test_index_db.onebrc GROUP BY station_name, measurement] - ├── rewritten query: [selection: [index_col_0 (#0), index_col_1 (#1), index_col_5 (#5), index_col_6 (#6), index_col_3 (#3), index_col_4 (#4)], filter: is_true(((index_col_1 (#1) > CAST(0 AS Float64 NULL) AND index_col_0 (#0) = CAST('Beijing' AS String NULL)) AND (index_col_1 (#1) = CAST(1 AS Float64 NULL) OR index_col_1 (#1) = CAST(2 AS Float64 NULL))))] + ├── rewritten query: [selection: [index_col_0 (#0), index_col_1 (#1), index_col_5 (#5), index_col_6 (#6), index_col_3 (#3), index_col_4 (#4)], filter: is_true(((index_col_1 (#1) > CAST(0 AS Float64 NULL) AND index_col_0 (#0) = CAST('Beijing' AS String NULL)) AND CAST(index_col_1 (#1) = CAST(1 AS Float64 NULL) or index_col_1 (#1) = CAST(2 AS Float64 NULL) AS Boolean NULL)))] └── estimated rows: 0.00 statement ok @@ -556,7 +553,7 @@ Sort(Single) ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(onebrc.measurement (#1) > 2, onebrc.measurement (#1) < 5)], limit: NONE] + ├── push downs: [filters: [onebrc.measurement (#1) > 2 and onebrc.measurement (#1) < 5], limit: NONE] ├── aggregating index: [SELECT station_name, measurement, COUNT(), COUNT(measurement), MAX(measurement), MIN(measurement), SUM(measurement) FROM test_index_db.onebrc WHERE measurement > 1 AND measurement < 5 GROUP BY station_name, measurement] ├── rewritten query: [selection: [index_col_0 (#0), index_col_1 (#1), index_col_5 (#5), index_col_6 (#6), index_col_3 (#3), index_col_4 (#4)], filter: is_true(index_col_1 (#1) > CAST(2 AS Float64 NULL))] └── estimated rows: 0.00 @@ -590,7 +587,7 @@ Sort(Single) ├── estimated rows: 1.00 └── Filter ├── output columns: [onebrc.station_name (#0), onebrc.measurement (#1)] - ├── filters: [is_true((onebrc.station_name (#0) = 'Paris' OR onebrc.station_name (#0) = 'Beijing'))] + ├── filters: [onebrc.station_name (#0) = 'Paris' or onebrc.station_name (#0) = 'Beijing'] ├── estimated rows: 0.00 └── TableScan ├── table: default.test_index_db.onebrc @@ -600,7 +597,7 @@ Sort(Single) ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [is_true((onebrc.station_name (#0) = 'Paris' OR onebrc.station_name (#0) = 'Beijing'))], limit: NONE] + ├── push downs: [filters: [onebrc.station_name (#0) = 'Paris' or onebrc.station_name (#0) = 'Beijing'], limit: NONE] ├── aggregating index: [SELECT station_name, COUNT(), COUNT(measurement), MAX(measurement), MIN(measurement), SUM(measurement) FROM test_index_db.onebrc WHERE station_name IN('Paris', 'Beijing') GROUP BY station_name] ├── rewritten query: [selection: [index_col_0 (#0), index_col_4 (#4), index_col_5 (#5), index_col_2 (#2), index_col_3 (#3)]] └── estimated rows: 0.00 @@ -723,7 +720,7 @@ EvalScalar ├── rewritten query: [selection: [index_col_0 (#0), index_col_1 (#1), index_col_4 (#4), index_col_5 (#5), index_col_3 (#3)]] └── estimated rows: 8.00 -query TII rowsort +query TIIR rowsort select event_name, user_id, max(user_id), avg(id) from t group by event_name, user_id ---- Login 1 1 4.0 @@ -768,7 +765,7 @@ EvalScalar ├── rewritten query: [selection: [index_col_0 (#0), index_col_1 (#1), index_col_4 (#4), index_col_5 (#5), index_col_3 (#3)], filter: is_true(index_col_0 (#0) > CAST(1 AS Int32 NULL))] └── estimated rows: 8.00 -query TII rowsort +query TIIR rowsort select event_name, user_id, max(user_id), avg(id) from t where user_id > 1 group by event_name, user_id ---- Login 2 2 5.0 @@ -815,7 +812,7 @@ Sort(Single) ├── rewritten query: [selection: [index_col_0 (#0), index_col_1 (#1), index_col_4 (#4), index_col_5 (#5), index_col_3 (#3)], filter: is_true(index_col_0 (#0) > CAST(1 AS Int32 NULL))] └── estimated rows: 8.00 -query TII +query TIIR select event_name, user_id, max(user_id), avg(id) from t where user_id > 1 group by event_name, user_id order by event_name, user_id desc ---- Login 4 4 6.0 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/index/explain_inverted_index.test b/tests/sqllogictests/suites/mode/standalone/explain/index/explain_inverted_index.test index 8d1ee10c6e7d5..97a9d3fe0ea94 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/index/explain_inverted_index.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/index/explain_inverted_index.test @@ -35,7 +35,7 @@ EXPLAIN SELECT id, content FROM t1 WHERE query('content:"early bird"') Filter ├── output columns: [t1.id (#0), t1.content (#1)] ├── filters: [t1._search_matched (#2)] -├── estimated rows: 10.00 +├── estimated rows: 5.00 └── TableScan ├── table: default.test_inverted_index_db.t1 ├── scan id: 0 @@ -63,11 +63,11 @@ RowFetch └── Sort(Single) ├── output columns: [t1.id (#0), t1._row_id (#3)] ├── sort keys: [id ASC NULLS LAST] - ├── estimated rows: 10.00 + ├── estimated rows: 5.00 └── Filter ├── output columns: [t1.id (#0), t1._row_id (#3)] ├── filters: [t1._search_matched (#2)] - ├── estimated rows: 10.00 + ├── estimated rows: 5.00 └── TableScan ├── table: default.test_inverted_index_db.t1 ├── scan id: 0 @@ -95,11 +95,11 @@ RowFetch └── Sort(Single) ├── output columns: [t1._search_score (#3), t1._row_id (#4)] ├── sort keys: [_search_score ASC NULLS LAST] - ├── estimated rows: 10.00 + ├── estimated rows: 5.00 └── Filter ├── output columns: [t1._search_score (#3), t1._row_id (#4)] ├── filters: [t1._search_matched (#2)] - ├── estimated rows: 10.00 + ├── estimated rows: 5.00 └── TableScan ├── table: default.test_inverted_index_db.t1 ├── scan id: 0 @@ -127,7 +127,7 @@ RowFetch └── Filter ├── output columns: [t1._row_id (#3)] ├── filters: [t1._search_matched (#2)] - ├── estimated rows: 10.00 + ├── estimated rows: 5.00 └── TableScan ├── table: default.test_inverted_index_db.t1 ├── scan id: 0 @@ -168,7 +168,7 @@ EXPLAIN SELECT id, content FROM t2 WHERE query('content:"early bird"') Filter ├── output columns: [t2.id (#0), t2.content (#1)] ├── filters: [t2._search_matched (#2)] -├── estimated rows: 10.00 +├── estimated rows: 5.00 └── TableScan ├── table: default.test_inverted_index_db.t2 ├── scan id: 0 @@ -192,11 +192,11 @@ Limit └── Sort(Single) ├── output columns: [t2.id (#0), t2.content (#1)] ├── sort keys: [id ASC NULLS LAST] - ├── estimated rows: 10.00 + ├── estimated rows: 5.00 └── Filter ├── output columns: [t2.id (#0), t2.content (#1)] ├── filters: [t2._search_matched (#2)] - ├── estimated rows: 10.00 + ├── estimated rows: 5.00 └── TableScan ├── table: default.test_inverted_index_db.t2 ├── scan id: 0 @@ -214,4 +214,3 @@ USE default statement ok DROP DATABASE IF EXISTS test_inverted_index_db - diff --git a/tests/sqllogictests/suites/mode/standalone/explain/index/explain_ngram_index.test b/tests/sqllogictests/suites/mode/standalone/explain/index/explain_ngram_index.test index fcb2720dd8774..9c4fb1cbd9ef4 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/index/explain_ngram_index.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/index/explain_ngram_index.test @@ -130,7 +130,7 @@ Filter ├── push downs: [filters: [is_true(like(t1.content (#1), '%月无声%'))], limit: NONE] └── estimated rows: 16.00 -query T +query IT SELECT id, content FROM t1 WHERE content LIKE '%your eggs%' ---- 8 Put all your eggs in one basket @@ -193,7 +193,7 @@ TableScan ├── push downs: [filters: [is_true(like(t2.content (#1), '%your eggs'))], limit: NONE] └── estimated rows: 0.03 -query T +query IT SELECT id, content FROM t1 WHERE content LIKE '%your eggs%' ---- 8 Put all your eggs in one basket @@ -316,7 +316,7 @@ INSERT INTO t3 VALUES (3, 'The early bird catches the worm', 'When life gives you lemons, make lemonade'), (4, 'Actions speak louder than words', 'Put all your eggs in one basket'); -query II +query III select block_size, bloom_filter_size, ngram_index_size from fuse_block('test_ngram_index_db', 't3'); ---- 209 894 NULL diff --git a/tests/sqllogictests/suites/mode/standalone/explain/infer_filter.test b/tests/sqllogictests/suites/mode/standalone/explain/infer_filter.test index 9518a47c1a540..839a3aa5d9f82 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/infer_filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/infer_filter.test @@ -259,7 +259,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) <> 1, t1.a (#0) <> 2)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) <> 1 and t1.a (#0) <> 2], limit: NONE] └── estimated rows: 0.00 # a < 1 @@ -297,7 +297,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) <> 1, t1.a (#0) < 2)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) <> 1 and t1.a (#0) < 2], limit: NONE] └── estimated rows: 0.00 # a != 1 and a <= 1 @@ -335,7 +335,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) <> 1, t1.a (#0) <= 2)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) <> 1 and t1.a (#0) <= 2], limit: NONE] └── estimated rows: 0.00 # a > 1; @@ -373,7 +373,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) <> 1, t1.a (#0) > 0)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) <> 1 and t1.a (#0) > 0], limit: NONE] └── estimated rows: 0.00 # a != 1 and a >= 1 @@ -411,7 +411,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) <> 1, t1.a (#0) >= 0)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) <> 1 and t1.a (#0) >= 0], limit: NONE] └── estimated rows: 0.00 # false @@ -474,7 +474,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) < 5, t1.a (#0) <> 2)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) < 5 and t1.a (#0) <> 2], limit: NONE] └── estimated rows: 0.00 # a < 5 @@ -518,7 +518,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) < 5, t1.a (#0) > 2)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) < 5 and t1.a (#0) > 2], limit: NONE] └── estimated rows: 0.00 # a > 10 and a <= 100 @@ -537,7 +537,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) <= 100, t1.a (#0) > 10)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) <= 100 and t1.a (#0) > 10], limit: NONE] └── estimated rows: 0.00 # t1.a > 10 and t2.a > 10 @@ -620,7 +620,7 @@ HashJoin │ │ ├── read size: 0 │ │ ├── partitions total: 0 │ │ ├── partitions scanned: 0 -│ │ ├── push downs: [filters: [and_filters(t3.a (#4) > 5, t3.a (#4) < 10)], limit: NONE] +│ │ ├── push downs: [filters: [t3.a (#4) > 5 and t3.a (#4) < 10], limit: NONE] │ │ └── estimated rows: 0.00 │ └── Filter(Probe) │ ├── output columns: [t2.a (#2), t2.b (#3)] @@ -634,7 +634,7 @@ HashJoin │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 -│ ├── push downs: [filters: [and_filters(t2.a (#2) > 5, t2.a (#2) < 10)], limit: NONE] +│ ├── push downs: [filters: [t2.a (#2) > 5 and t2.a (#2) < 10], limit: NONE] │ ├── apply join filters: [#0] │ └── estimated rows: 0.00 └── Filter(Probe) @@ -649,7 +649,7 @@ HashJoin ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) > 5, t1.a (#0) < 10)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) > 5 and t1.a (#0) < 10], limit: NONE] ├── apply join filters: [#1] └── estimated rows: 0.00 @@ -811,7 +811,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(CAST(t4.a (#0) AS Int32 NULL) = TRY_CAST(t4.b (#1) AS Int32 NULL), is_not_null(strip_null_value(t4.b (#1))))], limit: NONE] + ├── push downs: [filters: [CAST(t4.a (#0) AS Int32 NULL) = TRY_CAST(t4.b (#1) AS Int32 NULL) and is_not_null(strip_null_value(t4.b (#1)))], limit: NONE] └── estimated rows: 0.00 query T diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join.test b/tests/sqllogictests/suites/mode/standalone/explain/join.test index 9390cfacc3a91..7e193b605b80e 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join.test @@ -304,44 +304,44 @@ query T explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where b.x > 44 or b.x < 43 ---- HashJoin -├── output columns: [b.x (#1), b.y (#2), a.x (#0)] +├── output columns: [a.x (#0), b.y (#2), b.x (#1)] ├── join type: INNER -├── build keys: [a.x (#0)] -├── probe keys: [b.x (#1)] +├── build keys: [b.x (#1)] +├── probe keys: [a.x (#0)] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: -│ └── filter id:0, build key:a.x (#0), probe targets:[b.x (#1)@scan1], filter type:bloom,inlist,min_max -├── estimated rows: 1.33 +│ └── filter id:0, build key:b.x (#1), probe targets:[a.x (#0)@scan0], filter type:bloom,inlist,min_max +├── estimated rows: 2.67 ├── Filter(Build) -│ ├── output columns: [a.x (#0)] -│ ├── filters: [is_true((a.x (#0) > 44 OR a.x (#0) < 43))] -│ ├── estimated rows: 1.33 +│ ├── output columns: [b.x (#1), b.y (#2)] +│ ├── filters: [b.x (#1) > 44 or b.x (#1) < 43] +│ ├── estimated rows: 2.00 │ └── TableScan -│ ├── table: default.default.onecolumn -│ ├── scan id: 0 -│ ├── output columns: [x (#0)] +│ ├── table: default.default.twocolumn +│ ├── scan id: 1 +│ ├── output columns: [x (#1), y (#2)] │ ├── read rows: 4 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [is_true((onecolumn.x (#0) > 44 OR onecolumn.x (#0) < 43))], limit: NONE] +│ ├── push downs: [filters: [twocolumn.x (#1) > 44 or twocolumn.x (#1) < 43], limit: NONE] │ └── estimated rows: 4.00 └── Filter(Probe) - ├── output columns: [b.x (#1), b.y (#2)] - ├── filters: [is_true((b.x (#1) > 44 OR b.x (#1) < 43))] - ├── estimated rows: 2.00 + ├── output columns: [a.x (#0)] + ├── filters: [a.x (#0) > 44 or a.x (#0) < 43] + ├── estimated rows: 2.67 └── TableScan - ├── table: default.default.twocolumn - ├── scan id: 1 - ├── output columns: [x (#1), y (#2)] + ├── table: default.default.onecolumn + ├── scan id: 0 + ├── output columns: [x (#0)] ├── read rows: 4 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [is_true((twocolumn.x (#1) > 44 OR twocolumn.x (#1) < 43))], limit: NONE] + ├── push downs: [filters: [onecolumn.x (#0) > 44 or onecolumn.x (#0) < 43], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 4.00 @@ -371,7 +371,7 @@ HashJoin │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] +│ ├── push downs: [filters: [twocolumn.x (#1) > 42 and twocolumn.x (#1) < 45], limit: NONE] │ └── estimated rows: 4.00 └── Filter(Probe) ├── output columns: [a.x (#0)] @@ -386,7 +386,7 @@ HashJoin ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] + ├── push downs: [filters: [onecolumn.x (#0) > 42 and onecolumn.x (#0) < 45], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 4.00 @@ -397,8 +397,8 @@ explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where ---- Filter ├── output columns: [a.x (#0), b.x (#1), b.y (#2)] -├── filters: [is_true((b.x (#1) > 44 OR a.x (#0) < 43))] -├── estimated rows: 1.78 +├── filters: [b.x (#1) > 44 or a.x (#0) < 43] +├── estimated rows: 3.56 └── HashJoin ├── output columns: [a.x (#0), b.x (#1), b.y (#2)] ├── join type: LEFT OUTER @@ -454,7 +454,7 @@ HashJoin │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] +│ ├── push downs: [filters: [onecolumn.x (#0) > 42 and onecolumn.x (#0) < 45], limit: NONE] │ └── estimated rows: 4.00 └── Filter(Probe) ├── output columns: [b.x (#1), b.y (#2)] @@ -469,7 +469,7 @@ HashJoin ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] + ├── push downs: [filters: [twocolumn.x (#1) > 42 and twocolumn.x (#1) < 45], limit: NONE] └── estimated rows: 4.00 statement ok @@ -1260,7 +1260,7 @@ HashJoin ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.b (#1) >= 3, CAST(t1.b (#1) AS Int64 NULL) <= 5)], limit: NONE] + ├── push downs: [filters: [t1.b (#1) >= 3 and CAST(t1.b (#1) AS Int64 NULL) <= 5], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 0.00 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/lazy_read.test b/tests/sqllogictests/suites/mode/standalone/explain/lazy_read.test index 4a022cb88df24..db3451045d274 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/lazy_read.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/lazy_read.test @@ -113,7 +113,7 @@ RowFetch ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(and_filters(t_11831.time (#3) >= 1686672000000, t_11831.time (#3) <= 1686758399000), t_11831.uid (#0) = 11)], limit: NONE] + ├── push downs: [filters: [t_11831.time (#3) >= 1686672000000 and t_11831.time (#3) <= 1686758399000 and t_11831.uid (#0) = 11], limit: NONE] └── estimated rows: 0.00 statement ok @@ -221,7 +221,7 @@ Limit └── estimated rows: 0.00 # Will use lazy materialization -query ? +query T explain with cte as (select a, a + 1 as X, b + 1 as Y from t_lazy order by a limit 3) select X,Y,a from (select * from cte order by Y limit 2) order by X limit 1; ---- Limit diff --git a/tests/sqllogictests/suites/mode/standalone/explain/merge_into.test b/tests/sqllogictests/suites/mode/standalone/explain/merge_into.test index 34a8a1a749569..80e5b00665b3e 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/merge_into.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/merge_into.test @@ -16,7 +16,7 @@ INSERT INTO employees2 VALUES(1, 'Alice', 'HR'),(2, 'Bob', 'IT'),(3, 'Charlie', statement ok INSERT INTO salaries2 VALUES(1, 50000.00),(2, 60000.00); -query TT +query II MERGE INTO salaries2 USING (SELECT * FROM employees2) as employees2 ON salaries2.employee_id = employees2.employee_id WHEN MATCHED AND employees2.department = 'HR' THEN UPDATE SET salaries2.salary = salaries2.salary + 1000.00 WHEN MATCHED THEN UPDATE SET salaries2.salary = salaries2.salary + 500.00 WHEN NOT MATCHED THEN INSERT (employee_id, salary) VALUES (employees2.employee_id, 55000.00); ---- 2 2 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/prewhere_optimization.test b/tests/sqllogictests/suites/mode/standalone/explain/prewhere_optimization.test index 0e0f60151552d..9fb7fcd44cc11 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/prewhere_optimization.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/prewhere_optimization.test @@ -45,7 +45,7 @@ explain select * from t_where_optimizer where a = 1 or b > 2 ---- Filter ├── output columns: [t_where_optimizer.a (#0), t_where_optimizer.b (#1)] -├── filters: [is_true((t_where_optimizer.a (#0) = 1 OR t_where_optimizer.b (#1) > 2))] +├── filters: [t_where_optimizer.a (#0) = 1 or t_where_optimizer.b (#1) > 2] ├── estimated rows: 0.00 └── TableScan ├── table: default.default.t_where_optimizer @@ -55,7 +55,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [is_true((t_where_optimizer.a (#0) = 1 OR t_where_optimizer.b (#1) > 2))], limit: NONE] + ├── push downs: [filters: [t_where_optimizer.a (#0) = 1 or t_where_optimizer.b (#1) > 2], limit: NONE] └── estimated rows: 0.00 query T @@ -73,7 +73,7 @@ Filter ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t_where_optimizer.a (#0) = 1, t_where_optimizer.b (#1) > 2)], limit: NONE] + ├── push downs: [filters: [t_where_optimizer.a (#0) = 1 and t_where_optimizer.b (#1) > 2], limit: NONE] └── estimated rows: 0.00 query T diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test index 12eb0fdded291..7ed166717f775 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test @@ -214,7 +214,7 @@ HashJoin │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [and_filters(t2.a (#2) > 0, t2.b (#3) > 0)], limit: NONE] +│ ├── push downs: [filters: [t2.a (#2) > 0 and t2.b (#3) > 0], limit: NONE] │ └── estimated rows: 3.00 └── Filter(Probe) ├── output columns: [t1.a (#0), t1.b (#1)] @@ -275,7 +275,7 @@ HashJoin ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [and_filters(t1.a (#0) > 0, t1.b (#1) > 0)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) > 0 and t1.b (#1) > 0], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 4.00 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test index 992024abf7f75..7c81c8ce4e657 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test @@ -69,8 +69,8 @@ explain select * from t1 inner join t2 on t1.a = t2.a where t2.a <= 2 or (t1.a > ---- Filter ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] -├── filters: [is_true((t2.a (#2) <= 2 OR (t1.a (#0) > 1 AND t2.a (#2) > 1)))] -├── estimated rows: 2.00 +├── filters: [t2.a (#2) <= 2 or t1.a (#0) > 1 and t2.a (#2) > 1] +├── estimated rows: 3.56 └── HashJoin ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] ├── join type: INNER @@ -80,11 +80,11 @@ Filter ├── filters: [] ├── build join filters: │ └── filter id:0, build key:t2.a (#2), probe targets:[t1.a (#0)@scan0], filter type:bloom,inlist,min_max - ├── estimated rows: 2.67 + ├── estimated rows: 3.56 ├── Filter(Build) │ ├── output columns: [t2.a (#2), t2.b (#3)] - │ ├── filters: [is_true((t2.a (#2) <= 2 OR t2.a (#2) > 1))] - │ ├── estimated rows: 2.25 + │ ├── filters: [t2.a (#2) <= 2 or t2.a (#2) > 1] + │ ├── estimated rows: 3.00 │ └── TableScan │ ├── table: default.default.t2 │ ├── scan id: 1 @@ -94,11 +94,11 @@ Filter │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] - │ ├── push downs: [filters: [is_true((t2.a (#2) <= 2 OR t2.a (#2) > 1))], limit: NONE] + │ ├── push downs: [filters: [t2.a (#2) <= 2 or t2.a (#2) > 1], limit: NONE] │ └── estimated rows: 3.00 └── Filter(Probe) ├── output columns: [t1.a (#0), t1.b (#1)] - ├── filters: [is_true((t1.a (#0) <= 2 OR t1.a (#0) > 1))] + ├── filters: [t1.a (#0) <= 2 or t1.a (#0) > 1] ├── estimated rows: 3.56 └── TableScan ├── table: default.default.t1 @@ -109,7 +109,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [is_true((t1.a (#0) <= 2 OR t1.a (#0) > 1))], limit: NONE] + ├── push downs: [filters: [t1.a (#0) <= 2 or t1.a (#0) > 1], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 4.00 diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test index b60012dc93b29..6248a61fe3ea3 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test @@ -299,7 +299,7 @@ insert into t select number from numbers(50); statement ok insert into t1 select number from numbers(10) where number >4; -query I +query T explain select t1.a, t2.action as action from (select a,'INSERT' as action from t where a>1) t2 left join t1 on t2.a=t1.a where action != 'INSERT'; ---- HashJoin diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_project_set.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_project_set.test index 759ab3d603677..0439a13be58d9 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_project_set.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_project_set.test @@ -36,10 +36,10 @@ EvalScalar ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: , bloom pruning: 1 to 1 cost: >] - ├── push downs: [filters: [and_filters(products.name (#0) = 'Laptop', json_path_query_first(products.details (#1), '$.features.*') = '"16GB"')], limit: NONE] + ├── push downs: [filters: [products.name (#0) = 'Laptop' and json_path_query_first(products.details (#1), '$.features.*') = '"16GB"'], limit: NONE] └── estimated rows: 3.00 -query T +query T?? select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB' and all_features = '512GB'; ---- Laptop "512GB" "16GB" @@ -68,10 +68,10 @@ EvalScalar ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: , bloom pruning: 1 to 1 cost: >] - ├── push downs: [filters: [and_filters(products.name (#0) = 'Laptop', json_path_query_first(products.details (#1), '$.features.*') = '"16GB"')], limit: NONE] + ├── push downs: [filters: [products.name (#0) = 'Laptop' and json_path_query_first(products.details (#1), '$.features.*') = '"16GB"'], limit: NONE] └── estimated rows: 3.00 -query T +query T?? select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB'; ---- Laptop "16GB" "16GB" diff --git a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_scan.test b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_scan.test index 64783f959e0a5..0918f24981c5f 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_scan.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/push_down_filter/push_down_filter_scan.test @@ -8,7 +8,7 @@ create table t (x INT); statement ok insert into t(x) values (1), (2); -query I +query T explain select * from t as a(id) where a.id > 1; ---- Filter diff --git a/tests/sqllogictests/suites/mode/standalone/explain/selectivity/pr_16069.test b/tests/sqllogictests/suites/mode/standalone/explain/selectivity/pr_16069.test index 414c1fbd40a63..f7468def28274 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/selectivity/pr_16069.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/selectivity/pr_16069.test @@ -9,7 +9,7 @@ explain select * from t where 10 < number or 20 < number; ---- Filter ├── output columns: [t.number (#0)] -├── filters: [(10 < t.number (#0) OR 20 < t.number (#0))] +├── filters: [10 < t.number (#0) or 20 < t.number (#0)] ├── estimated rows: 97.69 └── TableScan ├── table: default.default.t @@ -20,7 +20,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [(10 < t.number (#0) OR 20 < t.number (#0))], limit: NONE] + ├── push downs: [filters: [10 < t.number (#0) or 20 < t.number (#0)], limit: NONE] └── estimated rows: 100.00 statement ok diff --git a/tests/sqllogictests/suites/mode/standalone/explain/subquery.test b/tests/sqllogictests/suites/mode/standalone/explain/subquery.test index 657e0fac1516e..6798427bd587c 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/subquery.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/subquery.test @@ -84,8 +84,8 @@ explain select t.number from numbers(1) as t where exists (select t1.number from ---- Filter ├── output columns: [t.number (#0)] -├── filters: [(is_true(2 (#2)) OR t.number (#0) > 1)] -├── estimated rows: 0.60 +├── filters: [is_true(2 (#2)) or t.number (#0) > 1] +├── estimated rows: 0.50 └── HashJoin ├── output columns: [t.number (#0), marker (#2)] ├── join type: RIGHT MARK @@ -352,7 +352,7 @@ HashJoin │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 -│ ├── push downs: [filters: [and_filters(numbers.number (#1) = 0, numbers.number (#1) < 10)], limit: NONE] +│ ├── push downs: [filters: [numbers.number (#1) = 0 and numbers.number (#1) < 10], limit: NONE] │ └── estimated rows: 1.00 └── Filter(Probe) ├── output columns: [t.number (#0)] @@ -728,8 +728,8 @@ explain select t.number from numbers(10) as t where t.number in (select * from u ---- Filter ├── output columns: [t.number (#0)] -├── filters: [(t.number (#0) = 1 OR t.number (#0) = 3)] -├── estimated rows: 7.50 +├── filters: [t.number (#0) = 1 or t.number (#0) = 3] +├── estimated rows: 5.00 └── TableScan ├── table: default.system.numbers ├── scan id: 0 @@ -738,7 +738,7 @@ Filter ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 - ├── push downs: [filters: [(numbers.number (#0) = 1 OR numbers.number (#0) = 3)], limit: NONE] + ├── push downs: [filters: [numbers.number (#0) = 1 or numbers.number (#0) = 3], limit: NONE] └── estimated rows: 10.00 query T diff --git a/tests/sqllogictests/suites/mode/standalone/explain/table_sample.test b/tests/sqllogictests/suites/mode/standalone/explain/table_sample.test index 8b2bb92263a2f..b8cb4cc85ebdb 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/table_sample.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/table_sample.test @@ -33,12 +33,12 @@ drop table t; statement ok create or replace table t as select number as a from numbers(10000000); -query I +query B select count(a) < 10000000 from t sample block(50); ---- 1 -query I +query B select count(a) < 10000000 from t sample block(50) row(10); ---- 1 diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/bloom_filter.test b/tests/sqllogictests/suites/mode/standalone/explain_native/bloom_filter.test index 6705a68328a28..02d7a1bfc46f6 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/bloom_filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/bloom_filter.test @@ -29,7 +29,7 @@ TableScan ├── push downs: [filters: [is_true(bloom_test_t.c1 (#0) = 5)], limit: NONE] └── estimated rows: 1.50 -query T +query II select * from bloom_test_t where c1 = 5 ---- 5 6 @@ -56,26 +56,26 @@ select * from bloom_test_t where c1 = 5 # 1. filter using another column (c2), of the same block, where there is one row matched # 1.1 check that filtering on column c2 does work as expected -query T +query II select * from bloom_test_t where c2 = 6 ---- 5 6 # 1.2 check that if incorrect filtering is applied, an empty result set will be returned -query T +query I select count(*) from bloom_test_t where c1 = 6 ---- 0 # 2. filtering on column c2, of the same block, where there is no row matched # 2.1 check that an empty result set returned -query T +query I select count(*) from bloom_test_t where c2 = 5 ---- 0 # 2.2 check that if incorrect filtering is applied, a non-empty result set will be returned -query T +query I select count(*) from bloom_test_t where c1 = 5 ---- 1 @@ -147,7 +147,7 @@ TableScan ├── partitions total: 2 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: , bloom pruning: 2 to 1 cost: >] -├── push downs: [filters: [and_filters(bloom_test_nullable_t.c1 (#0) = 5, bloom_test_nullable_t.c2 (#1) > 1)], limit: NONE] +├── push downs: [filters: [bloom_test_nullable_t.c1 (#0) = 5 and bloom_test_nullable_t.c2 (#1) > 1], limit: NONE] └── estimated rows: 1.50 # fix https://github.com/datafuselabs/databend/issues/15570 @@ -175,7 +175,7 @@ TableScan ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: , bloom pruning: 1 to 1 cost: >] ├── push downs: [filters: [NOT is_not_null(bloom_test_nullable_t2.c2 (#2) = '1')], limit: NONE] -└── estimated rows: 1.60 +└── estimated rows: 0.40 query T explain select * from bloom_test_nullable_t2 where (not (not c0)) @@ -189,5 +189,5 @@ TableScan ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] -├── push downs: [filters: [is_true(NOT NOT bloom_test_nullable_t2.c0 (#0))], limit: NONE] -└── estimated rows: 2.00 +├── push downs: [filters: [is_true(bloom_test_nullable_t2.c0 (#0))], limit: NONE] +└── estimated rows: 1.00 diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test b/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test index f58dae2b10322..cfe77e77bd82c 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/explain.test @@ -30,7 +30,7 @@ explain select * from t1, t2 where (t1.a = t2.a and t1.a > 3) or (t1.a = t2.a an ---- Filter ├── output columns: [t2.a (#2), t2.b (#3), t1.b (#1), t1.a (#0)] -├── filters: [(t1.a (#0) > 3 OR (t2.a (#2) > 5 AND t1.a (#0) > 1))] +├── filters: [t1.a (#0) > 3 or t2.a (#2) > 5 and t1.a (#0) > 1] ├── estimated rows: 0.00 └── HashJoin ├── output columns: [t2.a (#2), t2.b (#3), t1.b (#1), t1.a (#0)] @@ -51,7 +51,7 @@ Filter │ ├── partitions total: 1 │ ├── partitions scanned: 0 │ ├── pruning stats: [segments: >] - │ ├── push downs: [filters: [(t1.a (#0) > 3 OR t1.a (#0) > 1)], limit: NONE] + │ ├── push downs: [filters: [t1.a (#0) > 3 or t1.a (#0) > 1], limit: NONE] │ └── estimated rows: 0.00 └── TableScan(Probe) ├── table: default.default.t2 @@ -62,7 +62,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [(t2.a (#2) > 3 OR t2.a (#2) > 1)], limit: NONE] + ├── push downs: [filters: [t2.a (#2) > 3 or t2.a (#2) > 1], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 3.40 @@ -309,7 +309,7 @@ explain select * from t1,t2 where (t1.a > 1 and t2.a > 2) or (t1.b < 3 and t2.b ---- Filter ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] -├── filters: [((t1.a (#0) > 1 AND t2.a (#2) > 2) OR (t1.b (#1) < 3 AND t2.b (#3) < 4))] +├── filters: [t1.a (#0) > 1 and t2.a (#2) > 2 or t1.b (#1) < 3 and t2.b (#3) < 4] ├── estimated rows: 3.52 └── HashJoin ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] @@ -328,7 +328,7 @@ Filter │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] - │ ├── push downs: [filters: [(t1.a (#0) > 1 OR t1.b (#1) < 3)], limit: NONE] + │ ├── push downs: [filters: [t1.a (#0) > 1 or t1.b (#1) < 3], limit: NONE] │ └── estimated rows: 1.00 └── TableScan(Probe) ├── table: default.default.t2 @@ -339,7 +339,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [(t2.a (#2) > 2 OR t2.b (#3) < 4)], limit: NONE] + ├── push downs: [filters: [t2.a (#2) > 2 or t2.b (#3) < 4], limit: NONE] └── estimated rows: 4.40 query T @@ -347,7 +347,7 @@ explain select * from t1,t2 where (t1.a > 1 and t2.a > 2) or (t1.b < 3 and t2.b ---- Filter ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] -├── filters: [or_filters((t1.a (#0) > 1 AND t2.a (#2) > 2), (t1.b (#1) < 3 AND t2.b (#3) < 4), t1.a (#0) = 2)] +├── filters: [t1.a (#0) > 1 and t2.a (#2) > 2 or t1.b (#1) < 3 and t2.b (#3) < 4 or t1.a (#0) = 2] ├── estimated rows: 4.00 └── HashJoin ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] @@ -366,7 +366,7 @@ Filter │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: , bloom pruning: 1 to 1 cost: >] - │ ├── push downs: [filters: [or_filters(t1.a (#0) > 1, t1.b (#1) < 3, t1.a (#0) = 2)], limit: NONE] + │ ├── push downs: [filters: [t1.a (#0) > 1 or t1.b (#1) < 3 or t1.a (#0) = 2], limit: NONE] │ └── estimated rows: 1.00 └── TableScan(Probe) ├── table: default.default.t2 @@ -395,7 +395,7 @@ HashJoin ├── build keys: [] ├── probe keys: [] ├── keys is null equal: [] -├── filters: [or_filters((t1.a (#0) > 1 AND t2.a (#2) > 2), (t1.b (#1) < 3 AND t2.b (#3) < 4), t3.a (#4) = 2)] +├── filters: [t1.a (#0) > 1 and t2.a (#2) > 2 or t1.b (#1) < 3 and t2.b (#3) < 4 or t3.a (#4) = 2] ├── estimated rows: 50.00 ├── HashJoin(Build) │ ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] @@ -452,7 +452,7 @@ HashJoin ├── estimated rows: 28.16 ├── Filter(Build) │ ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] -│ ├── filters: [((t1.a (#0) > 1 AND t2.a (#2) > 2) OR (t1.b (#1) < 3 AND t2.b (#3) < 4))] +│ ├── filters: [t1.a (#0) > 1 and t2.a (#2) > 2 or t1.b (#1) < 3 and t2.b (#3) < 4] │ ├── estimated rows: 3.52 │ └── HashJoin │ ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] @@ -471,7 +471,7 @@ HashJoin │ │ ├── partitions total: 1 │ │ ├── partitions scanned: 1 │ │ ├── pruning stats: [segments: >, blocks: >] -│ │ ├── push downs: [filters: [(t1.a (#0) > 1 OR t1.b (#1) < 3)], limit: NONE] +│ │ ├── push downs: [filters: [t1.a (#0) > 1 or t1.b (#1) < 3], limit: NONE] │ │ └── estimated rows: 1.00 │ └── TableScan(Probe) │ ├── table: default.default.t2 @@ -482,7 +482,7 @@ HashJoin │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [(t2.a (#2) > 2 OR t2.b (#3) < 4)], limit: NONE] +│ ├── push downs: [filters: [t2.a (#2) > 2 or t2.b (#3) < 4], limit: NONE] │ └── estimated rows: 4.40 └── TableScan(Probe) ├── table: default.default.t3 @@ -510,7 +510,7 @@ Limit ├── estimated rows: 3.85 └── Filter ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] - ├── filters: [(((t1.a (#0) > 1 OR t1.b (#1) < 2) AND t2.a (#2) > 2) OR (t1.b (#1) < 3 AND t2.b (#3) < 4))] + ├── filters: [(t1.a (#0) > 1 or t1.b (#1) < 2) and t2.a (#2) > 2 or t1.b (#1) < 3 and t2.b (#3) < 4] ├── estimated rows: 3.85 └── HashJoin ├── output columns: [t2.a (#2), t2.b (#3), t1.a (#0), t1.b (#1)] @@ -529,7 +529,7 @@ Limit │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] - │ ├── push downs: [filters: [or_filters(t1.a (#0) > 1, t1.b (#1) < 2, t1.b (#1) < 3)], limit: NONE] + │ ├── push downs: [filters: [t1.a (#0) > 1 or t1.b (#1) < 2 or t1.b (#1) < 3], limit: NONE] │ └── estimated rows: 1.00 └── TableScan(Probe) ├── table: default.default.t2 @@ -540,7 +540,7 @@ Limit ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [(t2.a (#2) > 2 OR t2.b (#3) < 4)], limit: NONE] + ├── push downs: [filters: [t2.a (#2) > 2 or t2.b (#3) < 4], limit: NONE] └── estimated rows: 4.40 query T @@ -563,7 +563,7 @@ HashJoin │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [(t1.a (#0) > 1 OR t1.b (#1) < 2)], limit: NONE] +│ ├── push downs: [filters: [t1.a (#0) > 1 or t1.b (#1) < 2], limit: NONE] │ └── estimated rows: 1.00 └── TableScan(Probe) ├── table: default.default.t2 @@ -862,7 +862,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(t4.a (#0) = 1, TRY_CAST(get(try_parse_json(t4.b (#1)), 'bb') AS String NULL) = 'xx')], limit: NONE] +├── push downs: [filters: [t4.a (#0) = 1 and TRY_CAST(get(try_parse_json(t4.b (#1)), 'bb') AS String NULL) = 'xx'], limit: NONE] └── estimated rows: 0.00 statement ok @@ -925,7 +925,7 @@ INSERT INTO t1 VALUES (1, 2), (2, 3), (3, 4); statement ok INSERT INTO t2 VALUES (1, 10), (2, 20); -query I +query T EXPLAIN SELECT * FROM t1 LEFT OUTER JOIN t2 ON TRUE AND t1.i = t2.k AND FALSE order by i, j; ---- Sort(Single) diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/filter.test b/tests/sqllogictests/suites/mode/standalone/explain_native/filter.test index 42b9cb298196e..0f01e30b2fa8d 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/filter.test @@ -21,7 +21,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(t1.a (#0) = 1, (t1.b (#1) > 2 OR t1.b (#1) < 100))], limit: NONE] +├── push downs: [filters: [t1.a (#0) = 1 and (t1.b (#1) > 2 or t1.b (#1) < 100)], limit: NONE] └── estimated rows: 0.00 query T @@ -35,5 +35,5 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [is_true((t1.b (#1) > 2 OR t1.b (#1) < 100))], limit: NONE] +├── push downs: [filters: [t1.b (#1) > 2 or t1.b (#1) < 100], limit: NONE] └── estimated rows: 0.00 diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/infer_filter.test b/tests/sqllogictests/suites/mode/standalone/explain_native/infer_filter.test index becc16a2b3997..a3821098238bc 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/infer_filter.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/infer_filter.test @@ -215,7 +215,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(t1.a (#0) <> 1, t1.a (#0) <> 2)], limit: NONE] +├── push downs: [filters: [t1.a (#0) <> 1 and t1.a (#0) <> 2], limit: NONE] └── estimated rows: 0.00 # a < 1 @@ -245,7 +245,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(t1.a (#0) <> 1, t1.a (#0) < 2)], limit: NONE] +├── push downs: [filters: [t1.a (#0) <> 1 and t1.a (#0) < 2], limit: NONE] └── estimated rows: 0.00 # a != 1 and a <= 1 @@ -275,7 +275,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(t1.a (#0) <> 1, t1.a (#0) <= 2)], limit: NONE] +├── push downs: [filters: [t1.a (#0) <> 1 and t1.a (#0) <= 2], limit: NONE] └── estimated rows: 0.00 # a > 1; @@ -305,7 +305,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(t1.a (#0) <> 1, t1.a (#0) > 0)], limit: NONE] +├── push downs: [filters: [t1.a (#0) <> 1 and t1.a (#0) > 0], limit: NONE] └── estimated rows: 0.00 # a != 1 and a >= 1 @@ -335,7 +335,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(t1.a (#0) <> 1, t1.a (#0) >= 0)], limit: NONE] +├── push downs: [filters: [t1.a (#0) <> 1 and t1.a (#0) >= 0], limit: NONE] └── estimated rows: 0.00 # false @@ -386,7 +386,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(t1.a (#0) < 5, t1.a (#0) <> 2)], limit: NONE] +├── push downs: [filters: [t1.a (#0) < 5 and t1.a (#0) <> 2], limit: NONE] └── estimated rows: 0.00 # a < 5 @@ -422,7 +422,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(t1.a (#0) < 5, t1.a (#0) > 2)], limit: NONE] +├── push downs: [filters: [t1.a (#0) < 5 and t1.a (#0) > 2], limit: NONE] └── estimated rows: 0.00 # a > 10 and a <= 100 @@ -437,7 +437,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(t1.a (#0) <= 100, t1.a (#0) > 10)], limit: NONE] +├── push downs: [filters: [t1.a (#0) <= 100 and t1.a (#0) > 10], limit: NONE] └── estimated rows: 0.00 # t1.a > 10 and t2.a > 10 @@ -508,7 +508,7 @@ HashJoin │ │ ├── read size: 0 │ │ ├── partitions total: 0 │ │ ├── partitions scanned: 0 -│ │ ├── push downs: [filters: [and_filters(t3.a (#4) > 5, t3.a (#4) < 10)], limit: NONE] +│ │ ├── push downs: [filters: [t3.a (#4) > 5 and t3.a (#4) < 10], limit: NONE] │ │ └── estimated rows: 0.00 │ └── TableScan(Probe) │ ├── table: default.default.t2 @@ -518,7 +518,7 @@ HashJoin │ ├── read size: 0 │ ├── partitions total: 0 │ ├── partitions scanned: 0 -│ ├── push downs: [filters: [and_filters(t2.a (#2) > 5, t2.a (#2) < 10)], limit: NONE] +│ ├── push downs: [filters: [t2.a (#2) > 5 and t2.a (#2) < 10], limit: NONE] │ ├── apply join filters: [#0] │ └── estimated rows: 0.00 └── TableScan(Probe) @@ -529,7 +529,7 @@ HashJoin ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(t1.a (#0) > 5, t1.a (#0) < 10)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) > 5 and t1.a (#0) < 10], limit: NONE] ├── apply join filters: [#1] └── estimated rows: 0.00 @@ -675,7 +675,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(CAST(t4.a (#0) AS Int32 NULL) = TRY_CAST(t4.b (#1) AS Int32 NULL), is_not_null(strip_null_value(t4.b (#1))))], limit: NONE] +├── push downs: [filters: [CAST(t4.a (#0) AS Int32 NULL) = TRY_CAST(t4.b (#1) AS Int32 NULL) and is_not_null(strip_null_value(t4.b (#1)))], limit: NONE] └── estimated rows: 0.00 query T diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/join.test b/tests/sqllogictests/suites/mode/standalone/explain_native/join.test index 1bb65797f895f..3c3c6173d37e6 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/join.test @@ -280,38 +280,38 @@ query T explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where b.x > 44 or b.x < 43 ---- HashJoin -├── output columns: [b.x (#1), b.y (#2), a.x (#0)] +├── output columns: [a.x (#0), b.y (#2), b.x (#1)] ├── join type: INNER -├── build keys: [a.x (#0)] -├── probe keys: [b.x (#1)] +├── build keys: [b.x (#1)] +├── probe keys: [a.x (#0)] ├── keys is null equal: [false] ├── filters: [] ├── build join filters: -│ └── filter id:0, build key:a.x (#0), probe targets:[b.x (#1)@scan1], filter type:bloom,inlist,min_max -├── estimated rows: 1.33 +│ └── filter id:0, build key:b.x (#1), probe targets:[a.x (#0)@scan0], filter type:bloom,inlist,min_max +├── estimated rows: 2.67 ├── TableScan(Build) -│ ├── table: default.default.onecolumn -│ ├── scan id: 0 -│ ├── output columns: [x (#0)] +│ ├── table: default.default.twocolumn +│ ├── scan id: 1 +│ ├── output columns: [x (#1), y (#2)] │ ├── read rows: 4 │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [is_true((onecolumn.x (#0) > 44 OR onecolumn.x (#0) < 43))], limit: NONE] -│ └── estimated rows: 1.33 +│ ├── push downs: [filters: [twocolumn.x (#1) > 44 or twocolumn.x (#1) < 43], limit: NONE] +│ └── estimated rows: 2.00 └── TableScan(Probe) - ├── table: default.default.twocolumn - ├── scan id: 1 - ├── output columns: [x (#1), y (#2)] + ├── table: default.default.onecolumn + ├── scan id: 0 + ├── output columns: [x (#0)] ├── read rows: 4 ├── read size: < 1 KiB ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [is_true((twocolumn.x (#1) > 44 OR twocolumn.x (#1) < 43))], limit: NONE] + ├── push downs: [filters: [onecolumn.x (#0) > 44 or onecolumn.x (#0) < 43], limit: NONE] ├── apply join filters: [#0] - └── estimated rows: 2.00 + └── estimated rows: 2.67 query T explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where b.x > 42 and b.x < 45 @@ -335,7 +335,7 @@ HashJoin │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] +│ ├── push downs: [filters: [twocolumn.x (#1) > 42 and twocolumn.x (#1) < 45], limit: NONE] │ └── estimated rows: 2.00 └── TableScan(Probe) ├── table: default.default.onecolumn @@ -346,7 +346,7 @@ HashJoin ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] + ├── push downs: [filters: [onecolumn.x (#0) > 42 and onecolumn.x (#0) < 45], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 2.00 @@ -357,8 +357,8 @@ explain select * from onecolumn as a left join twocolumn as b on a.x = b.x where ---- Filter ├── output columns: [a.x (#0), b.x (#1), b.y (#2)] -├── filters: [is_true((b.x (#1) > 44 OR a.x (#0) < 43))] -├── estimated rows: 1.78 +├── filters: [b.x (#1) > 44 or a.x (#0) < 43] +├── estimated rows: 3.56 └── HashJoin ├── output columns: [a.x (#0), b.x (#1), b.y (#2)] ├── join type: LEFT OUTER @@ -410,7 +410,7 @@ HashJoin │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [and_filters(onecolumn.x (#0) > 42, onecolumn.x (#0) < 45)], limit: NONE] +│ ├── push downs: [filters: [onecolumn.x (#0) > 42 and onecolumn.x (#0) < 45], limit: NONE] │ └── estimated rows: 2.00 └── TableScan(Probe) ├── table: default.default.twocolumn @@ -421,7 +421,7 @@ HashJoin ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [and_filters(twocolumn.x (#1) > 42, twocolumn.x (#1) < 45)], limit: NONE] + ├── push downs: [filters: [twocolumn.x (#1) > 42 and twocolumn.x (#1) < 45], limit: NONE] └── estimated rows: 2.00 statement ok diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/prewhere_optimization.test b/tests/sqllogictests/suites/mode/standalone/explain_native/prewhere_optimization.test index ff144c065e662..51ca0548406d2 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/prewhere_optimization.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/prewhere_optimization.test @@ -64,7 +64,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [is_true((t_where_optimizer.a (#0) = 1 OR t_where_optimizer.b (#1) > 2))], limit: NONE] +├── push downs: [filters: [t_where_optimizer.a (#0) = 1 or t_where_optimizer.b (#1) > 2], limit: NONE] └── estimated rows: 0.00 query T @@ -78,7 +78,7 @@ TableScan ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 -├── push downs: [filters: [and_filters(t_where_optimizer.a (#0) = 1, t_where_optimizer.b (#1) > 2)], limit: NONE] +├── push downs: [filters: [t_where_optimizer.a (#0) = 1 and t_where_optimizer.b (#1) > 2], limit: NONE] └── estimated rows: 0.00 query T diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test index 14ee4b860d5c0..9dcae879fbc8d 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_full_outer.test @@ -186,7 +186,7 @@ HashJoin │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] -│ ├── push downs: [filters: [and_filters(t2.a (#2) > 0, t2.b (#3) > 0)], limit: NONE] +│ ├── push downs: [filters: [t2.a (#2) > 0 and t2.b (#3) > 0], limit: NONE] │ └── estimated rows: 3.00 └── TableScan(Probe) ├── table: default.default.t1 @@ -235,7 +235,7 @@ HashJoin ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [and_filters(t1.a (#0) > 0, t1.b (#1) > 0)], limit: NONE] + ├── push downs: [filters: [t1.a (#0) > 0 and t1.b (#1) > 0], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 4.00 diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test index 5e6f2be96ab34..12bc184304678 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_inner.test @@ -61,8 +61,8 @@ explain select * from t1 inner join t2 on t1.a = t2.a where t2.a <= 2 or (t1.a > ---- Filter ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] -├── filters: [is_true((t2.a (#2) <= 2 OR (t1.a (#0) > 1 AND t2.a (#2) > 1)))] -├── estimated rows: 2.00 +├── filters: [t2.a (#2) <= 2 or t1.a (#0) > 1 and t2.a (#2) > 1] +├── estimated rows: 3.56 └── HashJoin ├── output columns: [t1.a (#0), t1.b (#1), t2.b (#3), t2.a (#2)] ├── join type: INNER @@ -72,7 +72,7 @@ Filter ├── filters: [] ├── build join filters: │ └── filter id:0, build key:t2.a (#2), probe targets:[t1.a (#0)@scan0], filter type:bloom,inlist,min_max - ├── estimated rows: 2.67 + ├── estimated rows: 3.56 ├── TableScan(Build) │ ├── table: default.default.t2 │ ├── scan id: 1 @@ -82,8 +82,8 @@ Filter │ ├── partitions total: 1 │ ├── partitions scanned: 1 │ ├── pruning stats: [segments: >, blocks: >] - │ ├── push downs: [filters: [is_true((t2.a (#2) <= 2 OR t2.a (#2) > 1))], limit: NONE] - │ └── estimated rows: 2.25 + │ ├── push downs: [filters: [t2.a (#2) <= 2 or t2.a (#2) > 1], limit: NONE] + │ └── estimated rows: 3.00 └── TableScan(Probe) ├── table: default.default.t1 ├── scan id: 0 @@ -93,7 +93,7 @@ Filter ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: >] - ├── push downs: [filters: [is_true((t1.a (#0) <= 2 OR t1.a (#0) > 1))], limit: NONE] + ├── push downs: [filters: [t1.a (#0) <= 2 or t1.a (#0) > 1], limit: NONE] ├── apply join filters: [#0] └── estimated rows: 3.56 diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test index e120b57996567..6cb226b60ce11 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_join/push_down_filter_join_left_outer.test @@ -271,7 +271,7 @@ insert into t select number from numbers(50); statement ok insert into t1 select number from numbers(10) where number >4; -query I +query T explain select t1.a, t2.action as action from (select a,'INSERT' as action from t where a>1) t2 left join t1 on t2.a=t1.a where action != 'INSERT'; ---- HashJoin diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_project_set.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_project_set.test index 19610db6efb4e..2a4215580ab51 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_project_set.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_project_set.test @@ -32,10 +32,10 @@ EvalScalar ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: , bloom pruning: 1 to 1 cost: >] - ├── push downs: [filters: [and_filters(products.name (#0) = 'Laptop', json_path_query_first(products.details (#1), '$.features.*') = '"16GB"')], limit: NONE] + ├── push downs: [filters: [products.name (#0) = 'Laptop' and json_path_query_first(products.details (#1), '$.features.*') = '"16GB"'], limit: NONE] └── estimated rows: 0.60 -query T +query T?? select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB' and all_features = '512GB'; ---- Laptop "512GB" "16GB" @@ -60,10 +60,10 @@ EvalScalar ├── partitions total: 1 ├── partitions scanned: 1 ├── pruning stats: [segments: >, blocks: , bloom pruning: 1 to 1 cost: >] - ├── push downs: [filters: [and_filters(products.name (#0) = 'Laptop', json_path_query_first(products.details (#1), '$.features.*') = '"16GB"')], limit: NONE] + ├── push downs: [filters: [products.name (#0) = 'Laptop' and json_path_query_first(products.details (#1), '$.features.*') = '"16GB"'], limit: NONE] └── estimated rows: 0.60 -query T +query T?? select name, json_path_query(details, '$.features.*') as all_features, json_path_query_first(details, '$.features.*') as first_feature from products where name = 'Laptop' and first_feature = '16GB'; ---- Laptop "16GB" "16GB" diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_scan.test b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_scan.test index 64783f959e0a5..0918f24981c5f 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_scan.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/push_down_filter/push_down_filter_scan.test @@ -8,7 +8,7 @@ create table t (x INT); statement ok insert into t(x) values (1), (2); -query I +query T explain select * from t as a(id) where a.id > 1; ---- Filter diff --git a/tests/sqllogictests/suites/mode/standalone/explain_native/subquery.test b/tests/sqllogictests/suites/mode/standalone/explain_native/subquery.test index 19eea51fc861e..39c1307f72d18 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain_native/subquery.test +++ b/tests/sqllogictests/suites/mode/standalone/explain_native/subquery.test @@ -84,8 +84,8 @@ explain select t.number from numbers(1) as t where exists (select t1.number from ---- Filter ├── output columns: [t.number (#0)] -├── filters: [(is_true(2 (#2)) OR t.number (#0) > 1)] -├── estimated rows: 0.60 +├── filters: [is_true(2 (#2)) or t.number (#0) > 1] +├── estimated rows: 0.50 └── HashJoin ├── output columns: [t.number (#0), marker (#2)] ├── join type: RIGHT MARK @@ -327,7 +327,7 @@ HashJoin │ ├── read size: < 1 KiB │ ├── partitions total: 1 │ ├── partitions scanned: 1 -│ ├── push downs: [filters: [and_filters(numbers.number (#1) = 0, numbers.number (#1) < 10)], limit: NONE] +│ ├── push downs: [filters: [numbers.number (#1) = 0 and numbers.number (#1) < 10], limit: NONE] │ └── estimated rows: 1.00 └── Filter(Probe) ├── output columns: [t.number (#0)] diff --git a/tests/sqllogictests/suites/no_table_meta_cache/explain_native/auto_rebuild_missing_bloom_index.test b/tests/sqllogictests/suites/no_table_meta_cache/explain_native/auto_rebuild_missing_bloom_index.test index 7362444f3d0a9..db54b5560fb74 100644 --- a/tests/sqllogictests/suites/no_table_meta_cache/explain_native/auto_rebuild_missing_bloom_index.test +++ b/tests/sqllogictests/suites/no_table_meta_cache/explain_native/auto_rebuild_missing_bloom_index.test @@ -122,7 +122,7 @@ TableScan ├── push downs: [filters: [is_true(t.s (#1) = '12')], limit: NONE] └── estimated rows: 2.67 -query TT +query IT select * from t where s = '12'; ---- @@ -151,7 +151,7 @@ TableScan ├── push downs: [filters: [is_true(t.c (#0) = 6)], limit: NONE] └── estimated rows: 1.00 -query TT +query IT select * from t where c = 6; ---- @@ -161,7 +161,7 @@ create or replace table t_index(`bloom(0)` binary, `bloom(1)` binary); statement ok copy into t_index from @rebuild_missing_bloom_index_stage pattern = '.*/_i_b_v2/.*.parquet' file_format=(type=parquet missing_field_as=field_default); -query I +query BB select is_null(`bloom(0)`) as b1, is_null(`bloom(1)`) as b2 from t_index order by b2; ---- 0 0 @@ -169,6 +169,6 @@ select is_null(`bloom(0)`) as b1, is_null(`bloom(1)`) as b2 from t_index order # re-generate other bloom index -query TT +query IT select * from t where c = 12; ---- diff --git a/tests/sqllogictests/suites/tpch_iceberg/prune.test b/tests/sqllogictests/suites/tpch_iceberg/prune.test index da5d30097f87d..743acd60cae9c 100644 --- a/tests/sqllogictests/suites/tpch_iceberg/prune.test +++ b/tests/sqllogictests/suites/tpch_iceberg/prune.test @@ -41,11 +41,11 @@ explain select 1 from ctl.tpch.lineitem where l_orderkey < 1 or l_commitdate < EvalScalar ├── output columns: [1 (#16)] ├── expressions: [1] -├── estimated rows: 0.00 +├── estimated rows: 120114.40 └── Filter ├── output columns: [] - ├── filters: [is_true((lineitem.l_orderkey (#0) < 1 OR lineitem.l_commitdate (#11) < '1992-01-31'))] - ├── estimated rows: 0.00 + ├── filters: [lineitem.l_orderkey (#0) < 1 or lineitem.l_commitdate (#11) < '1992-01-31'] + ├── estimated rows: 120114.40 └── TableScan ├── table: ctl.tpch.lineitem ├── scan id: 0 @@ -54,7 +54,7 @@ EvalScalar ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [is_true((lineitem.l_orderkey (#0) < 1 OR lineitem.l_commitdate (#11) < '1992-01-31'))], limit: NONE] + ├── push downs: [filters: [lineitem.l_orderkey (#0) < 1 or lineitem.l_commitdate (#11) < '1992-01-31'], limit: NONE] └── estimated rows: 600572.00 query T @@ -76,7 +76,7 @@ EvalScalar ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(lineitem.l_orderkey (#0) < 1, lineitem.l_commitdate (#11) > '1992-01-31')], limit: NONE] + ├── push downs: [filters: [lineitem.l_orderkey (#0) < 1 and lineitem.l_commitdate (#11) > '1992-01-31'], limit: NONE] └── estimated rows: 600572.00 query T @@ -98,7 +98,7 @@ EvalScalar ├── read size: 0 ├── partitions total: 0 ├── partitions scanned: 0 - ├── push downs: [filters: [and_filters(lineitem.l_orderkey (#0) > 1, lineitem.l_commitdate (#11) = '1992-01-22')], limit: NONE] + ├── push downs: [filters: [lineitem.l_orderkey (#0) > 1 and lineitem.l_commitdate (#11) = '1992-01-22'], limit: NONE] └── estimated rows: 600572.00 @@ -124,7 +124,7 @@ EvalScalar ├── push downs: [filters: [NOT is_not_null(lineitem.l_orderkey (#0))], limit: NONE] └── estimated rows: 600572.00 -query T +query I select count(*) from ctl.tpch.lineitem where l_orderkey is null or l_commitdate is not null; ---- 600572