Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
be839a4
add operation converter class
Tmonster Nov 2, 2023
748eee6
it compiles
Tmonster Nov 2, 2023
ce2225c
convert intersect and except to logical comparison joins
Tmonster Nov 10, 2023
4d1d102
it works. Thanks @lnkuiper
Tmonster Nov 10, 2023
2b926bf
remove unused variable
Tmonster Nov 10, 2023
fe2ab13
no unrecognized parameter
Tmonster Nov 13, 2023
bdb2445
tidy fix
Tmonster Nov 13, 2023
b17b28b
change test file. Figure out this logical execute stuff
Tmonster Nov 13, 2023
962d856
remove cout
Tmonster Nov 13, 2023
901c135
fix resolve types error
Tmonster Nov 13, 2023
85dece8
move test group
Tmonster Nov 13, 2023
66df2fc
figuring out why this execute is being an issue
Tmonster Nov 13, 2023
55e02de
more comments to pick up on
Tmonster Nov 13, 2023
612cc58
logical execute should still resolve types in case resolveTypes is ca…
Tmonster Nov 14, 2023
ce5cee6
remove unused code
Tmonster Nov 14, 2023
020deb5
tidy fixes
Tmonster Nov 14, 2023
af32dd2
clang tidy. more fixes
Tmonster Nov 15, 2023
f006df4
remove redundant return true
Tmonster Nov 15, 2023
27ea407
naming fix
Tmonster Nov 15, 2023
ad4843c
very lost. Dont know how to push an aggregate on the join. I think I …
Tmonster Nov 23, 2023
b486f17
just add a logical distinct instead
Tmonster Nov 24, 2023
72d2317
fix broken tests after adding distinct
Tmonster Nov 27, 2023
8037882
add header
Tmonster Nov 29, 2023
2730dae
clean up PR
Tmonster Dec 4, 2023
a681dc3
try to do everything in the optimizer
Tmonster Dec 19, 2023
e5420fb
all passes now, need to clean this up and move it to the planning phase
Tmonster Dec 19, 2023
2a0a851
honestly having much more trouble moving this to the planner than I t…
Tmonster Dec 19, 2023
bff6a81
very select_list is messing up the results, dont know why
Tmonster Dec 19, 2023
ea998a1
planning and binding is hard to understand
Tmonster Dec 19, 2023
eb4ec51
think I have fixed more, but still getting some erros regarding flatt…
Tmonster Dec 19, 2023
b168da2
remove unused code
Tmonster Dec 19, 2023
c20718b
more removal of dead code
Tmonster Dec 20, 2023
16ca96a
fix merge conflict
Tmonster Jan 3, 2024
ed15d66
temporary stop. need to copy the filters so they can be pushed down t…
Tmonster Jan 3, 2024
29cb2b8
stopping point. it's possible filters are pulled up from the right of…
Tmonster Jan 4, 2024
13c78f5
Merge branch 'pushdown_filters_into_semi_and_anti_joins' into 568-ope…
Tmonster Jan 4, 2024
04bcfa0
make format-fix
Tmonster Jan 4, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 4 additions & 61 deletions src/execution/physical_plan/plan_set_operation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,19 +10,6 @@

namespace duckdb {

//! Builds an expression list containing a single ROW_NUMBER window expression (BIGINT result)
//! that is partitioned by every input column, referenced by position.
//! \param types the types of the input columns; column i is referenced as BoundReferenceExpression(types[i], i)
//! \return a vector holding exactly one window expression
static vector<unique_ptr<Expression>> CreatePartitionedRowNumExpression(const vector<LogicalType> &types) {
	auto row_number =
	    make_uniq<BoundWindowExpression>(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr);
	row_number->start = WindowBoundary::UNBOUNDED_PRECEDING;
	row_number->end = WindowBoundary::UNBOUNDED_FOLLOWING;

	// partition by all columns, in positional order
	idx_t column_index = 0;
	for (auto &column_type : types) {
		row_number->partitions.push_back(make_uniq<BoundReferenceExpression>(column_type, column_index));
		column_index++;
	}

	vector<unique_ptr<Expression>> expressions;
	expressions.push_back(std::move(row_number));
	return expressions;
}

static JoinCondition CreateNotDistinctComparison(const LogicalType &type, idx_t i) {
JoinCondition cond;
cond.left = make_uniq<BoundReferenceExpression>(type, i);
Expand All @@ -43,6 +30,8 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSetOperati
throw InvalidInputException("Type mismatch for SET OPERATION");
}

// can't switch logical unions to semi/anti join
// also if the operation is an INTERSECT ALL or EXCEPT ALL
switch (op.type) {
case LogicalOperatorType::LOGICAL_UNION:
// UNION
Expand All @@ -51,54 +40,8 @@ unique_ptr<PhysicalOperator> PhysicalPlanGenerator::CreatePlan(LogicalSetOperati
break;
case LogicalOperatorType::LOGICAL_EXCEPT:
case LogicalOperatorType::LOGICAL_INTERSECT: {
auto &types = left->GetTypes();
vector<JoinCondition> conditions;
// create equality condition for all columns
for (idx_t i = 0; i < types.size(); i++) {
conditions.push_back(CreateNotDistinctComparison(types[i], i));
}
// For EXCEPT ALL / INTERSECT ALL we push a window operator with a ROW_NUMBER into the scans and join to get bag
// semantics.
if (op.setop_all) {
vector<LogicalType> window_types = types;
window_types.push_back(LogicalType::BIGINT);

auto window_left = make_uniq<PhysicalWindow>(window_types, CreatePartitionedRowNumExpression(types),
left->estimated_cardinality);
window_left->children.push_back(std::move(left));
left = std::move(window_left);

auto window_right = make_uniq<PhysicalWindow>(window_types, CreatePartitionedRowNumExpression(types),
right->estimated_cardinality);
window_right->children.push_back(std::move(right));
right = std::move(window_right);

// add window expression result to join condition
conditions.push_back(CreateNotDistinctComparison(LogicalType::BIGINT, types.size()));
// join (created below) now includes the row number result column
op.types.push_back(LogicalType::BIGINT);
}

// EXCEPT is ANTI join
// INTERSECT is SEMI join
PerfectHashJoinStats join_stats; // used in inner joins only

JoinType join_type = op.type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI;
result = make_uniq<PhysicalHashJoin>(op, std::move(left), std::move(right), std::move(conditions), join_type,
op.estimated_cardinality, join_stats);

// For EXCEPT ALL / INTERSECT ALL we need to remove the row number column again
if (op.setop_all) {
vector<unique_ptr<Expression>> projection_select_list;
for (idx_t i = 0; i < types.size(); i++) {
projection_select_list.push_back(make_uniq<BoundReferenceExpression>(types[i], i));
}
auto projection =
make_uniq<PhysicalProjection>(types, std::move(projection_select_list), op.estimated_cardinality);
projection->children.push_back(std::move(result));
result = std::move(projection);
}
break;
throw InternalException(
"Logical Except/Intersect should have been transformed to semi anti before the physical planning phase");
}
default:
throw InternalException("Unexpected operator type for set operation");
Expand Down
2 changes: 1 addition & 1 deletion src/include/duckdb/planner/operator/logical_execute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ class LogicalExecute : public LogicalOperator {

protected:
void ResolveTypes() override {
// (re)assign the operator's types from the types stored on `prepared`,
// so that calling ResolveTypes leaves `types` populated
types = prepared->types;
}
vector<ColumnBinding> GetColumnBindings() override {
return GenerateColumnBindings(0, types.size());
Expand Down
37 changes: 37 additions & 0 deletions src/include/duckdb/planner/operator/logical_filter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,46 @@
#pragma once

#include "duckdb/planner/logical_operator.hpp"
#include "duckdb/planner/operator/logical_comparison_join.hpp"
#include "duckdb/planner/expression/bound_columnref_expression.hpp"

namespace duckdb {

//! Returns true when a filter that sits on top of `op` may also be applied to the right child of `op`.
//!
//! This is only the case when `op` is a SEMI comparison join whose conditions compare *every* column of the
//! left child with the column at the same position of the right child using IS NOT DISTINCT FROM, i.e.
//! condition i must be exactly `left_bindings[i] IS NOT DISTINCT FROM right_bindings[i]`.
//! Any other shape (different join type, children of different width, another comparison type, or a condition
//! whose operands are not plain column references) returns false.
//!
//! \param op the operator to inspect; its two children are only accessed once it is known to be a comparison join
//! \return true if filters can be duplicated onto the right side, false otherwise
inline bool CanFiltersPropogateRightSide(LogicalOperator &op) {
	if (op.type != LogicalOperatorType::LOGICAL_COMPARISON_JOIN) {
		return false;
	}
	auto &join = op.Cast<LogicalComparisonJoin>();
	if (join.join_type != JoinType::SEMI) {
		return false;
	}
	auto left_bindings = op.children[0]->GetColumnBindings();
	auto right_bindings = op.children[1]->GetColumnBindings();
	// an arbitrary semi join does not need children of equal width; such a join never qualifies.
	// (this must be a runtime check: the loop below indexes both binding lists with the same index)
	if (left_bindings.size() != right_bindings.size()) {
		return false;
	}
	// make sure we are comparing every column
	auto &conditions = join.conditions;
	if (conditions.size() != left_bindings.size()) {
		return false;
	}
	for (idx_t i = 0; i < conditions.size(); i++) {
		auto &cond = conditions[i];
		// every condition has to be a NOT DISTINCT FROM comparison ...
		if (cond.comparison != ExpressionType::COMPARE_NOT_DISTINCT_FROM) {
			return false;
		}
		// ... between two plain column references ...
		if (cond.left->type != ExpressionType::BOUND_COLUMN_REF ||
		    cond.right->type != ExpressionType::BOUND_COLUMN_REF) {
			return false;
		}
		// ... that refer to the i-th column of the left and right child respectively
		auto &left_expr = cond.left->Cast<BoundColumnRefExpression>();
		auto &right_expr = cond.right->Cast<BoundColumnRefExpression>();
		if (!(left_expr.binding == left_bindings[i]) || !(right_expr.binding == right_bindings[i])) {
			return false;
		}
	}
	return true;
}

//! LogicalFilter represents a filter operation (e.g. WHERE or HAVING clause)
class LogicalFilter : public LogicalOperator {
public:
Expand Down
2 changes: 1 addition & 1 deletion src/main/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
namespace duckdb {

#ifdef DEBUG
// Single definition of the debug-only option (compiled only when DEBUG is defined).
// It stays disabled by default; enable it locally while debugging instead of committing `true`.
bool DBConfigOptions::debug_print_bindings = false;
#endif

#define DUCKDB_GLOBAL(_PARAM) \
Expand Down
8 changes: 7 additions & 1 deletion src/optimizer/filter_pullup.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "duckdb/optimizer/filter_pullup.hpp"
#include "duckdb/planner/operator/logical_join.hpp"
#include "duckdb/planner/operator/logical_filter.hpp"

namespace duckdb {

Expand Down Expand Up @@ -40,8 +41,13 @@ unique_ptr<LogicalOperator> FilterPullup::PullupJoin(unique_ptr<LogicalOperator>
case JoinType::INNER:
return PullupInnerJoin(std::move(op));
case JoinType::LEFT:
case JoinType::ANTI:
case JoinType::ANTI: {
return PullupFromLeft(std::move(op));
}
case JoinType::SEMI: {
if (CanFiltersPropogateRightSide(*op)) {
return PullupBothSide(std::move(op));
}
return PullupFromLeft(std::move(op));
}
default:
Expand Down
3 changes: 2 additions & 1 deletion src/optimizer/join_order/relation_statistics_helper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ RelationStats RelationStatisticsHelper::CombineStatsOfNonReorderableOperator(Log
}
ret.stats_initialized = true;
ret.filter_strength = 1;
ret.table_name = child_stats[0].table_name + " joined with " + child_stats[1].table_name;
ret.table_name =
"(" + child_stats[0].table_name + LogicalOperatorToString(op.type) + child_stats[1].table_name + ")";
for (auto &stats : child_stats) {
// MARK joins are nonreorderable. They won't return initialized stats
// continue in this case.
Expand Down
47 changes: 43 additions & 4 deletions src/optimizer/pushdown/pushdown_semi_anti_join.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,55 @@ namespace duckdb {

using Filter = FilterPushdown::Filter;

//! Recursively rewrites every column reference inside `expr`: a reference to bindings[i] is replaced by a
//! reference to replacement_bindings[i]. The table index of every rewritten binding is added to filter.bindings.
//!
//! \param bindings the bindings the expression currently refers to
//! \param filter the filter that owns `expr`; only its `bindings` set is modified here
//! \param expr the (sub-)expression to rewrite in place
//! \param replacement_bindings the bindings to substitute, positionally aligned with `bindings`
//! \throws InternalException if a column reference is not part of `bindings` or has no positional replacement
static void ReplaceBindings(vector<ColumnBinding> &bindings, Filter &filter, Expression &expr,
                            vector<ColumnBinding> &replacement_bindings) {
	if (expr.type == ExpressionType::BOUND_COLUMN_REF) {
		auto &colref = expr.Cast<BoundColumnRefExpression>();
		D_ASSERT(colref.depth == 0);

		// find the position of the referenced column within `bindings`;
		// bindings.size() serves as the "not found" marker
		idx_t binding_index = bindings.size();
		for (idx_t i = 0; i < bindings.size(); i++) {
			if (bindings[i] == colref.binding) {
				binding_index = i;
				break;
			}
		}
		// silently falling back to index 0 would rewrite the reference to an unrelated column
		if (binding_index >= bindings.size() || binding_index >= replacement_bindings.size()) {
			throw InternalException("ReplaceBindings: column reference has no matching replacement binding");
		}
		colref.binding = replacement_bindings[binding_index];
		filter.bindings.insert(colref.binding.table_index);
		return;
	}
	ExpressionIterator::EnumerateChildren(
	    expr, [&](Expression &child) { ReplaceBindings(bindings, filter, child, replacement_bindings); });
}

unique_ptr<LogicalOperator> FilterPushdown::PushdownSemiAntiJoin(unique_ptr<LogicalOperator> op) {
auto &join = op->Cast<LogicalJoin>();
if (op->type == LogicalOperatorType::LOGICAL_DELIM_JOIN) {
return FinishPushdown(std::move(op));
}

// push all current filters down the left side
op->children[0] = Rewrite(std::move(op->children[0]));
FilterPushdown right_pushdown(optimizer);
op->children[1] = right_pushdown.Rewrite(std::move(op->children[1]));
if (CanFiltersPropogateRightSide(*op)) {
auto left_bindings = op->children[0]->GetColumnBindings();
auto right_bindings = op->children[1]->GetColumnBindings();
FilterPushdown right_pushdown(optimizer);
for (idx_t i = 0; i < filters.size(); i++) {
// first create a copy of the filter
auto right_filter = make_uniq<Filter>();
right_filter->filter = filters[i]->filter->Copy();

ReplaceBindings(left_bindings, *right_filter, *right_filter->filter, right_bindings);
right_filter->ExtractBindings();

// move the filters into the child pushdown nodes
right_pushdown.filters.push_back(std::move(right_filter));
}
op->children[0] = Rewrite(std::move(op->children[0]));
op->children[1] = right_pushdown.Rewrite(std::move(op->children[1]));
} else {
// push all current filters down the left side
op->children[0] = Rewrite(std::move(op->children[0]));
}

bool left_empty = op->children[0]->type == LogicalOperatorType::LOGICAL_EMPTY_RESULT;
bool right_empty = op->children[1]->type == LogicalOperatorType::LOGICAL_EMPTY_RESULT;
Expand Down
116 changes: 116 additions & 0 deletions src/planner/binder/query_node/plan_setop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,32 @@
#include "duckdb/planner/expression/bound_cast_expression.hpp"
#include "duckdb/planner/expression/bound_columnref_expression.hpp"
#include "duckdb/planner/operator/logical_projection.hpp"
#include "duckdb/planner/operator/logical_window.hpp"
#include "duckdb/planner/expression/bound_reference_expression.hpp"
#include "duckdb/planner/expression/bound_window_expression.hpp"
#include "duckdb/planner/operator/logical_set_operation.hpp"
#include "duckdb/planner/query_node/bound_set_operation_node.hpp"

namespace duckdb {

//! Wraps `op` in a LogicalWindow that computes a single ROW_NUMBER expression (BIGINT result)
//! partitioned by every column of `op`.
//! \param window_table_index the table index given to the new window operator
//! \param op the child operator; its `types` and column bindings are read before it is moved into the window
//! \return the window operator with `op` as its only child
static unique_ptr<LogicalWindow> CreateWindowWithPartitionedRowNum(idx_t window_table_index,
                                                                   unique_ptr<LogicalOperator> op) {
	// row number expression with a frame from UNBOUNDED PRECEDING to the current row
	auto row_num_expr =
	    make_uniq<BoundWindowExpression>(ExpressionType::WINDOW_ROW_NUMBER, LogicalType::BIGINT, nullptr, nullptr);
	row_num_expr->start = WindowBoundary::UNBOUNDED_PRECEDING;
	row_num_expr->end = WindowBoundary::CURRENT_ROW_ROWS;

	// partition by all columns of the child, referenced through the child's column bindings
	auto child_bindings = op->GetColumnBindings();
	auto child_types = op->types;
	for (idx_t col_idx = 0; col_idx < child_types.size(); col_idx++) {
		auto partition_ref = make_uniq<BoundColumnRefExpression>(child_types[col_idx], child_bindings[col_idx]);
		row_num_expr->partitions.push_back(std::move(partition_ref));
	}

	// the child is moved into the window only after its types and bindings have been read
	auto window_op = make_uniq<LogicalWindow>(window_table_index);
	window_op->expressions.push_back(std::move(row_num_expr));
	window_op->AddChild(std::move(op));
	return window_op;
}

// Optionally push a PROJECTION operator
unique_ptr<LogicalOperator> Binder::CastLogicalOperatorToTypes(vector<LogicalType> &source_types,
vector<LogicalType> &target_types,
Expand Down Expand Up @@ -116,9 +137,104 @@ unique_ptr<LogicalOperator> Binder::CreatePlan(BoundSetOperationNode &node) {
break;
}

// Here we convert the set operation to a semi/anti join if required. node.setop_all tells us which conversion we
// need.
auto root = make_uniq<LogicalSetOperation>(node.setop_index, node.types.size(), std::move(left_node),
std::move(right_node), logical_type, node.setop_all);
root->ResolveOperatorTypes();

unique_ptr<LogicalOperator> op;

// if we have an intersect or except, immediately translate it to a semi or anti join.
// Unions stay as they are.
if (logical_type == LogicalOperatorType::LOGICAL_INTERSECT || logical_type == LogicalOperatorType::LOGICAL_EXCEPT) {
auto &left = root->children[0];
auto &right = root->children[1];
auto left_types = root->children[0]->types;
auto right_types = root->children[1]->types;
auto old_bindings = root->GetColumnBindings();
if (node.setop_all) {
auto window_left_table_id = GenerateTableIndex();
root->children[0] = CreateWindowWithPartitionedRowNum(window_left_table_id, std::move(root->children[0]));

auto window_right_table_id = GenerateTableIndex();
root->children[1] = CreateWindowWithPartitionedRowNum(window_right_table_id, std::move(root->children[1]));

root->types.push_back(LogicalType::BIGINT);
root->column_count += 1;
}

auto left_bindings = left->GetColumnBindings();
auto right_bindings = right->GetColumnBindings();
D_ASSERT(left_bindings.size() == right_bindings.size());

vector<JoinCondition> conditions;
// create equality condition for all columns
idx_t binding_offset = node.setop_all ? 1 : 0;
for (idx_t i = 0; i < left_bindings.size() - binding_offset; i++) {
auto cond_type_left = LogicalType(LogicalType::UNKNOWN);
auto cond_type_right = LogicalType(LogicalType::UNKNOWN);
JoinCondition cond;
cond.left = make_uniq<BoundColumnRefExpression>(left_types[i], left_bindings[i]);
cond.right = make_uniq<BoundColumnRefExpression>(right_types[i], right_bindings[i]);
cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM;
conditions.push_back(std::move(cond));
}

// create condition for the row number as well.
if (node.setop_all) {
JoinCondition cond;
cond.left =
make_uniq<BoundColumnRefExpression>(LogicalType::BIGINT, left_bindings[left_bindings.size() - 1]);
cond.right =
make_uniq<BoundColumnRefExpression>(LogicalType::BIGINT, right_bindings[right_bindings.size() - 1]);
cond.comparison = ExpressionType::COMPARE_NOT_DISTINCT_FROM;
conditions.push_back(std::move(cond));
}

JoinType join_type = root->type == LogicalOperatorType::LOGICAL_EXCEPT ? JoinType::ANTI : JoinType::SEMI;

auto join_op = make_uniq<LogicalComparisonJoin>(join_type);
join_op->children.push_back(std::move(left));
join_op->children.push_back(std::move(right));
join_op->conditions = std::move(conditions);
join_op->ResolveOperatorTypes();

op = std::move(join_op);

// create projection to remove row_id.
if (node.setop_all) {
vector<unique_ptr<Expression>> projection_select_list;
auto bindings = op->GetColumnBindings();
for (idx_t i = 0; i < bindings.size() - 1; i++) {
projection_select_list.push_back(make_uniq<BoundColumnRefExpression>(op->types[i], bindings[i]));
}
auto projection = make_uniq<LogicalProjection>(node.setop_index, std::move(projection_select_list));
projection->children.push_back(std::move(op));
op = std::move(projection);
}

if (!node.setop_all) {
// push a distinct operator on the join
auto &types = op->types;
auto join_bindings = op->GetColumnBindings();
vector<unique_ptr<Expression>> distinct_targets;
vector<unique_ptr<Expression>> select_list;
for (idx_t i = 0; i < join_bindings.size(); i++) {
distinct_targets.push_back(make_uniq<BoundColumnRefExpression>(types[i], join_bindings[i]));
select_list.push_back(make_uniq<BoundColumnRefExpression>(types[i], join_bindings[i]));
}
auto distinct = make_uniq<LogicalDistinct>(std::move(distinct_targets), DistinctType::DISTINCT);
distinct->children.push_back(std::move(op));
op = std::move(distinct);

auto projection = make_uniq<LogicalProjection>(node.setop_index, std::move(select_list));
projection->children.push_back(std::move(op));
op = std::move(projection);
op->ResolveOperatorTypes();
}
return VisitQueryNode(node, std::move(op));
}
return VisitQueryNode(node, std::move(root));
}

Expand Down
Loading