Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
170e0c9
removing projections in mini join order optimizer optimizer
Tmonster Jan 9, 2025
f23b088
most basic example is working. need to also reject certain join possi…
Tmonster Jan 9, 2025
16c8946
filters on right side of left join do not push into right side anymore
Tmonster Jan 10, 2025
585658a
works on tpcds, but the filter reordering for filters above left join…
Tmonster Jan 13, 2025
cf2bd51
still need to add actual column bindings to filters that sit on top o…
Tmonster Jan 16, 2025
ed17dbb
all tests pass in release, but there could be regressions
Tmonster Jan 16, 2025
48dc820
need to invert joins in other places, dont understand why they are no…
Tmonster Jan 17, 2025
2715009
pausing
Tmonster Jan 17, 2025
a29965f
most queries work. filters above left joins seem to disappear however
Tmonster Jan 20, 2025
727bdb2
Add test to check filter above left join
Tmonster Jan 20, 2025
91ccda8
release tests pass
Tmonster Jan 20, 2025
93877dd
some clean up
Tmonster Jan 20, 2025
3993e03
more clean up
Tmonster Jan 20, 2025
d8f0140
more clean up
Tmonster Jan 20, 2025
d4f1b95
add some more tests
Tmonster Jan 21, 2025
65c04ee
Merge branch 'main' into support_left_join_reordering
Tmonster Jan 21, 2025
753b494
some tidy fixes
Tmonster Jan 21, 2025
b56200a
fix some tests
Tmonster Jan 22, 2025
feac93e
fix recursive cte bug
Tmonster Jan 22, 2025
b8e086c
maybe fix last tests
Tmonster Jan 22, 2025
e5ebb9e
hopefully last fixes
Tmonster Jan 23, 2025
abda2f8
hopefully fix regressions
Tmonster Jan 27, 2025
cdeb8c2
ff
Tmonster Jan 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,11 @@ class FilterInfo;

//! Groups a numerator relation set with a multiplier and a denominator value.
// NOTE(review): this span is a rendered diff without +/- markers. The two initializer lists and the
// filter_strength / extra_multiplier members look like the before/after versions of the same lines;
// confirm against the real header before relying on either.
struct DenomInfo {
// The second parameter is named filter_strength; in the extra_multiplier variant of the initializer
// list it is the value used to initialize extra_multiplier.
DenomInfo(JoinRelationSet &numerator_relations, double filter_strength, double denominator)
: numerator_relations(numerator_relations), filter_strength(filter_strength), denominator(denominator) {
: numerator_relations(numerator_relations), extra_multiplier(filter_strength), denominator(denominator) {
}

// Stored as a reference: the referenced JoinRelationSet must outlive this DenomInfo.
JoinRelationSet &numerator_relations;
double filter_strength;
double extra_multiplier;
double denominator;
};

Expand Down Expand Up @@ -61,9 +61,11 @@ class FilterInfoWithTotalDomains {
//! Pairs two optional relation-set pointers with an extra numerator factor and a denominator.
// NOTE(review): this span is a rendered diff without +/- markers. The one-line default constructor and
// the two-line default constructor look like the before/after versions of the same declaration.
struct Subgraph2Denominator {
optional_ptr<JoinRelationSet> relations;
optional_ptr<JoinRelationSet> numerator_relations;
double numerator_relations_extra;
double denom;

// Default state: both pointers are null; the two-line variant sets both numeric members to 1,
// the one-line variant sets only denom and has no initializer for numerator_relations_extra.
Subgraph2Denominator() : relations(nullptr), numerator_relations(nullptr), denom(1) {};
Subgraph2Denominator()
: relations(nullptr), numerator_relations(nullptr), numerator_relations_extra(1), denom(1) {};
};

class CardinalityHelper {
Expand All @@ -89,6 +91,7 @@ class CardinalityEstimator {
public:
static constexpr double DEFAULT_SEMI_ANTI_SELECTIVITY = 5;
static constexpr double DEFAULT_LT_GT_MULTIPLIER = 2.5;
static constexpr double LEFT_JOIN_COEFFICIENT = 0.008;
explicit CardinalityEstimator() {};

private:
Expand Down
3 changes: 2 additions & 1 deletion src/include/duckdb/optimizer/join_order/cost_model.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ class CostModel {
void InitCostModel();

//! Compute cost of a join relation set
double ComputeCost(DPJoinNode &left, DPJoinNode &right);
double ComputeCost(DPJoinNode &left, DPJoinNode &right, NeighborInfo &connection);
double ComputeJoinCost(DPJoinNode &left, DPJoinNode &right);

//! Cardinality Estimator used to calculate cost
CardinalityEstimator cardinality_estimator;
Expand Down
21 changes: 14 additions & 7 deletions src/include/duckdb/optimizer/join_order/join_order_optimizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,10 @@
#include "duckdb/common/unordered_map.hpp"
#include "duckdb/common/unordered_set.hpp"
#include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
#include "duckdb/optimizer/join_order/join_node.hpp"
#include "duckdb/optimizer/join_order/join_relation.hpp"
#include "duckdb/optimizer/join_order/query_graph.hpp"
#include "duckdb/optimizer/column_binding_replacer.hpp"
#include "duckdb/optimizer/join_order/query_graph_manager.hpp"
#include "duckdb/parser/expression_map.hpp"
#include "duckdb/planner/logical_operator.hpp"
#include "duckdb/planner/logical_operator_visitor.hpp"

#include <functional>

namespace duckdb {

Expand All @@ -30,7 +25,8 @@ class JoinOrderOptimizer {

public:
//! Perform join reordering inside a plan
unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> plan, optional_ptr<RelationStats> stats = nullptr);
unique_ptr<LogicalOperator> Optimize(unique_ptr<LogicalOperator> plan, optional_ptr<RelationStats> stats = nullptr,
bool remove_projections = false);
//! Adds/gets materialized CTE stats
void AddMaterializedCTEStats(idx_t index, RelationStats &&stats);
RelationStats GetMaterializedCTEStats(idx_t index);
Expand Down Expand Up @@ -62,4 +58,15 @@ class JoinOrderOptimizer {
optional_ptr<RelationStats> delim_scan_stats;
};

//! Declares a helper that takes a logical plan by unique_ptr and returns a logical plan.
//! Only declarations are visible here; the behaviour lives in the implementation file.
// NOTE(review): judging by the names, this presumably strips projections that are not needed and
// rewrites column bindings through `replacer` — confirm against the .cpp.
class RemoveUnnecessaryProjections {
public:
explicit RemoveUnnecessaryProjections();
//! Takes ownership of `plan` and returns a plan.
unique_ptr<LogicalOperator> RemoveProjections(unique_ptr<LogicalOperator> plan);
//! Takes ownership of `plan` and returns a plan; presumably the per-child step — TODO confirm.
unique_ptr<LogicalOperator> RemoveProjectionsChildren(unique_ptr<LogicalOperator> plan);
//! Publicly accessible column binding replacer.
ColumnBindingReplacer replacer;

private:
// No in-class initializer; presumably set by the constructor — TODO confirm.
bool first_projection;
};

} // namespace duckdb
7 changes: 6 additions & 1 deletion src/include/duckdb/optimizer/join_order/join_relation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@
#pragma once

#include "duckdb/common/common.hpp"
#include "duckdb/common/unordered_map.hpp"
#include "duckdb/common/unordered_set.hpp"
#include "duckdb/common/optional_ptr.hpp"

namespace duckdb {

Expand All @@ -20,6 +20,9 @@ struct JoinRelationSet {
}

string ToString() const;
bool Empty() {
return count == 0;
}

unsafe_unique_array<idx_t> relations;
idx_t count;
Expand All @@ -39,6 +42,7 @@ class JoinRelationSetManager {
};

public:
JoinRelationSet &GetEmptyJoinRelationSet();
//! Create or get a JoinRelationSet from a single node with the given index
JoinRelationSet &GetJoinRelation(idx_t index);
//! Create or get a JoinRelationSet from a set of relation bindings
Expand All @@ -54,6 +58,7 @@ class JoinRelationSetManager {

private:
JoinRelationTreeNode root;
optional_ptr<JoinRelationSet> empty_relation_set;
};

} // namespace duckdb
33 changes: 1 addition & 32 deletions src/include/duckdb/optimizer/join_order/query_graph_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,15 @@
#pragma once

#include "duckdb/common/common.hpp"
#include "duckdb/common/enums/join_type.hpp"
#include "duckdb/common/optional_ptr.hpp"
#include "duckdb/common/pair.hpp"
#include "duckdb/common/unordered_map.hpp"
#include "duckdb/common/unordered_set.hpp"
#include "duckdb/common/vector.hpp"
#include "duckdb/optimizer/join_order/join_node.hpp"
#include "duckdb/optimizer/join_order/join_relation.hpp"
#include "duckdb/optimizer/join_order/query_graph.hpp"
#include "duckdb/optimizer/join_order/relation_manager.hpp"
#include "duckdb/planner/column_binding.hpp"
#include "duckdb/planner/logical_operator.hpp"

#include <functional>

namespace duckdb {

class QueryGraphEdges;
Expand All @@ -37,29 +31,6 @@ struct GenerateJoinRelation {
unique_ptr<LogicalOperator> op;
};

//! Filter info struct that is used by the cardinality estimator to set the initial cardinality
//! but is also eventually transformed into a query edge.
class FilterInfo {
public:
//! Takes ownership of `filter`, binds `set` by reference, and records the filter index and join type.
//! `join_type` defaults to JoinType::INNER. left_set, right_set and the two bindings are not
//! initialized by this constructor.
FilterInfo(unique_ptr<Expression> filter, JoinRelationSet &set, idx_t filter_index,
JoinType join_type = JoinType::INNER)
: filter(std::move(filter)), set(set), filter_index(filter_index), join_type(join_type) {
}

public:
unique_ptr<Expression> filter;
// Held as a reference wrapper: the referenced JoinRelationSet must outlive this FilterInfo.
reference<JoinRelationSet> set;
idx_t filter_index;
JoinType join_type;
// Only SetLeftSet / SetRightSet are visible as setters for these; presumably filled in after
// construction — TODO confirm.
optional_ptr<JoinRelationSet> left_set;
optional_ptr<JoinRelationSet> right_set;
ColumnBinding left_binding;
ColumnBinding right_binding;

void SetLeftSet(optional_ptr<JoinRelationSet> left_set_new);
void SetRightSet(optional_ptr<JoinRelationSet> right_set_new);
};

//! The QueryGraphManager manages the process of extracting the reorderable and nonreorderable operations
//! from the logical plan and creating the intermediate structures needed by the plan enumerator.
//! When the plan enumerator finishes, the Query Graph Manger can then recreate the logical plan.
Expand Down Expand Up @@ -94,7 +65,7 @@ class QueryGraphManager {
//! products to create edges.
void CreateQueryGraphCrossProduct(JoinRelationSet &left, JoinRelationSet &right);

//! A map to store the optimal join plan found for a specific JoinRelationSet*
//! A map to store the optimal join plan found for a specific JoinRelationSet
optional_ptr<const reference_map_t<JoinRelationSet, unique_ptr<DPJoinNode>>> plans;

private:
Expand All @@ -106,8 +77,6 @@ class QueryGraphManager {

QueryGraphEdges query_graph;

void GetColumnBinding(Expression &expression, ColumnBinding &binding);

void CreateHyperGraphEdges();

GenerateJoinRelation GenerateJoins(vector<unique_ptr<LogicalOperator>> &extracted_relations, JoinRelationSet &set);
Expand Down
65 changes: 59 additions & 6 deletions src/include/duckdb/optimizer/join_order/relation_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,18 @@
#include "duckdb/common/unordered_map.hpp"
#include "duckdb/common/unordered_set.hpp"
#include "duckdb/optimizer/join_order/cardinality_estimator.hpp"
#include "duckdb/optimizer/join_order/join_node.hpp"
#include "duckdb/optimizer/join_order/join_relation.hpp"
#include "duckdb/optimizer/join_order/relation_statistics_helper.hpp"
#include "duckdb/parser/expression_map.hpp"
#include "duckdb/planner/logical_operator.hpp"
#include "duckdb/planner/logical_operator_visitor.hpp"

#include "duckdb/common/enums/join_type.hpp"
#include "duckdb/parser/expression/conjunction_expression.hpp"

namespace duckdb {

class JoinOrderOptimizer;
class FilterInfo;

//! Represents a single relation and any metadata accompanying that relation
struct SingleJoinRelation {
Expand All @@ -36,6 +37,40 @@ struct SingleJoinRelation {
}
};

//! FilterInfo stores filter information so that edges between relations can be made
//! with the original ColumnBinding information available so that the cardinality estimator can
//! view the statistics of the underlying base tables.
class FilterInfo {
public:
//! Full constructor: takes ownership of `filter` and records the relation sets, filter index,
//! join type, and both column bindings.
FilterInfo(unique_ptr<Expression> filter, optional_ptr<JoinRelationSet> set, idx_t filter_index, JoinType join_type,
optional_ptr<JoinRelationSet> left_relation_set, optional_ptr<JoinRelationSet> right_relation_set,
ColumnBinding left_binding, ColumnBinding right_binding)
: filter(std::move(filter)), set(set), filter_index(filter_index), join_type(join_type),
left_relation_set(left_relation_set), right_relation_set(right_relation_set), left_binding(left_binding),
right_binding(right_binding) {
}
//! Same as above but without column bindings: left_binding and right_binding are not in the
//! initializer list, so they are default-initialized.
FilterInfo(unique_ptr<Expression> filter, optional_ptr<JoinRelationSet> set, idx_t filter_index, JoinType join_type,
optional_ptr<JoinRelationSet> left_relation_set, optional_ptr<JoinRelationSet> right_relation_set)
: filter(std::move(filter)), set(set), filter_index(filter_index), join_type(join_type),
left_relation_set(left_relation_set), right_relation_set(right_relation_set) {
}

public:
//! The filter expression, owned by this FilterInfo.
unique_ptr<Expression> filter;
//! Optional (nullable) pointer to a relation set; presumably all relations the filter
//! references — TODO confirm.
optional_ptr<JoinRelationSet> set;
idx_t filter_index;
JoinType join_type;
//! Optional (nullable) relation sets for the left and right side.
optional_ptr<JoinRelationSet> left_relation_set;
optional_ptr<JoinRelationSet> right_relation_set;
// TODO: change this to be a binding set
ColumnBinding left_binding;
ColumnBinding right_binding;

void SetLeftSet(optional_ptr<JoinRelationSet> left_set_new);
void SetRightSet(optional_ptr<JoinRelationSet> right_set_new);
// Declared only; presumably reports whether the filter involves a single column — TODO confirm.
bool SingleColumnFilter();
};

class RelationManager {
public:
explicit RelationManager(ClientContext &context) : context(context) {
Expand All @@ -49,12 +84,26 @@ class RelationManager {

//! for each join filter in the logical plan op, extract the relations that are referred to on
//! both sides of the join filter, along with the tables & indexes.
vector<unique_ptr<FilterInfo>> ExtractEdges(LogicalOperator &op,
vector<reference<LogicalOperator>> &filter_operators,
vector<unique_ptr<FilterInfo>> ExtractEdges(vector<reference<LogicalOperator>> &filter_operators,
JoinRelationSetManager &set_manager);

//! Extract the set of relations referred to inside an expression
bool ExtractBindings(Expression &expression, unordered_set<idx_t> &bindings);
//! Extract all column bindings from an expression into `bindings`
void ExtractColumnBindingsFromExpression(Expression &expression, unordered_set<idx_t> &bindings);
//! Extract the Column binding from an expression
void ExtractColumnBinding(Expression &expression, ColumnBinding &binding);
// Inspects an expression and creates FilterInfo instances that can connect two relations.
// If the expression (or the children of a conjunction expression) cannot create a FilterInfo, then
// they are returned to be added to the filter_op so they are pushed down at the end of reconstruction.
vector<unique_ptr<Expression>> CreateFilterInfoFromExpression(unique_ptr<Expression> expr,
JoinRelationSetManager &set_manager,
JoinType join_type = JoinType::INNER);
vector<unique_ptr<Expression>>
CreateFilterFromConjunctionChildren(unique_ptr<BoundConjunctionExpression> conjunction_expression,
JoinRelationSetManager &set_manager, JoinType join_type);

optional_ptr<JoinRelationSet> GetJoinRelations(column_binding_set_t &column_bindings,
JoinRelationSetManager &set_manager);
void GetColumnBindingsFromExpression(Expression &expression, column_binding_set_t &column_bindings);
void AddRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent, const RelationStats &stats);

void AddAggregateOrWindowRelation(LogicalOperator &op, optional_ptr<LogicalOperator> parent,
Expand All @@ -74,6 +123,10 @@ class RelationManager {
//! Set of all relations considered in the join optimizer
vector<unique_ptr<SingleJoinRelation>> relations;
unordered_set<idx_t> no_cross_product_relations;

//! used when extracting edges from the relations. They are then passed to the
//! query graph manager.
vector<unique_ptr<FilterInfo>> filter_infos_;
};

} // namespace duckdb
Loading
Loading