Skip to content

Commit

Permalink
Apply limit + order optimization in the optimizer (cmu-db#1385)
Browse files Browse the repository at this point in the history
* Apply limit + order optimization in the optimizer
  • Loading branch information
chenboy authored and tli2 committed Jun 26, 2018
1 parent f98029f commit 996d240
Show file tree
Hide file tree
Showing 9 changed files with 151 additions and 42 deletions.
22 changes: 20 additions & 2 deletions src/include/optimizer/operators.h
Original file line number Diff line number Diff line change
Expand Up @@ -299,9 +299,18 @@ class LogicalDistinct : public OperatorNode<LogicalDistinct> {
//===--------------------------------------------------------------------===//
// Logical LIMIT operator. Carries the OFFSET/LIMIT values and, optionally,
// the content of an ORDER BY clause (sort expressions plus one ascending flag
// per expression) so that ordering can be handled together with the limit.
class LogicalLimit : public OperatorNode<LogicalLimit> {
public:
static Operator make(int64_t offset, int64_t limit);
static Operator make(
int64_t offset, int64_t limit,
std::vector<expression::AbstractExpression *> &&sort_exprs,
std::vector<bool> &&sort_ascending);
// Values of the OFFSET and LIMIT clauses.
int64_t offset;
int64_t limit;
// When we get a query like "SELECT * FROM tab ORDER BY a LIMIT 5",
// the limit operator keeps the ORDER BY clause's content as an internal
// order, so that it can later generate a sort plan with a limit as an
// optimization.
// Raw pointers; the query transformer fills them via .get(), so they are
// presumably owned by the parsed statement (TODO confirm lifetime).
std::vector<expression::AbstractExpression *> sort_exprs;
// sort_ascending[i] is true when sort_exprs[i] is ordered ascending.
std::vector<bool> sort_ascending;
};

//===--------------------------------------------------------------------===//
Expand Down Expand Up @@ -470,9 +479,18 @@ class PhysicalOrderBy : public OperatorNode<PhysicalOrderBy> {
//===--------------------------------------------------------------------===//
// Physical LIMIT operator. Mirrors LogicalLimit: OFFSET/LIMIT values plus an
// optional internal sort order copied from the logical operator.
class PhysicalLimit : public OperatorNode<PhysicalLimit> {
public:
static Operator make(int64_t offset, int64_t limit);
static Operator make(
int64_t offset, int64_t limit,
std::vector<expression::AbstractExpression *> sort_columns,
std::vector<bool> sort_ascending);
// Values of the OFFSET and LIMIT clauses.
int64_t offset;
int64_t limit;
// When we get a query like "SELECT * FROM tab ORDER BY a LIMIT 5",
// the limit operator keeps the ORDER BY clause's content as an internal
// order, so that it can later generate a sort plan with a limit as an
// optimization.
std::vector<expression::AbstractExpression *> sort_exprs;
// Ascending flag for each entry of sort_exprs.
// NOTE(review): the member name is misspelled ("acsending"); it is referenced
// under this spelling by the property deriver, operators.cpp and the plan
// generator, so a rename has to touch all of them together.
std::vector<bool> sort_acsending;
};

//===--------------------------------------------------------------------===//
Expand Down
17 changes: 15 additions & 2 deletions src/include/planner/order_by_plan.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ class OrderByPlan : public AbstractPlan {
const std::vector<bool> &descend_flags,
const std::vector<oid_t> &output_column_ids);

// Overload that additionally takes a limit and an offset for the sorted
// output.
OrderByPlan(const std::vector<oid_t> &sort_keys,
const std::vector<bool> &descend_flags,
const std::vector<oid_t> &output_column_ids, const uint64_t limit,
const uint64_t offset);

// Binds this plan against the given binding context (overrides AbstractPlan).
void PerformBinding(BindingContext &binding_context) override;

//===--------------------------------------------------------------------===//
Expand All @@ -51,13 +56,21 @@ class OrderByPlan : public AbstractPlan {
return output_ais_;
}

// Identifies this node as an ORDERBY plan node.
inline PlanNodeType GetPlanNodeType() const override { return PlanNodeType::ORDERBY; }
inline PlanNodeType GetPlanNodeType() const override {
return PlanNodeType::ORDERBY;
}

// Overwrites `columns` with this plan's output column ids.
void GetOutputColumns(std::vector<oid_t> &columns) const override {
columns = GetOutputColumnIds();
}

// Human-readable description of the plan: "OrderBy", followed by
// "(Limit : <limit_number_>, Offset : <limit_offset_>)" when limit_ is set.
const std::string GetInfo() const override { return "OrderBy"; }
const std::string GetInfo() const override {
return std::string("OrderBy") +
(limit_
? "(Limit : " + std::to_string(limit_number_) + ", Offset : " +
std::to_string(limit_offset_) + ")"
: "");
}

// Stores the flag in underling_ordered_; presumably it records that the input
// already arrives in the requested order (TODO confirm against the consumer).
void SetUnderlyingOrder(bool same_order) { underling_ordered_ = same_order; }

Expand Down
14 changes: 10 additions & 4 deletions src/optimizer/child_property_deriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,11 +136,17 @@ void ChildPropertyDeriver::Visit(const PhysicalAggregate *) {
vector<shared_ptr<PropertySet>>{make_shared<PropertySet>()}));
}

// Derives the (provided properties, required child properties) pair for a
// physical limit operator and appends it to output_.
void ChildPropertyDeriver::Visit(const PhysicalLimit *) {
// Let child fulfil all the required properties
vector<shared_ptr<PropertySet>> child_input_properties{requirements_};
void ChildPropertyDeriver::Visit(const PhysicalLimit *op) {
// Limit fulfills the internal sort property itself, so the child is asked
// for an empty property set
vector<shared_ptr<PropertySet>> child_input_properties{
std::make_shared<PropertySet>()};
std::shared_ptr<PropertySet> provided_prop(new PropertySet);
// Only advertise a sort property when the limit carries an internal order
if (!op->sort_exprs.empty()) {
provided_prop->AddProperty(
std::make_shared<PropertySort>(op->sort_exprs, op->sort_acsending));
}

output_.push_back(make_pair(requirements_, move(child_input_properties)));
output_.push_back(make_pair(provided_prop, move(child_input_properties)));
}

void ChildPropertyDeriver::Visit(const PhysicalDistinct *) {
Expand Down
28 changes: 25 additions & 3 deletions src/optimizer/input_column_deriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,18 +84,40 @@ void InputColumnDeriver::Visit(const QueryDerivedScan *op) {
output_cols, {input_cols}};
}

void InputColumnDeriver::Visit(const PhysicalLimit *) { Passdown(); }
void InputColumnDeriver::Visit(const PhysicalLimit *op) {
// All aggregate expressions and TVEs in the required columns and internal
// sort columns are needed by the child node
ExprSet input_cols_set;
for (auto expr : required_cols_) {
if (expression::ExpressionUtil::IsAggregateExpression(expr)) {
input_cols_set.insert(expr);
} else {
expression::ExpressionUtil::GetTupleValueExprs(input_cols_set, expr);
}
}
for (const auto& sort_column : op->sort_exprs) {
input_cols_set.insert(sort_column);
}
vector<AbstractExpression *> cols;
for (const auto &expr : input_cols_set) {
cols.push_back(expr);
}
output_input_cols_ =
pair<vector<AbstractExpression *>, vector<vector<AbstractExpression *>>>{
cols, {cols}};
}

void InputColumnDeriver::Visit(const PhysicalOrderBy *) {
// we need to pass down both required columns and sort columns
auto prop = properties_->GetPropertyOfType(PropertyType::SORT);
PELOTON_ASSERT(prop.get() != nullptr);
ExprSet input_cols_set;
for (auto expr : required_cols_) {
if (expression::ExpressionUtil::IsAggregateExpression(expr))
if (expression::ExpressionUtil::IsAggregateExpression(expr)) {
input_cols_set.insert(expr);
else
} else {
expression::ExpressionUtil::GetTupleValueExprs(input_cols_set, expr);
}
}
auto sort_prop = prop->As<PropertySort>();
size_t sort_col_size = sort_prop->GetSortColumnSize();
Expand Down
14 changes: 12 additions & 2 deletions src/optimizer/operators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -443,10 +443,15 @@ Operator LogicalDistinct::make() {
//===--------------------------------------------------------------------===//
// Limit
//===--------------------------------------------------------------------===//
Operator LogicalLimit::make(int64_t offset, int64_t limit) {
// Builds a LogicalLimit operator node from the OFFSET/LIMIT values and the
// internal sort order, taking over the contents of both vectors, and wraps
// the node in an Operator.
Operator LogicalLimit::make(
    int64_t offset, int64_t limit,
    std::vector<expression::AbstractExpression *> &&sort_exprs,
    std::vector<bool> &&sort_ascending) {
  auto *node = new LogicalLimit;
  node->sort_exprs = std::move(sort_exprs);
  node->sort_ascending = std::move(sort_ascending);
  node->limit = limit;
  node->offset = offset;
  return Operator(node);
}

Expand Down Expand Up @@ -656,10 +661,15 @@ Operator PhysicalOrderBy::make() {
//===--------------------------------------------------------------------===//
// PhysicalLimit
//===--------------------------------------------------------------------===//
Operator PhysicalLimit::make(int64_t offset, int64_t limit) {
// Builds a PhysicalLimit operator node from the OFFSET/LIMIT values and the
// internal sort order, and wraps the node in an Operator.
//
// The two vectors are taken by value, so they are already private copies;
// they are moved into the node instead of being copied a second time.
Operator PhysicalLimit::make(
    int64_t offset, int64_t limit,
    std::vector<expression::AbstractExpression *> sort_exprs,
    std::vector<bool> sort_ascending) {
  PhysicalLimit *limit_op = new PhysicalLimit;
  limit_op->offset = offset;
  limit_op->limit = limit;
  limit_op->sort_exprs = std::move(sort_exprs);
  // The member keeps its existing (misspelled) name; other files refer to it.
  limit_op->sort_acsending = std::move(sort_ascending);
  return Operator(limit_op);
}

Expand Down
33 changes: 29 additions & 4 deletions src/optimizer/plan_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -156,10 +156,35 @@ void PlanGenerator::Visit(const QueryDerivedScan *) {
}

// Builds the physical plan for a limit operator. When the operator carries an
// internal sort order, an OrderByPlan constructed with the limit and offset is
// stacked on top of the child plan; a LimitPlan is then placed on top of
// whatever plan results.
void PlanGenerator::Visit(const PhysicalLimit *op) {
// Generate order by + limit plan when there's internal sort order
output_plan_ = std::move(children_plans_[0]);
if (!op->sort_exprs.empty()) {
// Translate each output column into its column id in the child's output
vector<oid_t> column_ids;
PELOTON_ASSERT(children_expr_map_.size() == 1);
auto &child_cols_map = children_expr_map_[0];
for (size_t i = 0; i < output_cols_.size(); ++i) {
column_ids.push_back(child_cols_map[output_cols_[i]]);
}

// Translate the internal sort expressions the same way
PELOTON_ASSERT(op->sort_exprs.size() == op->sort_acsending.size());
auto sort_columns_size = op->sort_exprs.size();
vector<oid_t> sort_col_ids;
vector<bool> sort_flags;
for (size_t i = 0; i < sort_columns_size; ++i) {
sort_col_ids.push_back(child_cols_map[op->sort_exprs[i]]);
// planner use desc flag
sort_flags.push_back(!op->sort_acsending[i]);
}
// NOTE(review): op->limit / op->offset are int64_t on PhysicalLimit while
// this OrderByPlan constructor takes uint64_t; confirm negative values
// cannot reach this point.
unique_ptr<planner::AbstractPlan> order_by_plan(new planner::OrderByPlan(
sort_col_ids, sort_flags, column_ids, op->limit, op->offset));
order_by_plan->AddChild(std::move(output_plan_));
output_plan_ = std::move(order_by_plan);
}

// The limit plan always sits on top of the (possibly sorted) child plan
unique_ptr<planner::AbstractPlan> limit_plan(
new planner::LimitPlan(op->limit, op->offset));
limit_plan->AddChild(move(children_plans_[0]));
output_plan_ = move(limit_plan);
limit_plan->AddChild(move(output_plan_));
output_plan_ = std::move(limit_plan);
}

void PlanGenerator::Visit(const PhysicalOrderBy *) {
Expand Down Expand Up @@ -508,8 +533,8 @@ void PlanGenerator::BuildProjectionPlan() {

void PlanGenerator::BuildAggregatePlan(
AggregateType aggr_type,
const std::vector<std::shared_ptr<expression::AbstractExpression>> *
groupby_cols,
const std::vector<std::shared_ptr<expression::AbstractExpression>>
*groupby_cols,
std::unique_ptr<expression::AbstractExpression> having_predicate) {
vector<planner::AggregatePlan::AggTerm> aggr_terms;
vector<catalog::Column> output_schema_columns;
Expand Down
12 changes: 11 additions & 1 deletion src/optimizer/query_to_operator_transformer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,18 @@ void QueryToOperatorTransformer::Visit(parser::SelectStatement *op) {
}

if (op->limit != nullptr) {
const auto &order_info = op->order;
std::vector<expression::AbstractExpression *> sort_exprs;
std::vector<bool> sort_ascending;
for (auto &expr : order_info->exprs) {
sort_exprs.push_back(expr.get());
}
for (auto &type : order_info->types) {
sort_ascending.push_back(type == parser::kOrderAsc);
}
auto limit_expr = std::make_shared<OperatorExpression>(
LogicalLimit::make(op->limit->offset, op->limit->limit));
LogicalLimit::make(op->limit->offset, op->limit->limit,
std::move(sort_exprs), std::move(sort_ascending)));
limit_expr->PushChild(output_expr_);
output_expr_ = limit_expr;
}
Expand Down
3 changes: 2 additions & 1 deletion src/optimizer/rule_impls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -811,7 +811,8 @@ void ImplementLimit::Transform(
const LogicalLimit *limit_op = input->Op().As<LogicalLimit>();

auto result_plan = std::make_shared<OperatorExpression>(
PhysicalLimit::make(limit_op->offset, limit_op->limit));
PhysicalLimit::make(limit_op->offset, limit_op->limit,
limit_op->sort_exprs, limit_op->sort_ascending));
std::vector<std::shared_ptr<OperatorExpression>> children = input->Children();
PELOTON_ASSERT(children.size() == 1);

Expand Down
50 changes: 27 additions & 23 deletions src/planner/order_by_plan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
// Copyright (c) 2015-17, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#include <memory>
#include <vector>

Expand All @@ -21,24 +21,35 @@ namespace peloton {
namespace planner {

// Constructs an order-by plan from the sort key column ids, the per-key
// descending flags and the output column ids. The limit-related members are
// not set by this overload.
OrderByPlan::OrderByPlan(const std::vector<oid_t> &sort_keys,
const std::vector<bool> &descend_flags,
const std::vector<oid_t> &output_column_ids)
: sort_keys_(sort_keys),
descend_flags_(descend_flags),
output_column_ids_(output_column_ids) {}
const std::vector<bool> &descend_flags,
const std::vector<oid_t> &output_column_ids)
: sort_keys_(sort_keys),
descend_flags_(descend_flags),
output_column_ids_(output_column_ids) {}

// Constructs an order-by plan that also carries a limit: in addition to the
// sort keys, descending flags and output column ids, it sets limit_ to true
// and records the limit count and the offset.
OrderByPlan::OrderByPlan(const std::vector<oid_t> &sort_keys,
const std::vector<bool> &descend_flags,
const std::vector<oid_t> &output_column_ids,
const uint64_t limit, const uint64_t offset)
: sort_keys_(sort_keys),
descend_flags_(descend_flags),
output_column_ids_(output_column_ids),
limit_(true),
limit_number_(limit),
limit_offset_(offset) {}

void OrderByPlan::PerformBinding(BindingContext &binding_context) {
// Let the child do its binding first
AbstractPlan::PerformBinding(binding_context);

for (const oid_t col_id : GetOutputColumnIds()) {
auto* ai = binding_context.Find(col_id);
auto *ai = binding_context.Find(col_id);
PELOTON_ASSERT(ai != nullptr);
output_ais_.push_back(ai);
}

for (const oid_t sort_key_col_id : GetSortKeys()) {
auto* ai = binding_context.Find(sort_key_col_id);
auto *ai = binding_context.Find(sort_key_col_id);
PELOTON_ASSERT(ai != nullptr);
sort_key_ais_.push_back(ai);
}
Expand All @@ -64,39 +75,32 @@ hash_t OrderByPlan::Hash() const {
}

bool OrderByPlan::operator==(const AbstractPlan &rhs) const {
if (GetPlanNodeType() != rhs.GetPlanNodeType())
return false;
if (GetPlanNodeType() != rhs.GetPlanNodeType()) return false;

auto &other = static_cast<const planner::OrderByPlan &>(rhs);

// Sort Keys
// Sort Keys
size_t sort_keys_count = GetSortKeys().size();
if (sort_keys_count != other.GetSortKeys().size())
return false;
if (sort_keys_count != other.GetSortKeys().size()) return false;

for (size_t i = 0; i < sort_keys_count; i++) {
if (GetSortKeys()[i] != other.GetSortKeys()[i])
return false;
if (GetSortKeys()[i] != other.GetSortKeys()[i]) return false;
}

// Descend Flags
size_t descend_flags_count = GetDescendFlags().size();
if (descend_flags_count != other.GetDescendFlags().size())
return false;
if (descend_flags_count != other.GetDescendFlags().size()) return false;

for (size_t i = 0; i < descend_flags_count; i++) {
if (GetDescendFlags()[i] != other.GetDescendFlags()[i])
return false;
if (GetDescendFlags()[i] != other.GetDescendFlags()[i]) return false;
}

// Output Column Ids
size_t column_id_count = GetOutputColumnIds().size();
if (column_id_count != other.GetOutputColumnIds().size())
return false;
if (column_id_count != other.GetOutputColumnIds().size()) return false;

for (size_t i = 0; i < column_id_count; i++) {
if (GetOutputColumnIds()[i] != other.GetOutputColumnIds()[i])
return false;
if (GetOutputColumnIds()[i] != other.GetOutputColumnIds()[i]) return false;
}

return AbstractPlan::operator==(rhs);
Expand Down

0 comments on commit 996d240

Please sign in to comment.