Skip to content

Commit

Permalink
Add Big Join Query
Browse files Browse the repository at this point in the history
Following Peter's advice, I checked whether my microbenchmarks generalize to a
very simple query - turns out they do!

We add `q_bigjoin`: `SELECT count(*) FROM lineitem, orders WHERE
l_orderkey = o_orderkey`.

For this query at SF 10 we can actually nicely see that Interpretation &
ROF start winning against JIT compilation because they can issue more
independent hash table loads.
  • Loading branch information
wagjamin committed Nov 6, 2023
1 parent 0070b4a commit a6e0932
Show file tree
Hide file tree
Showing 5 changed files with 61 additions and 1 deletion.
53 changes: 53 additions & 0 deletions src/common/TPCH.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1784,6 +1784,59 @@ std::unique_ptr<Print> q19(const Schema& schema) {

}

/// Builds the plan for the custom `q_bigjoin` query: an inner join of `orders`
/// and `lineitem` on their order key columns, an ungrouped count over the join
/// result, and a print of that count as the single column `num_rows`.
///
/// @param schema Schema in which the "orders" and "lineitem" relations are looked up.
/// @return The root Print operator owning the whole operator tree.
std::unique_ptr<Print> q_bigjoin(const inkfuse::Schema& schema) {
   // Scan orders, reading only the join key column.
   auto& orders_rel = schema.at("orders");
   std::vector<std::string> orders_cols{"o_orderkey"};
   auto orders_scan = TableScan::build(*orders_rel, orders_cols, "scan_o");
   // Ownership of the scan moves into the join's child list further down, so
   // keep a plain reference around to reach its output IUs.
   auto& orders_scan_ref = *orders_scan;

   // Scan lineitem, again reading only the join key column.
   auto& lineitem_rel = schema.at("lineitem");
   std::vector<std::string> lineitem_cols{"l_orderkey"};
   auto lineitem_scan = TableScan::build(*lineitem_rel, lineitem_cols, "scan_l");
   auto& lineitem_scan_ref = *lineitem_scan;

   // The first (and only requested) output of each scan is its join key.
   const auto orders_key = orders_scan_ref.getOutput()[0];
   const auto lineitem_key = lineitem_scan_ref.getOutput()[0];

   // Join the two scans: orders is the left child, lineitem the right child.
   std::vector<RelAlgOpPtr> join_inputs;
   join_inputs.emplace_back(std::move(orders_scan));
   join_inputs.emplace_back(std::move(lineitem_scan));
   auto join = Join::build(
      std::move(join_inputs),
      "o_l_join",
      // Keys left (o_orderkey)
      {orders_key},
      // Payload left (none)
      {},
      // Keys right (l_orderkey)
      {lineitem_key},
      // Payload right (none)
      {},
      JoinType::Inner,
      // NOTE(review): presumably flags this as a primary-key join, since
      // o_orderkey is unique in orders - confirm against Join::build.
      true);
   auto& join_ref = *join;

   // Aggregate count(*) over the join result; the query has no group-by keys.
   std::vector<RelAlgOpPtr> agg_inputs;
   agg_inputs.emplace_back(std::move(join));
   std::vector<const IU*> no_group_by{};
   const auto& counted_iu = *join_ref.getOutput()[0];
   std::vector<AggregateFunctions::Description> agg_fns{
      {counted_iu, AggregateFunctions::Opcode::Count}};
   auto count_agg = Aggregation::build(
      std::move(agg_inputs),
      "agg",
      std::move(no_group_by),
      std::move(agg_fns));

   // Print the count. The output IU must be fetched before the aggregation
   // pointer is moved into the Print operator's child list.
   std::vector<const IU*> printed_ius{count_agg->getOutput()[0]};
   std::vector<std::string> printed_names = {"num_rows"};
   std::vector<RelAlgOpPtr> print_inputs;
   print_inputs.emplace_back(std::move(count_agg));
   return Print::build(
      std::move(print_inputs),
      std::move(printed_ius),
      std::move(printed_names));
}

std::unique_ptr<Print> l_count(const inkfuse::Schema& schema) {
// 1. Scan from lineitem.
auto& rel = schema.at("lineitem");
Expand Down
1 change: 1 addition & 0 deletions src/common/TPCH.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ std::unique_ptr<Print> q18(const Schema& schema);
std::unique_ptr<Print> q19(const Schema& schema);

/// Some interesting custom queries. See /tpch for query text.
std::unique_ptr<Print> q_bigjoin(const Schema& schema);
std::unique_ptr<Print> l_count(const Schema& schema);
std::unique_ptr<Print> l_point(const Schema& schema);

Expand Down
4 changes: 3 additions & 1 deletion test/tpch/test_queries.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ const std::unordered_map<std::string, FunctionT> generator_map{
{"q14", tpch::q14},
{"q18", tpch::q18},
{"q19", tpch::q19},
{"q_bigjoin", tpch::q_bigjoin},
{"l_count", tpch::l_count},
{"l_point", tpch::l_point},
};
Expand All @@ -53,6 +54,7 @@ std::unordered_map<std::string, size_t> expected_rows{
{"q14", 1},
{"q18", 0},
{"q19", 0},
{"q_bigjoin", 1},
{"l_count", 1},
{"l_point", 6},
};
Expand All @@ -76,7 +78,7 @@ INSTANTIATE_TEST_CASE_P(
tpch_queries,
TPCHQueriesTestT,
::testing::Combine(
::testing::Values("q1", "q3", "q4", "q5", "q6", "q14", "q18", "q19", "l_count", "l_point"),
::testing::Values("q1", "q3", "q4", "q5", "q6", "q14", "q18", "q19", "l_count", "q_bigjoin", "l_point"),
::testing::Values(
PipelineExecutor::ExecutionMode::Fused,
PipelineExecutor::ExecutionMode::Interpreted,
Expand Down
1 change: 1 addition & 0 deletions tools/inkfuse_bench.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ const std::vector<std::pair<std::string, decltype(tpch::q1)*>> queries = {
{"q14", tpch::q14},
{"q18", tpch::q18},
{"q19", tpch::q19},
{"q_bigjoin", tpch::q_bigjoin},
{"l_count", tpch::l_count},
{"l_point", tpch::l_point},
};
Expand Down
3 changes: 3 additions & 0 deletions tools/inkfuse_runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,9 @@ int main(int argc, char* argv[]) {
} else if (split[1] == "q19") {
auto q = tpch::q19(*loaded);
runQuery("q19", std::move(q), mode, thread_count);
} else if (split[1] == "q_bigjoin") {
auto q = tpch::q_bigjoin(*loaded);
runQuery("q_bigjoin", std::move(q), mode, thread_count);
} else if (split[1] == "l_count") {
auto q = tpch::l_count(*loaded);
runQuery("l_count", std::move(q), mode, thread_count);
Expand Down

0 comments on commit a6e0932

Please sign in to comment.