Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -229,12 +229,16 @@ public RelNode visit(HiveJoin join) {
// No self-join detected, return the join as is
aliases.addAll(lf.aliases);
aliases.addAll(rf.aliases);
} else {
// Self-join detected, introduce a derived table for the left side
aliases.addAll(rf.aliases);
newL = introduceDerivedTable(newL);
}
if (newL == join.getLeft() && newR == join.getRight()) {
return join;
} else {
return join.copy(join.getTraitSet(), Arrays.asList(newL, newR));
}
// Self-join detected, introduce a derived table for the left side
aliases.addAll(rf.aliases);
introduceDerivedTable(newL, join);
return join;
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,9 @@ create table t1 (key int, value int);
explain cbo
with cte as
(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
select * from cte a join cte b join cte c
select * from cte a join cte b join cte c;

explain cbo
with cte as
(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
select * from cte a join t1 b join cte c;
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,32 @@ POSTHOOK: Input: default@t1
CBO PLAN:
HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$12], value0=[$13], BLOCK__OFFSET__INSIDE__FILE0=[$14], INPUT__FILE__NAME0=[$15], ROW__ID0=[$16], ROW__IS__DELETED0=[$17], key1=[$6], value1=[$7], BLOCK__OFFSET__INSIDE__FILE1=[$8], INPUT__FILE__NAME1=[$9], ROW__ID1=[$10], ROW__IS__DELETED1=[$11])
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$6], value0=[$7], BLOCK__OFFSET__INSIDE__FILE0=[$8], INPUT__FILE__NAME0=[$9], ROW__ID0=[$10], ROW__IS__DELETED0=[$11])
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

Warning: Shuffle Join MERGEJOIN[13][tables = [t1, $hdt$_0]] in Stage 'Reducer 2' is a cross product
Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, t1]] in Stage 'Reducer 3' is a cross product
PREHOOK: query: explain cbo
with cte as
(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
select * from cte a join t1 b join cte c
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
#### A masked pattern was here ####
POSTHOOK: query: explain cbo
with cte as
(select key, value, BLOCK__OFFSET__INSIDE__FILE, INPUT__FILE__NAME, ROW__ID, ROW__IS__DELETED from t1)
select * from cte a join t1 b join cte c
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
#### A masked pattern was here ####
CBO PLAN:
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveProject(key=[$0], value=[$1])
HiveTableScan(table=[[default, t1]], table:alias=[b])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,8 @@ POSTHOOK: Input: default@t1
CBO PLAN:
HiveProject(key=[$0], value=[$1], key0=[$4], value0=[$5], key1=[$2], value1=[$3])
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(key=[$0], value=[$1], key0=[$2], value0=[$3])
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(key=[$0], value=[$1])
HiveTableScan(table=[[default, mv]], table:alias=[default.mv])
HiveTableScan(table=[[default, mv]], table:alias=[default.mv])
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
HiveTableScan(table=[[default, mv]], table:alias=[default.mv])
HiveTableScan(table=[[default, mv]], table:alias=[default.mv])
HiveTableScan(table=[[default, mv]], table:alias=[default.mv])

Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,8 @@ POSTHOOK: Input: default@t1
CBO PLAN:
HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$12], value0=[$13], BLOCK__OFFSET__INSIDE__FILE0=[$14], INPUT__FILE__NAME0=[$15], ROW__ID0=[$16], ROW__IS__DELETED0=[$17], key1=[$6], value1=[$7], BLOCK__OFFSET__INSIDE__FILE1=[$8], INPUT__FILE__NAME1=[$9], ROW__ID1=[$10], ROW__IS__DELETED1=[$11])
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5], key0=[$6], value0=[$7], BLOCK__OFFSET__INSIDE__FILE0=[$8], INPUT__FILE__NAME0=[$9], ROW__ID0=[$10], ROW__IS__DELETED0=[$11])
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
HiveProject(key=[$0], value=[$1], BLOCK__OFFSET__INSIDE__FILE=[$2], INPUT__FILE__NAME=[$3], ROW__ID=[$4], ROW__IS__DELETED=[$5])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveJoin(condition=[true], joinType=[inner], algorithm=[none], cost=[not available])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveTableScan(table=[[default, t1]], table:alias=[t1])
HiveTableScan(table=[[default, t1]], table:alias=[t1])