Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -102,8 +103,8 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {

private static final ThreadLocal<ColumnAccessInfo> COLUMN_ACCESS_INFO =
new ThreadLocal<>();
private static final ThreadLocal<Map<HiveProject, Table>> VIEW_PROJECT_TO_TABLE_SCHEMA =
new ThreadLocal<>();
private static final ThreadLocal<Map<RelNode, Pair<Table, List<RexNode>>>>
VIEW_RELNODE_TO_TABLE_AND_PROJECTS = new ThreadLocal<>();


protected HiveRelFieldTrimmer(boolean fetchStats) {
Expand Down Expand Up @@ -155,17 +156,18 @@ public RelNode trim(RelBuilder relBuilder, RelNode root) {
}

/**
 * Trims the plan rooted at {@code root} while the supplied column access info
 * and view mapping are published through thread-local variables.
 *
 * <p>The thread locals are set before delegating to
 * {@code super.trim(relBuilder, root)} and are removed in a {@code finally}
 * block, so they are cleared whether the delegated call returns or throws.
 *
 * @param relBuilder builder passed through to {@code super.trim}
 * @param root root of the plan passed through to {@code super.trim}
 * @param columnAccessInfo value stored in {@code COLUMN_ACCESS_INFO} for the
 *     duration of the call
 * @return the result of {@code super.trim(relBuilder, root)}
 */
public RelNode trim(RelBuilder relBuilder, RelNode root,
ColumnAccessInfo columnAccessInfo, Map<HiveProject, Table> viewToTableSchema) {
ColumnAccessInfo columnAccessInfo,
Map<RelNode, Pair<Table, List<RexNode>>> relNodeToTableAndProjects) {
try {
// Set local thread variables
COLUMN_ACCESS_INFO.set(columnAccessInfo);
VIEW_PROJECT_TO_TABLE_SCHEMA.set(viewToTableSchema);
VIEW_RELNODE_TO_TABLE_AND_PROJECTS.set(relNodeToTableAndProjects);
// Execute pruning
return super.trim(relBuilder, root);
} finally {
// Always remove the local thread variables to avoid leaks
COLUMN_ACCESS_INFO.remove();
VIEW_PROJECT_TO_TABLE_SCHEMA.remove();
VIEW_RELNODE_TO_TABLE_AND_PROJECTS.remove();
}
}

Expand All @@ -182,6 +184,7 @@ protected TrimResult trimChild(
RelNode input,
final ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
setColumnAccessInfoForViews(rel, fieldsUsed);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we need to add a call for the root node somewhere as well.

final ImmutableBitSet.Builder fieldsUsedBuilder = fieldsUsed.rebuild();

// Correlating variables are a means for other relational expressions to use
Expand All @@ -203,6 +206,26 @@ protected RexNode handle(RexFieldAccess fieldAccess) {
return dispatchTrimFields(input, fieldsUsedBuilder.build(), extraFields);
}

/**
 * Records column accesses for a node that is registered in the thread-local
 * view map ({@code VIEW_RELNODE_TO_TABLE_AND_PROJECTS}).
 *
 * <p>Does nothing unless {@code rel} is not a {@link HiveTableScan}, both
 * thread-local values ({@code COLUMN_ACCESS_INFO} and the view map) are set,
 * and the view map contains {@code rel}. Otherwise, for every project position
 * {@code i} whose bit is set in {@code fieldsUsed}, the name of the
 * {@code i}-th entry of {@code table.getAllCols()} is added to the column
 * access info under the table's complete name.
 *
 * <p>NOTE(review): the table's column list is indexed by project position, so
 * this presumably relies on the projects being in the same order (and no more
 * numerous) than the table's columns - confirm against the code that
 * populates the map.
 *
 * @param rel node looked up in the view map
 * @param fieldsUsed bit set tested at each project position
 * @throws NullPointerException if the table mapped to {@code rel} is {@code null}
 */
private static void setColumnAccessInfoForViews(RelNode rel, ImmutableBitSet fieldsUsed) {
final ColumnAccessInfo columnAccessInfo = COLUMN_ACCESS_INFO.get();
final Map<RelNode, Pair<Table, List<RexNode>>> relNodeToTableAndProjects = VIEW_RELNODE_TO_TABLE_AND_PROJECTS.get();

// HiveTableScans are handled separately in HiveTableScan's trimFields method.
if (!(rel instanceof HiveTableScan) &&
columnAccessInfo != null &&
relNodeToTableAndProjects != null &&
relNodeToTableAndProjects.containsKey(rel)) {
Table table = Objects.requireNonNull(relNodeToTableAndProjects.get(rel).left);
List<RexNode> projects = relNodeToTableAndProjects.get(rel).right;
List<FieldSchema> tableAllCols = table.getAllCols();
for (Ord<RexNode> ord : Ord.zip(projects)) {
if (fieldsUsed.get(ord.i)) {
columnAccessInfo.add(table.getCompleteName(), tableAllCols.get(ord.i).getName());
}
}
Comment on lines +219 to +225
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we need the projects. We could just iterate over the fields from the row type of the RelNode. That would also simplify the map, i.e., just a Map<RelNode, Table>, instead of a Map<RelNode, Pair<Table, List<RexNode>>>.

}
}

/**
* Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
* {@link org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveMultiJoin}.
Expand Down Expand Up @@ -726,27 +749,6 @@ public TrimResult trimFields(Aggregate aggregate, ImmutableBitSet fieldsUsed, Se
return result(relBuilder.build(), mapping);
}

/**
 * Variant of {@link #trimFields(RelNode, ImmutableBitSet, Set)} for
 * {@link org.apache.calcite.rel.logical.LogicalProject}.
 *
 * <p>Before delegating to the superclass, registers column accesses for view
 * authorization: when both thread-local values are set and the view map
 * contains {@code project}, every project position whose bit is set in
 * {@code fieldsUsed} adds the table column at that same position to the
 * column access info.
 */
public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed,
    Set<RelDataTypeField> extraFields) {
  final ColumnAccessInfo accessInfo = COLUMN_ACCESS_INFO.get();
  final Map<HiveProject, Table> projectToTable = VIEW_PROJECT_TO_TABLE_SCHEMA.get();
  final boolean isRegisteredViewProject =
      accessInfo != null && projectToTable != null && projectToTable.containsKey(project);

  if (isRegisteredViewProject) {
    final int projectCount = project.getProjects().size();
    for (int position = 0; position < projectCount; position++) {
      if (!fieldsUsed.get(position)) {
        continue;
      }
      // Looked up per used field, so nothing is fetched when no field is used.
      final Table viewTable = projectToTable.get(project);
      accessInfo.add(
          viewTable.getCompleteName(), viewTable.getAllCols().get(position).getName());
    }
  }
  return super.trimFields(project, fieldsUsed, extraFields);
}

public TrimResult trimFields(HiveTableScan tableAccessRel, ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
final TrimResult result = super.trimFields(tableAccessRel, fieldsUsed, extraFields);
Expand Down
26 changes: 15 additions & 11 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import com.google.common.collect.Multimap;

import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Function;
import java.util.regex.Pattern;
Expand Down Expand Up @@ -1577,7 +1578,7 @@ public class CalcitePlannerAction implements Frameworks.PlannerAction<RelNode> {
private final Map<String, PrunedPartitionList> partitionCache;
private final Map<String, ColumnStatsList> colStatsCache;
private final ColumnAccessInfo columnAccessInfo;
private Map<HiveProject, Table> viewProjectToTableSchema;
private Map<RelNode, Pair<Table, List<RexNode>>> relNodeToTableAndProjects;
private final QB rootQB;

// correlated vars across subqueries within same query needs to have different ID
Expand Down Expand Up @@ -1662,8 +1663,12 @@ public RelNode apply(RelOptCluster cluster, RelOptSchema relOptSchema, SchemaPlu
// We need to get the ColumnAccessInfo and the view RelNode-to-table mapping (relNodeToTableAndProjects) for views.
if (conf.getBoolVar(ConfVars.HIVE_STATS_COLLECT_SCANCOLS) || !skipAuthorization()) {
HiveRelFieldTrimmer.get()
.trim(HiveRelFactories.HIVE_BUILDER.create(optCluster, null), calcitePlan, this.columnAccessInfo,
this.viewProjectToTableSchema);
.trim(
HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
calcitePlan,
this.columnAccessInfo,
this.relNodeToTableAndProjects
);
}
perfLogger.perfLogEnd(this.getClass().getName(), PerfLogger.MV_REWRITE_FIELD_TRIMMER);

Expand Down Expand Up @@ -4918,15 +4923,13 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,

aliasToRel.put(subqAlias, relNode);
if (qb.getViewToTabSchema().containsKey(subqAlias)) {
if (relNode instanceof HiveProject) {
if (this.viewProjectToTableSchema == null) {
this.viewProjectToTableSchema = new LinkedHashMap<>();
}
viewProjectToTableSchema.put((HiveProject) relNode, qb.getViewToTabSchema().get(subqAlias));
} else {
throw new SemanticException("View " + subqAlias + " is corresponding to "
+ relNode.toString() + ", rather than a HiveProject.");
List<RexNode> projects = relNode instanceof HiveProject project ?
project.getProjects() :
HiveCalciteUtil.getProjsFromBelowAsInputRef(Objects.requireNonNull(relNode));
if (this.relNodeToTableAndProjects == null) {
this.relNodeToTableAndProjects = new HashMap<>();
}
relNodeToTableAndProjects.put(relNode, Pair.of(qb.getViewToTabSchema().get(subqAlias), projects));
}
}

Expand Down Expand Up @@ -5048,6 +5051,7 @@ private RelNode genLogicalPlan(QB qb, boolean outerMostQB,
return srcRel;
}


private RelNode genGBHavingLogicalPlan(QB qb, RelNode srcRel) throws SemanticException {
RelNode gbFilter = null;
QBParseInfo qbp = getQBParseInfo(qb);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
set hive.security.authorization.enabled=true;
create table t1 (username string, id int);

create view vw_t1 as select distinct username from t1 limit 5;
explain cbo select * from vw_t1;
select * from vw_t1;

create view vw_t2 as
select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id;
explain cbo select * from vw_t2;
select * from vw_t2;

create view vw_t3 as
select username from (select username, id from t1 where id > 10 limit 10) x where username > 'a' limit 5;
explain cbo select * from vw_t3;
select * from vw_t3;
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
PREHOOK: query: create table t1 (username string, id int)
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@t1
POSTHOOK: query: create table t1 (username string, id int)
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@t1
PREHOOK: query: create view vw_t1 as select distinct username from t1 limit 5
PREHOOK: type: CREATEVIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@vw_t1
POSTHOOK: query: create view vw_t1 as select distinct username from t1 limit 5
POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@t1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@vw_t1
POSTHOOK: Lineage: vw_t1.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
PREHOOK: query: explain cbo select * from vw_t1
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t1
#### A masked pattern was here ####
POSTHOOK: query: explain cbo select * from vw_t1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t1
#### A masked pattern was here ####
CBO PLAN:
HiveSortLimit(fetch=[5])
HiveProject(username=[$0])
HiveAggregate(group=[{0}])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: select * from vw_t1
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t1
#### A masked pattern was here ####
POSTHOOK: query: select * from vw_t1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t1
#### A masked pattern was here ####
PREHOOK: query: create view vw_t2 as
select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id
PREHOOK: type: CREATEVIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@vw_t2
POSTHOOK: query: create view vw_t2 as
select username from (select username, id from t1 where id > 10 limit 1) x where username > 'a' order by id
POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@t1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@vw_t2
POSTHOOK: Lineage: vw_t2.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
PREHOOK: query: explain cbo select * from vw_t2
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t2
#### A masked pattern was here ####
POSTHOOK: query: explain cbo select * from vw_t2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t2
#### A masked pattern was here ####
CBO PLAN:
HiveFilter(condition=[>($0, _UTF-16LE'a')])
HiveProject(username=[$0])
HiveSortLimit(fetch=[1])
HiveProject(username=[$0])
HiveFilter(condition=[>($1, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: select * from vw_t2
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t2
#### A masked pattern was here ####
POSTHOOK: query: select * from vw_t2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t2
#### A masked pattern was here ####
PREHOOK: query: create view vw_t3 as
select username from (select username, id from t1 where id > 10 limit 10) x where username > 'a' limit 5
PREHOOK: type: CREATEVIEW
PREHOOK: Input: default@t1
PREHOOK: Output: database:default
PREHOOK: Output: default@vw_t3
POSTHOOK: query: create view vw_t3 as
select username from (select username, id from t1 where id > 10 limit 10) x where username > 'a' limit 5
POSTHOOK: type: CREATEVIEW
POSTHOOK: Input: default@t1
POSTHOOK: Output: database:default
POSTHOOK: Output: default@vw_t3
POSTHOOK: Lineage: vw_t3.username SIMPLE [(t1)t1.FieldSchema(name:username, type:string, comment:null), ]
PREHOOK: query: explain cbo select * from vw_t3
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t3
#### A masked pattern was here ####
POSTHOOK: query: explain cbo select * from vw_t3
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t3
#### A masked pattern was here ####
CBO PLAN:
HiveSortLimit(fetch=[5])
HiveProject(username=[$0])
HiveFilter(condition=[>($0, _UTF-16LE'a')])
HiveProject(username=[$0])
HiveSortLimit(fetch=[10])
HiveProject(username=[$0])
HiveFilter(condition=[>($1, 10)])
HiveTableScan(table=[[default, t1]], table:alias=[t1])

PREHOOK: query: select * from vw_t3
PREHOOK: type: QUERY
PREHOOK: Input: default@t1
PREHOOK: Input: default@vw_t3
#### A masked pattern was here ####
POSTHOOK: query: select * from vw_t3
POSTHOOK: type: QUERY
POSTHOOK: Input: default@t1
POSTHOOK: Input: default@vw_t3
#### A masked pattern was here ####