Skip to content

Commit

Permalink
[CALCITE-6219] 'Must-filter' columns
Browse files Browse the repository at this point in the history
A table can declare that some of its columns must be filtered
by implementing `interface SemanticTable`. If such columns
are not filtered in a WHERE or HAVING clause, the validator
throws.

There are several purposes of these columns, one of which is
to prevent expensive full-table scans (for example, reading
all Orders without restricting on orderDate).

Implementation is via the method
SqlValidatorNamespace.getMustFilterFields(). For a table
namespace, that method returns the declared must-filter
fields. For a query namespace, that method returns any
must-filter fields that have not been filtered in that query;
such fields become the responsibility of the enclosing query.

If a field is must-filter and is not in the SELECT clause of a
sub-query, that is also an error, because of course it is now
impossible for the enclosing query to filter it.

Close apache#3688

Co-authored-by: Julian Hyde <[email protected]>
  • Loading branch information
olivrlee and julianhyde committed Feb 27, 2024
1 parent 96485c0 commit 2558c13
Show file tree
Hide file tree
Showing 15 changed files with 560 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1085,6 +1085,9 @@ ExInst<RuntimeException> multipleCapturingGroupsForRegexpFunctions(String value,
/** Error for a table function that has more than one input table with row
 * semantics; {0} is the name of the table function. */
@BaseMessage("A table function at most has one input table with row semantics. Table function ''{0}'' has multiple input tables with row semantics")
ExInst<SqlValidatorException> multipleRowSemanticsTables(String funcName);

/** Error for a SQL statement that leaves one or more must-filter fields
 * unfiltered; {0} is the text listing the names of those fields. */
@BaseMessage("SQL statement did not contain filters on the following fields: {0}")
ExInst<SqlValidatorException> mustFilterFieldsMissing(String mustFilterFields);

/** Error for a negative bit position passed to BIT_GET/GETBIT; {0} is the
 * offending position. */
@BaseMessage("BIT_GET/GETBIT error: negative position {0,number} not allowed")
ExInst<CalciteException> illegalNegativeBitGetPosition(int position);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Pair;
import org.apache.calcite.util.Util;

Expand Down Expand Up @@ -57,6 +58,10 @@ abstract class AbstractNamespace implements SqlValidatorNamespace {
/** As {@link #rowType}, but not necessarily a struct. */
protected @Nullable RelDataType type;

/** Ordinals of fields that must be filtered. Initially the empty set, but
* should typically be re-assigned on validate. */
protected ImmutableBitSet mustFilterFields = ImmutableBitSet.of();

protected final @Nullable SqlNode enclosingNode;

//~ Constructors -----------------------------------------------------------
Expand Down Expand Up @@ -159,6 +164,11 @@ abstract class AbstractNamespace implements SqlValidatorNamespace {
return ImmutableList.of();
}

/** Returns the ordinals of the fields of this namespace that must be
 * filtered, as currently recorded in {@code mustFilterFields}.
 *
 * @throws NullPointerException if the recorded set is null */
@Override public ImmutableBitSet getMustFilterFields() {
  final ImmutableBitSet fields = this.mustFilterFields;
  return requireNonNull(fields,
      "mustFilterFields (maybe validation is not complete?)");
}

/** Reports every column as not monotonic; the column name is not
 * consulted by this implementation. */
@Override public SqlMonotonicity getMonotonicity(String columnName) {
  final SqlMonotonicity monotonicity = SqlMonotonicity.NOT_MONOTONIC;
  return monotonicity;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ private SqlValidatorNamespace resolveImpl(SqlIdentifier id) {
}
}

this.mustFilterFields = resolvedNamespace.getMustFilterFields();
RelDataType rowType = resolvedNamespace.getRowType();

if (extendList != null) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.sql.validate;

import org.checkerframework.checker.nullness.qual.Nullable;

/**
 * Extra, optional metadata that supplements {@link SqlValidatorTable}.
 *
 * <p>Its purpose is to flag individual columns as 'must-filter'.
 */
public interface SemanticTable {
  /** Returns the filter expression associated with {@code column}.
   *
   * <p>A non-null result marks the column as a
   * {@link #mustFilter(int) must-filter} column; null means that it is not.
   * The default implementation returns null for every column.
   *
   * @param column Column ordinal (0-based)
   *
   * @throws IndexOutOfBoundsException if column ordinal is out of range */
  default @Nullable String getFilter(int column) {
    return null;
  }

  /** Returns whether any query that references this table is obliged to
   * filter on {@code column}.
   *
   * <p>The default implementation answers true exactly when
   * {@link #getFilter(int)} yields a non-null value.
   *
   * @param column Column ordinal (0-based)
   *
   * @throws IndexOutOfBoundsException if column ordinal is out of range */
  default boolean mustFilter(int column) {
    final String filter = getFilter(column);
    return filter != null;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

import java.util.List;

import static java.util.Objects.hash;

/**
* Fully-qualified identifier.
*
Expand All @@ -46,6 +48,29 @@ private SqlQualified(@Nullable SqlValidatorScope scope, int prefixLength,
this.identifier = identifier;
}

/** Hashes the same two components that {@code equals} compares: the
 * identifier's names and the prefix length. */
@Override public int hashCode() {
  final List<String> names = identifier.names;
  return hash(names, prefixLength);
}

/** Returns whether {@code obj} is a {@code SqlQualified} with the same
 * identifier names and the same prefix length as this one.
 *
 * <p>For example, in
 *
 * <blockquote><pre>
 * SELECT e.address, e.address.zipcode
 * FROM employees AS e</pre></blockquote>
 *
 * <p>"e.address" is {identifier=[e, address], prefixLength=1} and is
 * therefore distinct from "e.address.zipcode".
 *
 * <p>The namespace is deliberately not compared: all instances being
 * compared are assumed to have been resolved from the same
 * {@link SqlValidatorScope}. */
@Override public boolean equals(@Nullable Object obj) {
  if (this == obj) {
    return true;
  }
  if (!(obj instanceof SqlQualified)) {
    return false;
  }
  final SqlQualified that = (SqlQualified) obj;
  return prefixLength == that.prefixLength
      && identifier.names.equals(that.identifier.names);
}

/** Renders this qualified name as <code>{id: identifier, prefix: n}</code>. */
@Override public String toString() {
  return new StringBuilder("{id: ")
      .append(identifier)
      .append(", prefix: ")
      .append(prefixLength)
      .append("}")
      .toString();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
Expand All @@ -143,15 +144,19 @@
import java.util.HashMap;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.TreeSet;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Supplier;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
Expand Down Expand Up @@ -1160,6 +1165,23 @@ protected void validateNamespace(final SqlValidatorNamespace namespace,
SqlNode node = namespace.getNode();
if (node != null) {
setValidatedNodeType(node, namespace.getType());

if (node == top) {
// A top-level namespace must not return any must-filter fields.
// A non-top-level namespace (e.g. a subquery) may return must-filter
// fields; these are neutralized if the consuming query filters on them.
final ImmutableBitSet mustFilterFields =
namespace.getMustFilterFields();
if (!mustFilterFields.isEmpty()) {
// Set of field names, sorted alphabetically for determinism.
Set<String> fieldNameSet =
StreamSupport.stream(mustFilterFields.spliterator(), false)
.map(namespace.getRowType().getFieldNames()::get)
.collect(Collectors.toCollection(TreeSet::new));
throw newValidationError(node,
RESOURCE.mustFilterFieldsMissing(fieldNameSet.toString()));
}
}
}
}

Expand Down Expand Up @@ -2740,7 +2762,7 @@ protected SetopNamespace createSetopNamespace(
* @param alias Name of this query within its parent. Must be specified
* if usingScope != null
*/
private void registerQuery(
protected void registerQuery(
SqlValidatorScope parentScope,
@Nullable SqlValidatorScope usingScope,
SqlNode node,
Expand Down Expand Up @@ -3870,6 +3892,65 @@ protected void validateSelect(
validateSelectList(selectItems, select, targetRowType);
ns.setType(rowType);

// Deduce which columns must be filtered.
ns.mustFilterFields = ImmutableBitSet.of();
if (from != null) {
final Set<SqlQualified> qualifieds = new LinkedHashSet<>();
for (ScopeChild child : fromScope.children) {
final List<String> fieldNames =
child.namespace.getRowType().getFieldNames();
child.namespace.getMustFilterFields()
.forEachInt(i ->
qualifieds.add(
SqlQualified.create(fromScope, 1, child.namespace,
new SqlIdentifier(
ImmutableList.of(child.name, fieldNames.get(i)),
SqlParserPos.ZERO))));
}
if (!qualifieds.isEmpty()) {
if (select.getWhere() != null) {
forEachQualified(select.getWhere(), getWhereScope(select),
qualifieds::remove);
}
if (select.getHaving() != null) {
forEachQualified(select.getHaving(), getHavingScope(select),
qualifieds::remove);
}

// Each of the must-filter fields identified must be returned as a
// SELECT item, which is then flagged as must-filter.
final BitSet mustFilterFields = new BitSet();
final List<SqlNode> expandedSelectItems =
requireNonNull(fromScope.getExpandedSelectList(),
"expandedSelectList");
forEach(expandedSelectItems, (selectItem, i) -> {
selectItem = stripAs(selectItem);
if (selectItem instanceof SqlIdentifier) {
SqlQualified qualified =
fromScope.fullyQualify((SqlIdentifier) selectItem);
if (qualifieds.remove(qualified)) {
// SELECT item #i referenced a must-filter column that was not
// filtered in the WHERE or HAVING. It becomes a must-filter
// column for our consumer.
mustFilterFields.set(i);
}
}
});

// If there are must-filter fields that are not in the SELECT clause,
// this is an error.
if (!qualifieds.isEmpty()) {
throw newValidationError(select,
RESOURCE.mustFilterFieldsMissing(
qualifieds.stream()
.map(q -> q.suffix().get(0))
.collect(Collectors.toCollection(TreeSet::new))
.toString()));
}
ns.mustFilterFields = ImmutableBitSet.fromBitSet(mustFilterFields);
}
}

// Validate ORDER BY after we have set ns.rowType because in some
// dialects you can refer to columns of the select list, e.g.
// "SELECT empno AS x FROM emp ORDER BY x"
Expand All @@ -3885,6 +3966,19 @@ protected void validateSelect(
}
}

/** Walks an expression and, for every identifier the visitor reaches, hands
 * its fully-qualified form to a callback.
 *
 * @param node Expression to walk
 * @param scope Scope used to fully qualify each identifier
 * @param consumer Action invoked once per identifier visited */
private static void forEachQualified(SqlNode node, SqlValidatorScope scope,
    Consumer<SqlQualified> consumer) {
  final SqlBasicVisitor<Void> identifierVisitor =
      new SqlBasicVisitor<Void>() {
        @Override public Void visit(SqlIdentifier id) {
          consumer.accept(scope.fullyQualify(id));
          return null;
        }
      };
  node.accept(identifierVisitor);
}

private void checkRollUpInSelectList(SqlSelect select) {
SqlValidatorScope scope = getSelectScope(select);
for (SqlNode item : SqlNonNullableAccessors.getSelectList(select)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeField;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Pair;

import org.checkerframework.checker.nullness.qual.Nullable;
Expand Down Expand Up @@ -212,4 +213,10 @@ default boolean fieldExists(String name) {
* @param modality Modality
*/
boolean supportsModality(SqlModality modality);

/** Returns the ordinals (in the row type) of the "must-filter" fields,
 * that is, the fields that must be filtered in a query.
 *
 * <p>The default implementation returns the empty set. */
default ImmutableBitSet getMustFilterFields() {
  final ImmutableBitSet noFields = ImmutableBitSet.of();
  return noFields;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -381,8 +381,7 @@ public static SqlValidatorWithHints newValidator(
SqlValidatorCatalogReader catalogReader,
RelDataTypeFactory typeFactory,
SqlValidator.Config config) {
return new SqlValidatorImpl(opTab, catalogReader, typeFactory,
config);
return new SqlValidatorImpl(opTab, catalogReader, typeFactory, config);
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlNodeList;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.calcite.util.Util;

import com.google.common.collect.ImmutableList;
Expand All @@ -36,6 +37,7 @@
import java.util.List;
import java.util.Map;

import static org.apache.calcite.util.ImmutableBitSet.toImmutableBitSet;
import static org.apache.calcite.util.Static.RESOURCE;

import static java.util.Objects.requireNonNull;
Expand All @@ -58,6 +60,15 @@ private TableNamespace(SqlValidatorImpl validator, SqlValidatorTable table,
}

@Override protected RelDataType validateImpl(RelDataType targetRowType) {
this.mustFilterFields = ImmutableBitSet.of();
table.maybeUnwrap(SemanticTable.class)
.ifPresent(semanticTable ->
this.mustFilterFields =
table.getRowType().getFieldList().stream()
.map(RelDataTypeField::getIndex)
.filter(semanticTable::mustFilter)
.collect(toImmutableBitSet()));

if (extendedFields.isEmpty()) {
return table.getRowType();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class WithItemNamespace extends AbstractNamespace {
final SqlValidatorNamespace childNs =
validator.getNamespaceOrThrow(getQuery());
final RelDataType rowType = childNs.getRowTypeSansSystemColumns();
mustFilterFields = childNs.getMustFilterFields();
SqlNodeList columnList = withItem.columnList;
if (columnList == null) {
return rowType;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

import org.checkerframework.checker.nullness.qual.Nullable;

import static java.util.Objects.requireNonNull;

/**
* Namespace for <code>WITH</code> clause.
*/
Expand Down Expand Up @@ -56,9 +58,13 @@ public class WithNamespace extends AbstractNamespace {
}
final SqlValidatorScope scope2 =
validator.getWithScope(Util.last(with.withList));
final SqlValidatorNamespace bodyNamespace =
requireNonNull(validator.getNamespace(with.body), "namespace");

validator.validateQuery(with.body, scope2, targetRowType);
final RelDataType rowType = validator.getValidatedNodeType(with.body);
validator.setValidatedNodeType(with, rowType);
mustFilterFields = bodyNamespace.getMustFilterFields();
return rowType;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -354,6 +354,7 @@ InvalidPartitionKeys=Only tables with set semantics may be partitioned. Invalid
InvalidOrderBy=Only tables with set semantics may be ordered. Invalid ORDER BY clause in the {0,number,#}-th operand of table function ''{1}''
MultipleRowSemanticsTables=A table function at most has one input table with row semantics. Table function ''{0}'' has multiple input tables with row semantics
NoOperator=No operator for ''{0}'' with kind: ''{1}'', syntax: ''{2}'' during JSON deserialization
MustFilterFieldsMissing=SQL statement did not contain filters on the following fields: {0}
IllegalNegativeBitGetPosition=BIT_GET/GETBIT error: negative position {0,number} not allowed
IllegalBitGetPositionExceedsLimit=BIT_GET/GETBIT error: position {0,number} exceeds the bit upper limit {1,number}
# End CalciteResource.properties
Loading

0 comments on commit 2558c13

Please sign in to comment.