diff --git a/core/trino-main/src/main/java/io/trino/sql/analyzer/CanonicalizationAware.java b/core/trino-main/src/main/java/io/trino/sql/analyzer/CanonicalizationAware.java index 131261192a4d..bdec8a1a89e7 100644 --- a/core/trino-main/src/main/java/io/trino/sql/analyzer/CanonicalizationAware.java +++ b/core/trino-main/src/main/java/io/trino/sql/analyzer/CanonicalizationAware.java @@ -25,20 +25,27 @@ public class CanonicalizationAware { private final T node; + private final boolean ignoreCase; // Updates to this field are thread-safe despite benign data race due to: // 1. idempotent hash computation // 2. atomic updates to int fields per JMM private int hashCode; - private CanonicalizationAware(T node) + private CanonicalizationAware(T node, boolean ignoreCase) { this.node = requireNonNull(node, "node is null"); + this.ignoreCase = ignoreCase; } public static CanonicalizationAware canonicalizationAwareKey(T node) { - return new CanonicalizationAware(node); + return new CanonicalizationAware(node, false); + } + + public static CanonicalizationAware canonicalizationAwareKey(T node, boolean ignoreCase) + { + return new CanonicalizationAware(node, ignoreCase); } public T getNode() @@ -51,7 +58,8 @@ public int hashCode() { int hash = hashCode; if (hash == 0) { - hash = treeHash(node, CanonicalizationAware::canonicalizationAwareHash); + hash = treeHash(node, + ignoreCase ? CanonicalizationAware::canonicalizationAwareIgnoreCaseHash : CanonicalizationAware::canonicalizationAwareHash); if (hash == 0) { hash = 1; } @@ -73,7 +81,8 @@ public boolean equals(Object o) } CanonicalizationAware other = (CanonicalizationAware) o; - return treeEqual(node, other.node, CanonicalizationAware::canonicalizationAwareComparison); + return treeEqual(node, other.node, + ignoreCase ? CanonicalizationAware::canonicalizationAwareIgnoreCaseComparison : CanonicalizationAware::canonicalizationAwareComparison); } @Override @@ -91,6 +100,15 @@ public static Boolean canonicalizationAwareComparison(Node left, Node right) return null; } + public static Boolean canonicalizationAwareIgnoreCaseComparison(Node left, Node right) + { + if (left instanceof Identifier leftIdentifier && right instanceof Identifier rightIdentifier) { + return leftIdentifier.getCanonicalValue(true).equals(rightIdentifier.getCanonicalValue(true)); + } + + return null; + } + public static OptionalInt canonicalizationAwareHash(Node node) { if (node instanceof Identifier identifier) { @@ -101,4 +119,15 @@ public static OptionalInt canonicalizationAwareHash(Node node) } return OptionalInt.empty(); } + + public static OptionalInt canonicalizationAwareIgnoreCaseHash(Node node) + { + if (node instanceof Identifier identifier) { + return OptionalInt.of(identifier.getCanonicalValue(true).hashCode()); + } + if (node.getChildren().isEmpty()) { + return OptionalInt.of(node.hashCode()); + } + return OptionalInt.empty(); + } } diff --git a/core/trino-main/src/main/java/io/trino/sql/analyzer/StatementAnalyzer.java b/core/trino-main/src/main/java/io/trino/sql/analyzer/StatementAnalyzer.java index 4138196025c8..7cd5bbe3dc33 100644 --- a/core/trino-main/src/main/java/io/trino/sql/analyzer/StatementAnalyzer.java +++ b/core/trino-main/src/main/java/io/trino/sql/analyzer/StatementAnalyzer.java @@ -5764,7 +5764,7 @@ private void verifySelectDistinct(QuerySpecification node, List orde // SELECT a FROM t ORDER BY a // the "a" in the SELECT clause is bound to the FROM scope, while the "a" in ORDER BY clause is bound // to the "a" from the SELECT clause, so we can't compare by field id / relation id. - if (expression instanceof Identifier && aliases.contains(canonicalizationAwareKey(expression))) { + if (expression instanceof Identifier && aliases.contains(canonicalizationAwareKey(expression, true))) { continue; } @@ -5787,13 +5787,13 @@ private Set> getAliases(Select node) if (item instanceof SingleColumn column) { Optional alias = column.getAlias(); if (alias.isPresent()) { - aliases.add(canonicalizationAwareKey(alias.get())); + aliases.add(canonicalizationAwareKey(alias.get(), true)); } else if (column.getExpression() instanceof Identifier identifier) { - aliases.add(canonicalizationAwareKey(identifier)); + aliases.add(canonicalizationAwareKey(identifier, true)); } else if (column.getExpression() instanceof DereferenceExpression dereferenceExpression) { - aliases.add(canonicalizationAwareKey(dereferenceExpression.getField().orElseThrow())); + aliases.add(canonicalizationAwareKey(dereferenceExpression.getField().orElseThrow(), true)); } } else if (item instanceof AllColumns allColumns) { @@ -5803,10 +5803,10 @@ else if (item instanceof AllColumns allColumns) { Field field = fields.get(i); if (!allColumns.getAliases().isEmpty()) { - aliases.add(canonicalizationAwareKey(allColumns.getAliases().get(i))); + aliases.add(canonicalizationAwareKey(allColumns.getAliases().get(i), true)); } else if (field.getName().isPresent()) { - aliases.add(canonicalizationAwareKey(new Identifier(field.getName().get()))); + aliases.add(canonicalizationAwareKey(new Identifier(field.getName().get()), true)); } } } diff --git a/core/trino-main/src/test/java/io/trino/sql/query/TestDistinctWithOrderBy.java b/core/trino-main/src/test/java/io/trino/sql/query/TestDistinctWithOrderBy.java index 68b7447f8fdd..73bb5c92ecb8 100644 --- a/core/trino-main/src/test/java/io/trino/sql/query/TestDistinctWithOrderBy.java +++ b/core/trino-main/src/test/java/io/trino/sql/query/TestDistinctWithOrderBy.java @@ -72,6 +72,52 @@ public void testOrderByReferenceWithMixedStyles() assertThat(assertions.query("SELECT DISTINCT a, b a FROM (VALUES (2, 10), (1, 20), (2, 10)) T(a, b) ORDER BY T.a")) .ordered() .matches("VALUES (1, 20), (2, 10)"); + + // tests with delimited and lower case identifiers + assertThat(assertions.query("SELECT DISTINCT a as x FROM (VALUES 2, 1, 2) t(a) ORDER BY \"x\"")) + .matches("VALUES 1, 2"); + + assertThat(assertions.query("SELECT DISTINCT a as \"x\" FROM (VALUES 2, 1, 2) t(a) ORDER BY x")) + .matches("VALUES 1, 2"); + + assertThat(assertions.query("SELECT DISTINCT a as \"x\" FROM (VALUES 2, 1, 2) t(a) ORDER BY \"x\"")) + .matches("VALUES 1, 2"); + + // tests with delimited and upper case identifiers + assertThat(assertions.query("SELECT DISTINCT a as X FROM (VALUES 2, 1, 2) t(a) ORDER BY \"X\"")) + .matches("VALUES 1, 2"); + + assertThat(assertions.query("SELECT DISTINCT a as \"X\" FROM (VALUES 2, 1, 2) t(a) ORDER BY X")) + .matches("VALUES 1, 2"); + + assertThat(assertions.query("SELECT DISTINCT a as \"X\" FROM (VALUES 2, 1, 2) t(a) ORDER BY \"X\"")) + .matches("VALUES 1, 2"); + + // tests with mixed lower (in SELECT DISTINCT) and upper (in ORDER BY) case identifiers + assertThat(assertions.query("SELECT DISTINCT a as x FROM (VALUES 2, 1, 2) t(a) ORDER BY X")) + .matches("VALUES 1, 2"); + + assertThat(assertions.query("SELECT DISTINCT a as x FROM (VALUES 2, 1, 2) t(a) ORDER BY \"X\"")) + .matches("VALUES 1, 2"); + + assertThat(assertions.query("SELECT DISTINCT a as \"x\" FROM (VALUES 2, 1, 2) t(a) ORDER BY X")) + .matches("VALUES 1, 2"); + + assertThat(assertions.query("SELECT DISTINCT a as \"x\" FROM (VALUES 2, 1, 2) t(a) ORDER BY \"X\"")) + .matches("VALUES 1, 2"); + + // tests with mixed upper (in SELECT DISTINCT) and lower (in ORDER BY) case identifiers + assertThat(assertions.query("SELECT DISTINCT a as X FROM (VALUES 2, 1, 2) t(a) ORDER BY x")) + .matches("VALUES 1, 2"); + + assertThat(assertions.query("SELECT DISTINCT a as X FROM (VALUES 2, 1, 2) t(a) ORDER BY \"x\"")) + .matches("VALUES 1, 2"); + + assertThat(assertions.query("SELECT DISTINCT a as \"X\" FROM (VALUES 2, 1, 2) t(a) ORDER BY x")) + .matches("VALUES 1, 2"); + + assertThat(assertions.query("SELECT DISTINCT a as \"X\" FROM (VALUES 2, 1, 2) t(a) ORDER BY \"x\"")) + .matches("VALUES 1, 2"); } @Test diff --git a/core/trino-parser/src/main/java/io/trino/sql/tree/Identifier.java b/core/trino-parser/src/main/java/io/trino/sql/tree/Identifier.java index 2c7a0743a4f3..8cf9558ce70b 100644 --- a/core/trino-parser/src/main/java/io/trino/sql/tree/Identifier.java +++ b/core/trino-parser/src/main/java/io/trino/sql/tree/Identifier.java @@ -78,10 +78,14 @@ public boolean isDelimited() public String getCanonicalValue() { - if (isDelimited()) { + return getCanonicalValue(false); + } + + public String getCanonicalValue(boolean ignoreCase) + { + if (!ignoreCase && isDelimited()) { return value; } - return value.toUpperCase(ENGLISH); }