From a1df03dc73297a323c1b9109726db33f8d9d9fd0 Mon Sep 17 00:00:00 2001 From: codluca Date: Wed, 24 Sep 2025 16:23:48 +0300 Subject: [PATCH] Fix 'IS NOT DISTINCT FROM' not working in JOIN when nulls are used The changes try to enable Domain with only null value to be used. Modify JoinDomainBuilder to take into consideration the nulls from the value blocks. --- .../io/trino/operator/JoinDomainBuilder.java | 15 +- .../java/io/trino/sql/DynamicFilters.java | 9 +- .../TestDynamicFilterSourceOperator.java | 12 +- .../io/trino/tests/TestJoinIsNotDistinct.java | 135 ++++++++++++++++++ 4 files changed, 160 insertions(+), 11 deletions(-) create mode 100644 testing/trino-tests/src/test/java/io/trino/tests/TestJoinIsNotDistinct.java diff --git a/core/trino-main/src/main/java/io/trino/operator/JoinDomainBuilder.java b/core/trino-main/src/main/java/io/trino/operator/JoinDomainBuilder.java index 6c14459e2e2b..dfb2e96b94ec 100644 --- a/core/trino-main/src/main/java/io/trino/operator/JoinDomainBuilder.java +++ b/core/trino-main/src/main/java/io/trino/operator/JoinDomainBuilder.java @@ -99,6 +99,14 @@ public class JoinDomainBuilder private long retainedSizeInBytes = INSTANCE_SIZE; + /** + * Indicates whether null values are allowed in the join domain. + * This is set to true if any null values are observed in the input blocks + * during domain building, and is used to determine whether the resulting + * domain should include nulls. + */ + private boolean nullsAllowed; + public JoinDomainBuilder( Type type, int maxDistinctValues, @@ -160,6 +168,9 @@ public boolean isCollecting() public void add(Block block) { + if (block.hasNull()) { + nullsAllowed = true; + } if (collectDistinctValues) { switch (block) { case ValueBlock valueBlock -> { @@ -290,8 +301,7 @@ public Domain build() } } } - // Inner and right join doesn't match rows with null key column values. - return Domain.create(ValueSet.copyOf(type, values.build()), false); + return Domain.create(ValueSet.copyOf(type, values.build()), nullsAllowed); } if (collectMinMax) { if (minValue == null) { @@ -307,7 +317,6 @@ public Domain build() private void add(ValueBlock block, int position) { - // Inner and right join doesn't match rows with null key column values. if (block.isNull(position)) { return; } diff --git a/core/trino-main/src/main/java/io/trino/sql/DynamicFilters.java b/core/trino-main/src/main/java/io/trino/sql/DynamicFilters.java index 02e9879121f8..41a3de31fb86 100644 --- a/core/trino-main/src/main/java/io/trino/sql/DynamicFilters.java +++ b/core/trino-main/src/main/java/io/trino/sql/DynamicFilters.java @@ -295,19 +295,18 @@ public Domain applyComparison(Domain domain) if (domain.isAll()) { return domain; } - if (domain.isNone()) { - // Dynamic filter collection skips nulls + if (domain.getValues().isNone()) { // In case of IS NOT DISTINCT FROM, an empty Domain should still allow null if (nullAllowed) { return Domain.onlyNull(domain.getType()); } - return domain; + return Domain.none(domain.getType()); } Range span = domain.getValues().getRanges().getSpan(); return switch (operator) { case EQUAL -> { - if (nullAllowed) { - yield Domain.create(domain.getValues(), true); + if (nullAllowed != domain.isNullAllowed()) { + yield Domain.create(domain.getValues(), nullAllowed); } yield domain; } diff --git a/core/trino-main/src/test/java/io/trino/operator/TestDynamicFilterSourceOperator.java b/core/trino-main/src/test/java/io/trino/operator/TestDynamicFilterSourceOperator.java index d58bfedbf178..c0568d5ec348 100644 --- a/core/trino-main/src/test/java/io/trino/operator/TestDynamicFilterSourceOperator.java +++ b/core/trino-main/src/test/java/io/trino/operator/TestDynamicFilterSourceOperator.java @@ -287,7 +287,7 @@ public void testCollectWithNulls() assertThat(partitions.build()).isEqualTo(ImmutableList.of( TupleDomain.withColumnDomains(ImmutableMap.of( - new DynamicFilterId("0"), Domain.create(ValueSet.of(INTEGER, 1L, 2L, 3L, 4L, 5L), false))))); + new DynamicFilterId("0"), Domain.create(ValueSet.of(INTEGER, 1L, 2L, 3L, 4L, 5L), true))))); } @Test @@ -490,7 +490,13 @@ public void testMultipleColumnsCollectMinMaxWithNulls() maxDistinctValues, ImmutableList.of(BIGINT, BIGINT), ImmutableList.of(largePage), - ImmutableList.of(TupleDomain.none())); + ImmutableList.of(TupleDomain.withColumnDomains(ImmutableMap.of( + new DynamicFilterId("0"), + Domain.onlyNull(BIGINT), + new DynamicFilterId("1"), + Domain.create( + ValueSet.ofRanges(range(BIGINT, 200L, true, 300L, true)), + false))))); } @Test @@ -570,7 +576,7 @@ public void testCollectDeduplication() ImmutableList.of(largePage, nullsPage), ImmutableList.of(TupleDomain.withColumnDomains(ImmutableMap.of( new DynamicFilterId("0"), - Domain.create(ValueSet.of(BIGINT, 7L), false))))); + Domain.create(ValueSet.of(BIGINT, 7L), true))))); } @Test diff --git a/testing/trino-tests/src/test/java/io/trino/tests/TestJoinIsNotDistinct.java b/testing/trino-tests/src/test/java/io/trino/tests/TestJoinIsNotDistinct.java new file mode 100644 index 000000000000..1845bcb2c01a --- /dev/null +++ b/testing/trino-tests/src/test/java/io/trino/tests/TestJoinIsNotDistinct.java @@ -0,0 +1,135 @@ +/* + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package io.trino.tests; + +import com.google.common.collect.ImmutableMap; +import io.trino.plugin.memory.MemoryPlugin; +import io.trino.sql.query.QueryAssertions; +import io.trino.testing.QueryRunner; +import io.trino.testing.StandaloneQueryRunner; +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInstance; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; + +import static io.trino.testing.TestingNames.randomNameSuffix; +import static io.trino.testing.TestingSession.testSessionBuilder; +import static java.lang.String.format; +import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.TestInstance.Lifecycle.PER_CLASS; + +@TestInstance(PER_CLASS) +@Execution(ExecutionMode.SAME_THREAD) +class TestJoinIsNotDistinct +{ + private static final String LOCAL_CATALOG = "local"; + private static final String DEFAULT_SCHEMA = "default"; + + private final QueryRunner queryRunner; + + private final QueryAssertions assertions; + + TestJoinIsNotDistinct() + { + queryRunner = new StandaloneQueryRunner(testSessionBuilder() + .setCatalog(LOCAL_CATALOG) + .setSchema(DEFAULT_SCHEMA) + .build()); + queryRunner.installPlugin(new MemoryPlugin()); + queryRunner.createCatalog(LOCAL_CATALOG, "memory", ImmutableMap.of()); + + assertions = new QueryAssertions(queryRunner); + } + + @AfterAll + void teardown() + { + assertions.close(); + } + + @Test + void testJoinWithIsNotDistinctFromOnNulls() + { + String tableName1 = "test_tab_" + randomNameSuffix(); + String tableName2 = "test_tab_" + randomNameSuffix(); + queryRunner.execute(format("CREATE TABLE %s (k1 INT, k2 INT)", tableName1)); + queryRunner.execute(format("CREATE TABLE %s (k1 INT, k2 INT)", tableName2)); + + queryRunner.execute(format("INSERT INTO %s VALUES (1, NULL)", tableName1)); + queryRunner.execute(format("INSERT INTO %s VALUES (1, NULL)", tableName2)); + assertThat(assertions.query(format("SELECT *" + + " FROM %s t" + + " INNER JOIN %s AS s" + + " ON s.k1 IS NOT DISTINCT FROM t.k1" + + " AND s.k2 IS NOT DISTINCT FROM t.k2", tableName1, tableName2))) + .matches("VALUES (1, CAST(NULL AS INTEGER), 1, CAST(NULL AS INTEGER))"); + + queryRunner.execute(format("INSERT INTO %s VALUES (NULL, NULL)", tableName1)); + queryRunner.execute(format("INSERT INTO %s VALUES (NULL, NULL)", tableName2)); + assertThat(assertions.query(format("SELECT *" + + " FROM %s t" + + " INNER JOIN %s AS s" + + " ON s.k1 IS NOT DISTINCT FROM t.k1" + + " AND s.k2 IS NOT DISTINCT FROM t.k2", tableName1, tableName2))) + .matches("VALUES (1, CAST(NULL AS INTEGER), 1, CAST(NULL AS INTEGER))," + + " (CAST(NULL AS INTEGER), CAST(NULL AS INTEGER), CAST(NULL AS INTEGER), CAST(NULL AS INTEGER))"); + + queryRunner.execute(format("INSERT INTO %s VALUES (NULL, 2)", tableName1)); + queryRunner.execute(format("INSERT INTO %s VALUES (3, NULL)", tableName2)); + assertThat(assertions.query(format("SELECT *" + + " FROM %s t" + + " INNER JOIN %s AS s" + + " ON s.k1 IS NOT DISTINCT FROM t.k1" + + " AND s.k2 IS NOT DISTINCT FROM t.k2", tableName1, tableName2))) + .matches("VALUES (1, CAST(NULL AS INTEGER), 1, CAST(NULL AS INTEGER))," + + " (CAST(NULL AS INTEGER), CAST(NULL AS INTEGER), CAST(NULL AS INTEGER), CAST(NULL AS INTEGER))"); + + queryRunner.execute(format("INSERT INTO %s VALUES (2, 2)", tableName1)); + queryRunner.execute(format("INSERT INTO %s VALUES (2, 2)", tableName2)); + assertThat(assertions.query(format("SELECT *" + + " FROM %s t" + + " INNER JOIN %s AS s" + + " ON s.k1 IS NOT DISTINCT FROM t.k1" + + " AND s.k2 IS NOT DISTINCT FROM t.k2", tableName1, tableName2))) + .matches("VALUES (1, CAST(NULL AS INTEGER), 1, CAST(NULL AS INTEGER))," + + " (CAST(NULL AS INTEGER), CAST(NULL AS INTEGER), CAST(NULL AS INTEGER), CAST(NULL AS INTEGER))," + + " (2, 2, 2, 2)"); + } + + @Test + void testJoinWithIsNotDistinctFromOnNullsOnDerivedTables() + { + assertThat(assertions.query("SELECT *" + + " FROM (SELECT 1 AS k1, NULL AS k2) t" + + " INNER JOIN (SELECT 1 AS k1, NULL AS k2) AS s" + + " ON s.k1 IS NOT DISTINCT FROM t.k1" + + " AND s.k2 IS NOT DISTINCT FROM t.k2")) + .matches("VALUES (1, NULL, 1, NULL)"); + + assertThat(assertions.query("SELECT *" + + " FROM (SELECT NULL AS k1, NULL AS k2) t" + + " INNER JOIN (SELECT NULL AS k1, NULL AS k2) AS s" + + " ON s.k1 IS NOT DISTINCT FROM t.k1" + + " AND s.k2 IS NOT DISTINCT FROM t.k2")) + .matches("VALUES (NULL, NULL, NULL, NULL)"); + + assertThat(assertions.query("SELECT *" + + " FROM (SELECT NULL AS k1, 2 AS k2) t" + + " INNER JOIN (SELECT 3 AS k1, NULL AS k2) AS s" + + " ON s.k1 IS NOT DISTINCT FROM t.k1" + + " AND s.k2 IS NOT DISTINCT FROM t.k2")) + .returnsEmptyResult(); + } +}