diff --git a/core/trino-main/src/main/java/io/trino/cost/StatisticRange.java b/core/trino-main/src/main/java/io/trino/cost/StatisticRange.java index 60eb988a4802..a1c963cfbc2a 100644 --- a/core/trino-main/src/main/java/io/trino/cost/StatisticRange.java +++ b/core/trino-main/src/main/java/io/trino/cost/StatisticRange.java @@ -32,6 +32,7 @@ public class StatisticRange { private static final double INFINITE_TO_FINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR = 0.25; private static final double INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR = 0.5; + private static final double DENSITY_HEURISTIC_THRESHOLD = 1e-3; /* Density cut-off, where density is distinctValues divided by length(). Used by overlapPercentWith in the lengthOfIntersect > 0 branch: when neither density is NaN, both length() and other.length() are finite, and minDensity (minExcludeNaN of the two densities) is strictly below this value, the method returns minExcludeNaN(this.distinctValues, other.distinctValues) divided by this.distinctValues instead of lengthOfIntersect divided by length(). A density exactly equal to the threshold keeps the length-based estimate. NOTE(review): the density-based estimate does not use lengthOfIntersect, so a sparse range that only partially overlaps gets the same result as one that is fully contained - confirm this is intended. */ // TODO unify field and method names with SymbolStatsEstimate /** @@ -122,7 +123,17 @@ public double overlapPercentWith(StatisticRange other) if (isInfinite(length()) && isFinite(lengthOfIntersect)) { return INFINITE_TO_FINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR; } + if (lengthOfIntersect > 0) { + double thisDensity = this.distinctValues / length(); + double otherDensity = other.distinctValues / other.length(); + double minDensity = minExcludeNaN(thisDensity, otherDensity); + + if (!isNaN(thisDensity) && !isNaN(otherDensity) + && isFinite(length()) && isFinite(other.length()) + && minDensity < DENSITY_HEURISTIC_THRESHOLD) { + return minExcludeNaN(this.distinctValues, other.distinctValues) / this.distinctValues; + } return lengthOfIntersect / length(); } diff --git a/core/trino-main/src/test/java/io/trino/cost/TestStatisticRange.java b/core/trino-main/src/test/java/io/trino/cost/TestStatisticRange.java index 1dc5f61c0862..827e93728634 100644 --- a/core/trino-main/src/test/java/io/trino/cost/TestStatisticRange.java +++ b/core/trino-main/src/test/java/io/trino/cost/TestStatisticRange.java @@ -20,6 +20,7 @@ import static java.lang.Double.NaN; import static java.lang.Double.POSITIVE_INFINITY; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.AssertionsForClassTypes.within; public class TestStatisticRange 
{ @@ -59,6 +60,55 @@ public void testOverlapPercentWith() assertOverlap(unboundedRange(0.0), unboundedRange(0), 0); } + @Test + public void testLowDensityOverlap() + { /* Sparse case, assuming range(low, high, distinctValues) - helper not shown here, confirm: 14 distinct values spread over [1, 3662098119] against a filter [1, 4] with 4 distinct values. The expected overlap is the distinct-value ratio 4/14, not the ratio of range lengths. */ + StatisticRange sparseRange = range(1, 3662098119.0, 14); + StatisticRange filterRange = range(1, 4, 4); + + double expectedOverlap = 4.0 / 14.0; + assertOverlap(sparseRange, filterRange, expectedOverlap); + } + + @Test + public void testDensityThresholdBoundary() + { /* Boundary case: 10 distinct values over a length of 10000 is a density of exactly 1e-3, which equals DENSITY_HEURISTIC_THRESHOLD in StatisticRange. The production comparison is strict (less than), so presumably the length-based estimate 100/10000 = 0.01 applies here. NOTE(review): isBetween(0.01, 0.5) accepts a wide interval and would also pass for the density-based result 5/10; asserting 0.01 would pin the boundary behaviour. */ + StatisticRange boundaryRange = range(0, 10000, 10); + StatisticRange smallFilter = range(0, 100, 5); + + double overlap = boundaryRange.overlapPercentWith(smallFilter); + assertThat(overlap).isBetween(0.01, 0.5); + } + + @Test + public void testHighDensityOverlap() + { /* Dense case: densities 50/100 and 5/10 are both far above the threshold, so the expected overlap 0.1 is the length ratio 10/100. */ + StatisticRange denseRange = range(0, 100, 50); + StatisticRange filterRange = range(20, 30, 5); + + assertOverlap(denseRange, filterRange, 0.1); + } + + @Test + public void testVeryLowDensity() + { /* Very sparse case: 10 distinct values over a length of 1e9 (density 1e-8); the expected overlap is the distinct-value ratio 5/10. NOTE(review): within(0.1) is a loose tolerance for a value the formula produces exactly, and within is imported from AssertionsForClassTypes while assertThat comes from Assertions - consider aligning. */ + StatisticRange verySparse = range(0, 1e9, 10); + StatisticRange filterRange = range(100, 200, 5); + + double expected = 5.0 / 10.0; + double actual = verySparse.overlapPercentWith(filterRange); + assertThat(actual).isCloseTo(expected, within(0.1)); + } + + @Test + public void testDensityWithZeroDistinctValues() + { /* A receiver with zero distinct values is expected to report 0 overlap. NOTE(review): the density branch on its own would divide by this.distinctValues, which is 0 here, so this test presumably relies on an earlier guard in overlapPercentWith that lies outside the visible hunk - confirm. */ + StatisticRange zeroDistinct = range(0, 1000, 0); + StatisticRange filterRange = range(100, 200, 5); + + assertOverlap(zeroDistinct, filterRange, 0); + } + @Test public void testIntersect() {